/**
 * HTML to Lexical JSON Converter
 * Story 1.3: Content Migration Script
 *
 * Converts HTML content to Payload CMS Lexical editor format
 */

import { parse } from 'html-parse-stringify'

// ============================================================
// LEXICAL JSON TYPES
// ============================================================

/**
 * Loosest description of a serialized Lexical node: a `type` discriminator,
 * a `version` number, and arbitrary additional properties.
 *
 * NOTE(review): not referenced by any code in the visible part of this file.
 */
interface LexicalNode {
  type: string
  version: number
  // Open index signature: any further property is accepted and is untyped.
  [key: string]: any
}

/**
 * Serialized Lexical text node as produced by `createTextNode`.
 *
 * `detail` and `format` are numeric bitmasks in Lexical's serialized form.
 * `detail` was previously typed as an object with keys `0` and `1`, which
 * did not match the `detail: 0` value this file actually writes.
 */
interface LexicalTextContent {
  type: 'text'
  version: 1
  // Numeric detail bitmask (0 = no special flags).
  detail?: number
  // Numeric text-format bitmask (0 = plain text).
  format?: number
  mode?: string
  // Inline CSS text applied to the run ('' = none).
  style?: string
  text: string
}

/**
 * Serialized Lexical node that carries children (paragraph, heading, list,
 * list item, quote, link, ...). One interface covers every element kind, so
 * the kind-specific properties below are all optional.
 */
interface LexicalElementNode {
  type: 'element' | 'heading' | 'link' | 'list' | 'listitem' | 'quote' | 'paragraph'
  version: 1
  children: LexicalContent[]
  direction?: 'ltr' | 'rtl' | null
  format?: '' | 'left' | 'start' | 'center' | 'right' | 'end' | 'justify'
  indent?: number
  // Set on heading nodes in this file ('h1'..'h6').
  tag?: string
  // Set on list nodes in this file.
  listType?: 'bullet' | 'number'
  // The remaining fields are presumably for link nodes; no code in the visible
  // part of this file sets them (links are currently flattened to text).
  rel?: null | string
  target?: null | string
  title?: null | string
  url?: string
}

/**
 * Serialized Lexical line break. It has no children and no text, only the
 * `type` discriminator and a version.
 */
interface LexicalLinebreakNode {
  type: 'linebreak'
  version: 1
}

/**
 * Root of a serialized Lexical document. Its children are restricted to
 * element nodes; text and line breaks must live inside one of them.
 * The converters in this file wrap this object as `{ root: LexicalRoot }`.
 */
interface LexicalRoot {
  type: 'root'
  version: 1
  children: LexicalElementNode[]
  direction: 'ltr' | 'rtl' | null
}

/** Anything that may appear in an element node's `children` array. */
type LexicalContent = LexicalTextContent | LexicalElementNode | LexicalLinebreakNode

// ============================================================
// HTML TO LEXICAL CONVERTER
// ============================================================

/**
 * Convert an HTML string into a serialized Lexical document.
 *
 * The result is a JSON *string* of the shape `{ "root": { ... } }`, which is
 * the wrapper Payload's richText field expects.
 *
 * - Missing, empty or non-string input yields an empty document.
 * - If parsing or conversion throws, a warning is logged and the cleaned HTML
 *   is stored verbatim as a single plain-text paragraph.
 *
 * @param html Raw HTML markup.
 * @returns JSON string containing `{ root: LexicalRoot }`.
 */
export function htmlToLexical(html: string): string {
  const hasMarkup = typeof html === 'string' && html !== ''
  if (!hasMarkup) {
    return createEmptyLexical()
  }

  // Strip scripts, styles and noisy attributes before parsing
  const sanitized = cleanHtml(html)

  try {
    // Parse, convert, then drop the empty text nodes Payload rejects
    const blocks = cleanEmptyTextNodes(convertNodes(parse(sanitized)))

    const root = {
      type: 'root',
      version: 1,
      children: blocks.length === 0 ? [createEmptyParagraph()] : blocks,
      direction: null,
    } satisfies LexicalRoot

    // Payload stores Lexical content under a top-level "root" key
    return JSON.stringify({ root })
  } catch (error) {
    console.warn('Failed to parse HTML, using fallback:', error)
    return createTextLexical(sanitized)
  }
}

/**
 * Convert an HTML string into a Lexical document object (for direct use with
 * the Payload local API).
 *
 * Produces the same tree as `htmlToLexical`, but as an object instead of a
 * JSON string. The converted nodes go through `cleanEmptyTextNodes`, exactly
 * as in `htmlToLexical`, so both entry points agree on the output for the
 * same HTML.
 *
 * - Missing, empty or non-string input yields an empty document.
 * - If parsing or conversion throws, a warning is logged and the cleaned HTML
 *   is stored verbatim as a single plain-text paragraph.
 *
 * @param html Raw HTML markup.
 * @returns `{ root: LexicalRoot }` ready for a richText field.
 */
export function htmlToLexicalObject(html: string): { root: LexicalRoot } {
  if (!html || typeof html !== 'string') {
    return JSON.parse(createEmptyLexical())
  }

  // Clean the HTML first
  const cleanedHtml = cleanHtml(html)

  try {
    const ast = parse(cleanedHtml)

    // Same post-processing as htmlToLexical: drop empty text nodes that
    // Payload's validator does not accept.
    const children = cleanEmptyTextNodes(convertNodes(ast))

    return {
      root: {
        type: 'root',
        version: 1,
        children: children.length > 0 ? children : [createEmptyParagraph()],
        direction: null,
      },
    }
  } catch (error) {
    console.warn('Failed to parse HTML, using fallback:', error)
    return JSON.parse(createTextLexical(cleanedHtml))
  }
}

/**
 * Build the serialized form of an empty document: a root that holds a single
 * empty paragraph, wrapped as `{ root: ... }`.
 *
 * @returns JSON string of the empty document.
 */
function createEmptyLexical(): string {
  const root = {
    type: 'root',
    version: 1,
    children: [createEmptyParagraph()],
    direction: null,
  }

  return JSON.stringify({ root })
}

/**
 * Fallback document: wraps the given text, untouched, in one paragraph under
 * a root node and serializes it as `{ root: ... }`.
 *
 * @param text Text to place in the single paragraph.
 * @returns JSON string of the document.
 */
function createTextLexical(text: string): string {
  const paragraph = {
    type: 'paragraph',
    version: 1,
    children: [createTextNode(text)],
  }

  const root = {
    type: 'root',
    version: 1,
    children: [paragraph],
    direction: null,
  }

  return JSON.stringify({ root })
}

/**
 * Build a paragraph whose only child is an empty text node.
 *
 * @returns A fresh paragraph element node.
 */
function createEmptyParagraph(): LexicalElementNode {
  const placeholder = createTextNode('')

  return { type: 'paragraph', version: 1, children: [placeholder] }
}

/**
 * Remove empty text nodes (`type: 'text'` with `text: ''`) from a Lexical
 * tree, at every depth. Payload's Lexical validator rejects empty text nodes.
 *
 * The previous implementation only cleaned two levels below the top-level
 * nodes, so empty text nodes nested deeper were left in place.
 *
 * Behaviour kept from the original:
 * - The input is not mutated; changed nodes are shallow copies.
 * - A top-level node that loses all of its children receives a single empty
 *   text node as a placeholder. Nested nodes are left with an empty
 *   `children` array instead.
 * - Top-level nodes of type `linebreak` are dropped.
 *
 * @param nodes Top-level element nodes of the document.
 * @returns A cleaned copy of `nodes`.
 */
function cleanEmptyTextNodes(nodes: LexicalElementNode[]): LexicalElementNode[] {
  const isEmptyText = (node: any): boolean => node.type === 'text' && node.text === ''

  // Drops empty text nodes from `children` and descends into every child
  // that has a children array of its own.
  const pruneChildren = (children: any[]): any[] =>
    children
      .filter((child) => !isEmptyText(child))
      .map((child) =>
        Array.isArray(child.children)
          ? { ...child, children: pruneChildren(child.children) }
          : child,
      )

  return nodes
    .map((node) => {
      if (!Array.isArray(node.children)) {
        return node
      }

      const cleanedChildren = pruneChildren(node.children)

      // If all children were removed, add an empty text node
      return {
        ...node,
        children: cleanedChildren.length > 0 ? cleanedChildren : [createTextNode('')],
      }
    })
    // A bare linebreak is not a valid top-level node. The declared element
    // type union does not include 'linebreak', hence the widening to string.
    .filter((node) => (node.type as string) !== 'linebreak')
}

/**
 * Strip markup the converter should never see, then trim the result.
 *
 * Every pattern below is removed (replaced by the empty string), in order.
 *
 * @param html Raw HTML markup.
 * @returns The cleaned, trimmed markup.
 */
function cleanHtml(html: string): string {
  const removals: RegExp[] = [
    // <script> and <style> elements including their content
    /<script\b[^<]*(?:(?!<\/script>)<[^<]*)*<\/script>/gi,
    /<style\b[^<]*(?:(?!<\/style>)<[^<]*)*<\/style>/gi,
    // data-* and class attributes (Webflow noise)
    /\sdata-[a-z-]+="[^"]*"/gi,
    /\sclass="[^"]*"/gi,
    // paragraphs and divs that hold nothing but whitespace
    /<p>\s*<\/p>/gi,
    /<div>\s*<\/div>/gi,
  ]

  let cleaned = html
  for (const pattern of removals) {
    cleaned = cleaned.replace(pattern, '')
  }

  return cleaned.trim()
}

/**
 * Convert top-level (block) HTML AST nodes to Lexical element nodes,
 * preserving document order.
 *
 * Handled tags: h1-h6, p, div (as paragraph), ul/ol, blockquote, br, hr
 * (rendered as a `---` paragraph) and img (rendered as a text placeholder).
 * Non-empty bare text becomes its own paragraph. Any other tag is skipped.
 *
 * Fixes over the previous implementation:
 * - Text is read from `value` or `content`, matching `convertInlineNodes`;
 *   reading only `value` dropped bare top-level text.
 * - Image attributes are read from `attrs` or `attributes`, matching
 *   `convertInlineNodes`; reading only `attributes` lost `src` and `alt`.
 * - A list is emitted as soon as it is converted. It used to be held back
 *   until the next block element, so text, `<br>` or `<hr>` following a list
 *   ended up in front of it.
 *
 * @param nodes AST nodes returned by the HTML parser.
 * @returns Element nodes for the root; a single empty paragraph if nothing
 *          was converted.
 */
function convertNodes(nodes: any[]): LexicalElementNode[] {
  const result: LexicalElementNode[] = []

  for (const node of nodes) {
    // Handle text nodes
    if (node.type === 'text') {
      const raw = node.value || node.content
      const text = typeof raw === 'string' ? raw.trim() : ''
      if (text) {
        result.push({
          type: 'paragraph',
          version: 1,
          children: [createTextNode(text)],
        })
      }
      continue
    }

    // Comments and other nameless nodes carry no block content
    if (!node.name) continue

    const tag = node.name.toLowerCase()

    // Handle headings
    if (['h1', 'h2', 'h3', 'h4', 'h5', 'h6'].includes(tag)) {
      result.push(createHeading(tag, node.children || []))
      continue
    }

    // Handle paragraphs, and divs (treated as paragraphs)
    if (tag === 'p' || tag === 'div') {
      const content = convertInlineNodes(node.children || [])
      if (content.length > 0) {
        result.push({
          type: 'paragraph',
          version: 1,
          children: content,
        })
      }
      continue
    }

    // Handle lists: emit immediately so document order is preserved.
    // flushList only pushes the list when it has at least one item.
    if (tag === 'ul' || tag === 'ol') {
      const list: LexicalElementNode = {
        type: 'list',
        version: 1,
        listType: tag === 'ol' ? 'number' : 'bullet',
        children: [],
      }
      flushList(result, list, convertListItems(node.children || []))
      continue
    }

    // Handle blockquotes
    if (tag === 'blockquote') {
      result.push({
        type: 'quote',
        version: 1,
        children: convertInlineNodes(node.children || []),
      })
      continue
    }

    // Handle line breaks and horizontal rules
    if (tag === 'br') {
      result.push({
        type: 'paragraph',
        version: 1,
        children: [{ type: 'linebreak', version: 1 }],
      })
      continue
    }

    if (tag === 'hr') {
      result.push({
        type: 'paragraph',
        version: 1,
        children: [createTextNode('---')],
      })
      continue
    }

    // Handle images
    if (tag === 'img') {
      const src = node.attrs?.src || node.attributes?.src || ''
      const alt = node.attrs?.alt || node.attributes?.alt || ''
      result.push(createImageNode(src, alt))
      continue
    }
  }

  return result.length > 0 ? result : [createEmptyParagraph()]
}

/**
 * Attach `items` to `list` and append the list to `result`.
 *
 * Does nothing when there is no list or when there are no items; in that
 * case neither `list` nor `result` is modified.
 *
 * @param result Output array that receives the list.
 * @param list   Pending list node, or null when none is pending.
 * @param items  List items to install as the list's children.
 */
function flushList(
  result: LexicalElementNode[],
  list: LexicalElementNode | null,
  items: LexicalElementNode[],
): void {
  const hasItems = items.length > 0
  if (!list || !hasItems) {
    return
  }

  list.children = items
  result.push(list)
}

/**
 * Turn the `<li>` children of a list into Lexical list item nodes.
 *
 * Entries whose tag name is not `li` (compared case-insensitively) are
 * ignored. Each item's content is converted as inline content.
 *
 * @param items Child AST nodes of a `<ul>` or `<ol>`.
 * @returns One `listitem` node per `<li>`, in source order.
 */
function convertListItems(items: any[]): LexicalElementNode[] {
  return items.reduce<LexicalElementNode[]>((listItems, candidate) => {
    if (candidate.name?.toLowerCase() === 'li') {
      listItems.push({
        type: 'listitem',
        version: 1,
        children: convertInlineNodes(candidate.children || []),
      })
    }
    return listItems
  }, [])
}

/**
 * Build a text node with every property filled in: `detail` 0, mode
 * 'normal', empty style, and the given format bitmask (0 when omitted).
 *
 * @param text   Text content of the node.
 * @param format Optional text-format bitmask.
 * @returns A fresh text node.
 */
function createTextNode(text: string, format?: number): LexicalTextContent {
  const resolvedFormat = format ?? 0

  // Property order is kept stable because these nodes get serialized to JSON
  return {
    type: 'text',
    version: 1,
    text,
    detail: 0,
    format: resolvedFormat,
    mode: 'normal',
    style: '',
  }
}

/**
 * Map a formatting tag to its Lexical text-format flag.
 * Bold = 1, italic = 2, underline = 8 (4 is strikethrough in Lexical).
 */
function inlineFormatFlag(tag: string): number | undefined {
  switch (tag) {
    case 'strong':
    case 'b':
      return 1
    case 'em':
    case 'i':
      return 2
    case 'u':
      return 8
    default:
      return undefined
  }
}

/**
 * Convert inline AST nodes (text, links, formatting, line breaks, images)
 * to Lexical inline content.
 *
 * Formatting elements are flattened: the plain text of the element is
 * emitted as one run with that element's format, so formatting nested
 * inside it is not preserved. Links are emitted as plain text with the URL
 * appended in parentheses.
 *
 * Fixes over the previous implementation:
 * - Nodes without a tag name (e.g. HTML comments) are skipped. They used to
 *   pass the guard and crash on `node.name.toLowerCase()`.
 * - `<br>` becomes a `linebreak` node instead of an empty text node.
 * - `<u>` uses the underline flag (8); it used 4, which is strikethrough.
 * - Inline images contribute their placeholder text run rather than a whole
 *   paragraph nested inside the parent.
 * - Empty bold/italic/underline/code runs and empty placeholders from nested
 *   unknown elements are no longer emitted, matching the link and span
 *   branches.
 *
 * @param nodes Child AST nodes of a block element.
 * @returns Inline content; a single empty text node if nothing was produced.
 */
function convertInlineNodes(nodes: any[]): LexicalContent[] {
  const result: LexicalContent[] = []

  for (const node of nodes) {
    // Handle text nodes (html-parse-stringify uses type for text)
    if (node.type === 'text') {
      const text = (node.value || node.content || '') as string
      if (text) {
        result.push(createTextNode(text))
      }
      continue
    }

    // Skip anything that is not an element (comments have a type but no name)
    if (typeof node.name !== 'string' || node.name === '') continue

    const tag = node.name.toLowerCase()

    // Handle line breaks
    if (tag === 'br') {
      result.push({ type: 'linebreak', version: 1 })
      continue
    }

    // Handle links
    // NOTE: Payload's Lexical link validation is very strict. For now, convert links to text
    // TODO: Implement proper link format after investigating Payload's link node requirements
    if (tag === 'a') {
      const text = extractText(node.children || [])
      const href = node.attrs?.href || node.attributes?.href || ''
      if (text) {
        // Include URL as text for now
        const linkText = href && href !== '#' ? `${text} (${href})` : text
        result.push(createTextNode(linkText))
      }
      continue
    }

    // Handle bold (strong, b), italic (em, i) and underline (u)
    const format = inlineFormatFlag(tag)
    if (format !== undefined) {
      const text = extractText(node.children || [])
      if (text) {
        result.push(createTextNode(text, format))
      }
      continue
    }

    // Handle images inline: reuse the placeholder's text run so that no
    // paragraph ends up nested inside the parent element
    if (tag === 'img') {
      const src = node.attrs?.src || node.attributes?.src || ''
      const alt = node.attrs?.alt || node.attributes?.alt || ''
      result.push(...createImageNode(src, alt).children)
      continue
    }

    // Handle spans (treat as text)
    if (tag === 'span') {
      const text = extractText(node.children || [])
      if (text) {
        result.push(createTextNode(text))
      }
      continue
    }

    // Handle code
    if (tag === 'code') {
      const text = extractText(node.children || [])
      if (text) {
        result.push({
          ...createTextNode(text),
          style: 'font-family: monospace;',
        })
      }
      continue
    }

    // Recursively handle other inline elements, dropping the empty
    // placeholder the recursive call returns for elements without content
    const nested = convertInlineNodes(node.children || [])
    result.push(...nested.filter((child) => !(child.type === 'text' && child.text === '')))
  }

  return result.length > 0 ? result : [createTextNode('')]
}

/**
 * Build a heading node from a heading tag name and its child AST nodes.
 *
 * The level is parsed from the digits after the first character of `tag`
 * and written back as `h<level>`.
 *
 * @param tag      Heading tag name, e.g. 'h2'.
 * @param children Child AST nodes of the heading element.
 * @returns A heading element node with inline content.
 */
function createHeading(tag: string, children: any[]): LexicalElementNode {
  const level = parseInt(tag.substring(1), 10)
  const content = convertInlineNodes(children)

  // Fall back to one empty text node if the conversion produced nothing
  const headingChildren = content.length === 0 ? [createTextNode('')] : content

  return {
    type: 'heading',
    version: 1,
    tag: `h${level}`,
    children: headingChildren,
  }
}

/**
 * Build the stand-in for an image: a paragraph holding one italic-styled
 * text run of the form `[Image: <alt or src>]`. The alt text is used when it
 * is non-empty, otherwise the source URL.
 *
 * @param src Image source URL.
 * @param alt Image alt text.
 * @returns A paragraph element node containing the placeholder text.
 */
function createImageNode(src: string, alt: string): LexicalElementNode {
  const label = `[Image: ${alt || src}]`
  const placeholder = {
    ...createTextNode(label),
    style: 'font-style: italic;',
  }

  return { type: 'paragraph', version: 1, children: [placeholder] }
}

/**
 * Concatenate the plain text found in a list of AST nodes, depth first.
 *
 * - Text nodes contribute `value`, or `content` when `value` is falsy.
 * - Nodes with `children` contribute the text of their children.
 * - Other nodes contribute their `content`, if they have any.
 *
 * @param nodes AST nodes to read.
 * @returns The collected text ('' when there is none).
 */
function extractText(nodes: any[]): string {
  let collected = ''

  for (const current of nodes) {
    if (current.type === 'text') {
      collected += current.value || current.content || ''
      continue
    }

    if (current.children) {
      collected += extractText(current.children)
      continue
    }

    if (current.content) {
      collected += current.content
    }
  }

  return collected
}

// ============================================================
// UTILITY FUNCTIONS
// ============================================================

/**
 * Check whether a string is valid Lexical JSON.
 *
 * Accepts both shapes used with Payload:
 * - the wrapped form `{ "root": { "type": "root", "children": [...] } }`,
 *   which is what `htmlToLexical` produces, and
 * - a bare root object `{ "type": "root", "children": [...] }`.
 *
 * Previously only the bare form was accepted, so the output of
 * `htmlToLexical` itself was reported as invalid.
 *
 * @param json Candidate JSON string.
 * @returns true when the string parses and holds a root node with a
 *          `children` array; false otherwise (including on parse errors).
 */
export function isValidLexical(json: string): boolean {
  try {
    const parsed = JSON.parse(json)

    // Unwrap { root: ... } when present, otherwise validate the value itself
    const candidate = parsed?.root ?? parsed

    return candidate?.type === 'root' && Array.isArray(candidate?.children)
  } catch {
    return false
  }
}

/**
 * Run `htmlToLexical` over every entry of an array.
 *
 * @param htmlArray HTML strings to convert.
 * @returns The serialized Lexical documents, in the same order.
 */
export function batchHtmlToLexical(htmlArray: string[]): string[] {
  const convertOne = (html: string): string => htmlToLexical(html)

  return htmlArray.map(convertOne)
}