feat(backend): update collections, config and migration tools

Update Payload CMS configuration, collections (Audit, Posts), and add migration scripts/reports.
2026-02-11 11:50:23 +08:00
parent 8ca609a889
commit be7fc902fb
46 changed files with 5442 additions and 15 deletions
--- a/apps/backend/scripts/migration/lexicalConverter.ts
+++ b/apps/backend/scripts/migration/lexicalConverter.ts
@@ -0,0 +1,572 @@
+/**
+ * HTML to Lexical JSON Converter
+ * Story 1.3: Content Migration Script
+ *
+ * Converts HTML content to Payload CMS Lexical editor format
+ */
+
+import { parse } from 'html-parse-stringify'
+
+// ============================================================
+// LEXICAL JSON TYPES
+// ============================================================
+
+/**
+ * Loosest description of a serialized Lexical node: a `type` tag, a `version`
+ * number, and any number of additional keys.
+ *
+ * NOTE(review): nothing in the visible part of this file references this
+ * interface — confirm whether it is still needed.
+ */
+interface LexicalNode {
+  type: string
+  version: number
+  // Open index signature: any extra property is accepted without type checking
+  [key: string]: any
+}
+
+/**
+ * Serialized Lexical text node.
+ *
+ * `detail` and `format` are numeric fields; `createTextNode` writes `0` for
+ * both by default, and the inline converter uses `format` values 1 (bold),
+ * 2 (italic) and 4 (underline).
+ */
+interface LexicalTextContent {
+  type: 'text'
+  version: 1
+  // Numeric in Lexical's serialized text node; previously mistyped as an object
+  detail?: number
+  format?: number
+  mode?: string
+  style?: string
+  text: string
+}
+
+/**
+ * Serialized Lexical node that owns children (paragraph, heading, list, ...).
+ *
+ * One interface covers every element kind, so the kind-specific fields are all
+ * optional. The converters in this file build 'paragraph', 'heading', 'list',
+ * 'listitem' and 'quote' nodes.
+ */
+interface LexicalElementNode {
+  type: 'element' | 'heading' | 'link' | 'list' | 'listitem' | 'quote' | 'paragraph'
+  version: 1
+  children: LexicalContent[]
+  direction?: 'ltr' | 'rtl' | null
+  format?: '' | 'left' | 'start' | 'center' | 'right' | 'end' | 'justify'
+  indent?: number
+  // Heading level tag such as 'h2' (set by createHeading)
+  tag?: string
+  // Set on 'list' nodes: 'number' for <ol>, 'bullet' for <ul>
+  listType?: 'bullet' | 'number'
+  // NOTE(review): rel/target/title/url look like link attributes; nothing in the
+  // visible code sets them because links are currently flattened to text.
+  rel?: null | string
+  target?: null | string
+  title?: null | string
+  url?: string
+}
+
+/**
+ * Serialized line break node. It carries no payload beyond its type tag, e.g.
+ * the node built for a top-level <br> in convertNodes.
+ */
+interface LexicalLinebreakNode {
+  type: 'linebreak'
+  version: 1
+}
+
+/**
+ * Root node of a serialized Lexical document. The exported converters wrap it
+ * as `{ root: LexicalRoot }` before handing it to Payload.
+ */
+interface LexicalRoot {
+  type: 'root'
+  version: 1
+  // Only element nodes are allowed directly under the root
+  children: LexicalElementNode[]
+  // Every root built in this file uses null
+  direction: 'ltr' | 'rtl' | null
+}
+
+// Any node that may appear in an element's `children`: text, nested element, or line break
+type LexicalContent = LexicalTextContent | LexicalElementNode | LexicalLinebreakNode
+
+// ============================================================
+// HTML TO LEXICAL CONVERTER
+// ============================================================
+
+/**
+ * Convert an HTML string into a serialized Lexical document (a JSON string).
+ *
+ * The returned text is the JSON form of `{ "root": { ... } }`, the wrapper this
+ * file uses for Payload's richText field. Use `htmlToLexicalObject` when the
+ * object itself is needed rather than its JSON text.
+ *
+ * - Empty or non-string input yields a document holding one empty paragraph.
+ * - If parsing or conversion throws, a warning is logged and the cleaned HTML
+ *   becomes the text of a single paragraph.
+ */
+export function htmlToLexical(html: string): string {
+  const hasContent = typeof html === 'string' && html !== ''
+  if (!hasContent) {
+    return createEmptyLexical()
+  }
+
+  // Strip scripts, styles and presentational attributes before parsing
+  const sanitized = cleanHtml(html)
+
+  try {
+    // Empty text nodes are removed because Payload doesn't accept them
+    const blocks = cleanEmptyTextNodes(convertNodes(parse(sanitized)))
+    const rootChildren = blocks.length > 0 ? blocks : [createEmptyParagraph()]
+
+    const root = {
+      type: 'root',
+      version: 1,
+      children: rootChildren,
+      direction: null,
+    } satisfies LexicalRoot
+
+    // Payload stores Lexical content wrapped in a top-level "root" key
+    return JSON.stringify({ root })
+  } catch (error) {
+    console.warn('Failed to parse HTML, using fallback:', error)
+    return createTextLexical(sanitized)
+  }
+}
+
+/**
+ * Convert an HTML string to a Lexical document object (for direct use with the
+ * Payload local API).
+ *
+ * Mirrors `htmlToLexical` step for step, including the empty-text-node cleaning
+ * pass, so both entry points describe the same document for the same input.
+ *
+ * - Empty or non-string input yields a document holding one empty paragraph.
+ * - If parsing or conversion throws, a warning is logged and the cleaned HTML
+ *   becomes the text of a single paragraph.
+ *
+ * @param html Raw HTML to convert
+ * @returns The document in `{ root: LexicalRoot }` form
+ */
+export function htmlToLexicalObject(html: string): { root: LexicalRoot } {
+  if (!html || typeof html !== 'string') {
+    return JSON.parse(createEmptyLexical()) as { root: LexicalRoot }
+  }
+
+  // Clean the HTML first
+  const cleanedHtml = cleanHtml(html)
+
+  try {
+    const ast = parse(cleanedHtml)
+    // Same cleaning as htmlToLexical: Payload doesn't accept empty text nodes
+    const children = cleanEmptyTextNodes(convertNodes(ast))
+
+    return {
+      root: {
+        type: 'root',
+        version: 1,
+        children: children.length > 0 ? children : [createEmptyParagraph()],
+        direction: null,
+      },
+    }
+  } catch (error) {
+    console.warn('Failed to parse HTML, using fallback:', error)
+    return JSON.parse(createTextLexical(cleanedHtml)) as { root: LexicalRoot }
+  }
+}
+
+/**
+ * Build the serialized (JSON string) form of an empty document: a root that
+ * holds exactly one empty paragraph.
+ */
+function createEmptyLexical(): string {
+  const emptyDocument = {
+    root: {
+      type: 'root',
+      version: 1,
+      children: [createEmptyParagraph()],
+      direction: null,
+    },
+  }
+
+  return JSON.stringify(emptyDocument)
+}
+
+/**
+ * Fallback serializer: wrap `text` verbatim in a single paragraph and return
+ * the resulting document as a JSON string.
+ */
+function createTextLexical(text: string): string {
+  const paragraph = {
+    type: 'paragraph',
+    version: 1,
+    children: [createTextNode(text)],
+  }
+
+  const fallbackDocument = {
+    root: {
+      type: 'root',
+      version: 1,
+      children: [paragraph],
+      direction: null,
+    },
+  }
+
+  return JSON.stringify(fallbackDocument)
+}
+
+/**
+ * Build a paragraph whose only child is an empty text node.
+ */
+function createEmptyParagraph(): LexicalElementNode {
+  const placeholder = createTextNode('')
+  const paragraph: LexicalElementNode = {
+    type: 'paragraph',
+    version: 1,
+    children: [placeholder],
+  }
+  return paragraph
+}
+
+/**
+ * Remove empty text nodes from a Lexical tree.
+ * Payload's Lexical validator rejects empty text nodes.
+ *
+ * Cleaning is applied at every depth. A top-level node that loses all of its
+ * children gets a single empty text node back as a placeholder (the same shape
+ * `createEmptyParagraph` produces); nested elements are simply left with an
+ * empty `children` array.
+ *
+ * @param nodes Top-level element nodes; not mutated
+ * @returns New array of cleaned nodes
+ */
+function cleanEmptyTextNodes(nodes: LexicalElementNode[]): LexicalElementNode[] {
+  const isEmptyText = (candidate: any): boolean =>
+    candidate.type === 'text' && candidate.text === ''
+
+  // Drop empty text nodes from `children` and from every nested children array
+  const prune = (children: any[]): any[] =>
+    children
+      .filter((child) => !isEmptyText(child))
+      .map((child) =>
+        Array.isArray(child.children) ? { ...child, children: prune(child.children) } : child,
+      )
+
+  return nodes
+    .map((node): LexicalElementNode => {
+      if (!Array.isArray(node.children)) {
+        return node
+      }
+
+      const cleanedChildren = prune(node.children)
+
+      // If all children were removed, add an empty text node
+      return {
+        ...node,
+        children: cleanedChildren.length > 0 ? cleanedChildren : [createTextNode('')],
+      }
+    })
+    // Defensive: a bare linebreak is not a valid top-level node. The cast keeps the
+    // runtime check while avoiding a "no overlap" error on the literal union.
+    .filter((node) => (node.type as string) !== 'linebreak')
+}
+
+/**
+ * Strip markup that should not reach the converter.
+ *
+ * The patterns are applied in order, each match being replaced with nothing,
+ * and the result is trimmed.
+ */
+function cleanHtml(html: string): string {
+  const removals: RegExp[] = [
+    // <script> and <style> elements together with their contents
+    /<script\b[^<]*(?:(?!<\/script>)<[^<]*)*<\/script>/gi,
+    /<style\b[^<]*(?:(?!<\/style>)<[^<]*)*<\/style>/gi,
+    // Webflow-specific attributes: double-quoted data-* and class
+    /\sdata-[a-z-]+="[^"]*"/gi,
+    /\sclass="[^"]*"/gi,
+    // Attribute-less <p> / <div> containing only whitespace
+    /<p>\s*<\/p>/gi,
+    /<div>\s*<\/div>/gi,
+  ]
+
+  let cleaned = html
+  for (const pattern of removals) {
+    cleaned = cleaned.replace(pattern, '')
+  }
+  return cleaned.trim()
+}
+
+/**
+ * Convert top-level (block) HTML AST nodes to Lexical element nodes.
+ *
+ * Handled tags: h1-h6, p, div, ul/ol, blockquote, br, hr and img. Bare text
+ * becomes its own paragraph. Any other tag is skipped together with its
+ * children.
+ *
+ * A list is buffered when its ul/ol tag is seen and emitted through `flushList`
+ * before anything else is appended, so output order always matches source
+ * order.
+ *
+ * @param nodes AST nodes as returned by html-parse-stringify's `parse`
+ * @returns Lexical block nodes; a single empty paragraph when nothing converts
+ */
+function convertNodes(nodes: any[]): LexicalElementNode[] {
+  const result: LexicalElementNode[] = []
+  let currentList: LexicalElementNode | null = null
+  let listItems: LexicalElementNode[] = []
+
+  // Emit the buffered list (if any) and reset the buffer
+  const flushPendingList = (): void => {
+    flushList(result, currentList, listItems)
+    currentList = null
+    listItems = []
+  }
+
+  for (const node of nodes) {
+    // Handle text nodes. Both `value` and `content` are accepted, matching
+    // convertInlineNodes (html-parse-stringify's README documents `content`).
+    if (node.type === 'text') {
+      const raw = node.value || node.content || ''
+      const text = typeof raw === 'string' ? raw.trim() : ''
+      if (text) {
+        flushPendingList()
+        result.push({
+          type: 'paragraph',
+          version: 1,
+          children: [createTextNode(text)],
+        })
+      }
+      continue
+    }
+
+    if (!node.name) continue
+
+    const tag = node.name.toLowerCase()
+
+    // Handle lists: buffer the list; it is emitted by the next flush
+    if (tag === 'ul' || tag === 'ol') {
+      flushPendingList()
+      currentList = {
+        type: 'list',
+        version: 1,
+        listType: tag === 'ol' ? 'number' : 'bullet',
+        children: [],
+      }
+      listItems = convertListItems(node.children || [])
+      continue
+    }
+
+    // Every remaining branch appends straight to `result`, so a pending list
+    // must be emitted first to keep document order.
+    flushPendingList()
+
+    // Handle headings
+    if (/^h[1-6]$/.test(tag)) {
+      result.push(createHeading(tag, node.children || []))
+      continue
+    }
+
+    // Handle paragraphs and divs (divs are treated as paragraphs)
+    if (tag === 'p' || tag === 'div') {
+      const content = convertInlineNodes(node.children || [])
+      if (content.length > 0) {
+        result.push({
+          type: 'paragraph',
+          version: 1,
+          children: content,
+        })
+      }
+      continue
+    }
+
+    // Handle blockquotes
+    if (tag === 'blockquote') {
+      result.push({
+        type: 'quote',
+        version: 1,
+        children: convertInlineNodes(node.children || []),
+      })
+      continue
+    }
+
+    // Handle line breaks: a paragraph holding a single linebreak node
+    if (tag === 'br') {
+      result.push({
+        type: 'paragraph',
+        version: 1,
+        children: [{ type: 'linebreak', version: 1 }],
+      })
+      continue
+    }
+
+    // Handle horizontal rules: rendered as a literal '---' paragraph
+    if (tag === 'hr') {
+      result.push({
+        type: 'paragraph',
+        version: 1,
+        children: [createTextNode('---')],
+      })
+      continue
+    }
+
+    // Handle images. Attributes are read from `attrs` first, then `attributes`,
+    // the same lookup convertInlineNodes uses.
+    if (tag === 'img') {
+      const src = node.attrs?.src || node.attributes?.src || ''
+      const alt = node.attrs?.alt || node.attributes?.alt || ''
+      result.push(createImageNode(src, alt))
+      continue
+    }
+
+    // NOTE(review): any other tag (table, pre, section, ...) is dropped here
+    // together with its content — confirm this is acceptable for the migration.
+  }
+
+  // Flush any remaining list
+  flushPendingList()
+
+  return result.length > 0 ? result : [createEmptyParagraph()]
+}
+
+/**
+ * Append a buffered list to `result`.
+ *
+ * Does nothing when there is no list or the list has no items. Otherwise the
+ * items become the list's `children` (the list object is mutated) and the list
+ * is pushed onto `result`.
+ */
+function flushList(
+  result: LexicalElementNode[],
+  list: LexicalElementNode | null,
+  items: LexicalElementNode[],
+): void {
+  if (!list || items.length === 0) {
+    return
+  }
+
+  list.children = items
+  result.push(list)
+}
+
+/**
+ * Turn the <li> children of a list into Lexical 'listitem' nodes.
+ * Children that are not <li> elements are ignored.
+ */
+function convertListItems(items: any[]): LexicalElementNode[] {
+  const listItems: LexicalElementNode[] = []
+
+  for (const item of items) {
+    if (item.name?.toLowerCase() !== 'li') {
+      continue
+    }
+
+    listItems.push({
+      type: 'listitem',
+      version: 1,
+      children: convertInlineNodes(item.children || []),
+    })
+  }
+
+  return listItems
+}
+
+/**
+ * Build a Lexical text node with every standard property filled in.
+ *
+ * @param text Text content of the node
+ * @param format Numeric format value; 0 when omitted
+ */
+function createTextNode(text: string, format?: number): LexicalTextContent {
+  const textNode: LexicalTextContent = {
+    type: 'text',
+    version: 1,
+    text,
+    detail: 0,
+    format: format ?? 0,
+    mode: 'normal',
+    style: '',
+  }
+
+  return textNode
+}
+
+/**
+ * Convert inline HTML AST nodes (text, links, formatting) to Lexical content.
+ *
+ * - Text nodes and <span> become plain text nodes.
+ * - <strong>/<b>, <em>/<i> and <u> become text nodes with format 1, 2 and 4.
+ * - <code> becomes text with a monospace style.
+ * - <a> is flattened to "text (href)"; see the note in the 'a' case.
+ * - <br> becomes a linebreak node.
+ * - <img> becomes an italic "[Image: ...]" text placeholder.
+ * - Any other element is replaced by the conversion of its children.
+ *
+ * Empty strings never produce a text node, and nodes without a tag name (for
+ * example comments) are skipped.
+ *
+ * @param nodes AST nodes as returned by html-parse-stringify's `parse`
+ * @returns Converted content; a single empty text node when nothing converts
+ */
+function convertInlineNodes(nodes: any[]): LexicalContent[] {
+  const result: LexicalContent[] = []
+
+  // Append a text node, skipping empty strings because Payload rejects them
+  const pushText = (text: string, format?: number, style?: string): void => {
+    if (!text) return
+    const textNode = createTextNode(text, format)
+    result.push(style ? { ...textNode, style } : textNode)
+  }
+
+  for (const node of nodes) {
+    // Handle text nodes (html-parse-stringify uses type for text)
+    if (node.type === 'text') {
+      pushText((node.value || node.content || '') as string)
+      continue
+    }
+
+    // Skip anything without an element name; calling toLowerCase() on a missing
+    // name would throw and abort the whole conversion.
+    if (!node.name) continue
+
+    const tag = String(node.name).toLowerCase()
+    const children: any[] = node.children || []
+
+    switch (tag) {
+      // NOTE: Payload's Lexical link validation is very strict. For now, convert links to text
+      // TODO: Implement proper link format after investigating Payload's link node requirements
+      case 'a': {
+        const text = extractText(children)
+        const href = node.attrs?.href || node.attributes?.href || ''
+        if (text) {
+          // Include URL as text for now
+          pushText(href && href !== '#' ? `${text} (${href})` : text)
+        }
+        break
+      }
+
+      case 'strong':
+      case 'b':
+        pushText(extractText(children), 1) // Bold format
+        break
+
+      case 'em':
+      case 'i':
+        pushText(extractText(children), 2) // Italic format
+        break
+
+      case 'u':
+        pushText(extractText(children), 4) // Underline format
+        break
+
+      case 'span':
+        pushText(extractText(children))
+        break
+
+      case 'code':
+        pushText(extractText(children), undefined, 'font-family: monospace;')
+        break
+
+      case 'br':
+        result.push({ type: 'linebreak', version: 1 })
+        break
+
+      case 'img': {
+        // Same placeholder text as createImageNode, but as an inline text node:
+        // a paragraph must not be nested inside inline content.
+        const src = node.attrs?.src || node.attributes?.src || ''
+        const alt = node.attrs?.alt || node.attributes?.alt || ''
+        pushText(`[Image: ${alt || src}]`, undefined, 'font-style: italic;')
+        break
+      }
+
+      default:
+        // Recursively handle other inline elements. The recursive call returns an
+        // empty placeholder text node when nothing converted; drop it here.
+        for (const child of convertInlineNodes(children)) {
+          if (child.type === 'text' && child.text === '') continue
+          result.push(child)
+        }
+    }
+  }
+
+  return result.length > 0 ? result : [createTextNode('')]
+}
+
+/**
+ * Build a Lexical heading node from a heading tag name and its AST children.
+ *
+ * @param tag Heading tag name such as 'h2'; the digits after the first
+ *   character select the level
+ * @param children AST children of the heading element
+ */
+function createHeading(tag: string, children: any[]): LexicalElementNode {
+  const level = parseInt(tag.substring(1), 10)
+  const content = convertInlineNodes(children)
+  const headingChildren = content.length === 0 ? [createTextNode('')] : content
+
+  return {
+    type: 'heading',
+    version: 1,
+    tag: `h${level}`,
+    children: headingChildren,
+  }
+}
+
+/**
+ * Build a stand-in for an image: a paragraph containing italic text of the
+ * form "[Image: <alt or src>]". The alt text wins when it is non-empty.
+ */
+function createImageNode(src: string, alt: string): LexicalElementNode {
+  const label = alt || src
+  const placeholder = {
+    ...createTextNode(`[Image: ${label}]`),
+    style: 'font-style: italic;',
+  }
+
+  return {
+    type: 'paragraph',
+    version: 1,
+    children: [placeholder],
+  }
+}
+
+/**
+ * Concatenate the plain text found in `nodes`, descending into children.
+ *
+ * Text nodes contribute `value` (or `content`); other nodes contribute the text
+ * of their children, or their own `content` when they have no children.
+ */
+function extractText(nodes: any[]): string {
+  return nodes.reduce((collected: string, node: any) => {
+    if (node.type === 'text') {
+      return collected + (node.value || node.content || '')
+    }
+    if (node.children) {
+      return collected + extractText(node.children)
+    }
+    if (node.content) {
+      return collected + node.content
+    }
+    return collected
+  }, '')
+}
+
+// ============================================================
+// UTILITY FUNCTIONS
+// ============================================================
+
+/**
+ * Check if a string is valid Lexical JSON.
+ *
+ * Accepts both shapes used with this converter: the `{ "root": { ... } }`
+ * wrapper that `htmlToLexical` produces, and a bare root object. In either case
+ * the root must have `type: 'root'` and an array of `children`.
+ *
+ * @param json Candidate JSON text
+ * @returns true when the text parses and contains a root node; false otherwise,
+ *   including when the text is not valid JSON
+ */
+export function isValidLexical(json: string): boolean {
+  const isRootNode = (value: unknown): boolean =>
+    typeof value === 'object' &&
+    value !== null &&
+    (value as { type?: unknown }).type === 'root' &&
+    Array.isArray((value as { children?: unknown }).children)
+
+  try {
+    const parsed: unknown = JSON.parse(json)
+    if (typeof parsed !== 'object' || parsed === null) {
+      return false
+    }
+    return isRootNode(parsed) || isRootNode((parsed as { root?: unknown }).root)
+  } catch {
+    return false
+  }
+}
+
+/**
+ * Run `htmlToLexical` over each HTML string and return the serialized
+ * documents in the same order.
+ */
+export function batchHtmlToLexical(htmlArray: string[]): string[] {
+  const convertOne = (html: string): string => htmlToLexical(html)
+  return htmlArray.map(convertOne)
+}