feat(backend): update collections, config and migration tools

Update Payload CMS configuration, collections (Audit, Posts), and add migration scripts/reports.
This commit is contained in:
2026-02-11 11:50:23 +08:00
parent 8ca609a889
commit be7fc902fb
46 changed files with 5442 additions and 15 deletions

View File

@@ -0,0 +1,572 @@
/**
* HTML to Lexical JSON Converter
* Story 1.3: Content Migration Script
*
* Converts HTML content to Payload CMS Lexical editor format
*/
import { parse } from 'html-parse-stringify'
// ============================================================
// LEXICAL JSON TYPES
// ============================================================
/**
 * Loosest shape of a serialized Lexical node: a `type` tag, a `version`
 * number, and any number of further node-specific properties.
 *
 * NOTE(review): not referenced anywhere in the visible part of this file.
 */
interface LexicalNode {
type: string
version: number
// Index signature: any additional property is accepted without type checking.
[key: string]: any
}
/**
 * Serialized Lexical text node as produced by `createTextNode`.
 *
 * `detail` is a plain number (the converter always writes `0`). It was
 * previously typed as `{ 0: any; 1: any }`, which does not accept the numeric
 * value the converter actually emits.
 */
interface LexicalTextContent {
  type: 'text'
  version: 1
  /** Numeric detail flags; this converter always writes 0. */
  detail?: number
  /** Format bitmask; this converter uses 1 = bold, 2 = italic, 4 = underline. */
  format?: number
  /** Text mode; this converter always writes 'normal'. */
  mode?: string
  /** Inline CSS string, e.g. 'font-family: monospace;' for code. */
  style?: string
  /** The text content itself. */
  text: string
}
/**
 * Single shape shared by every element-like (child-bearing) node this
 * converter emits. Type-specific properties are all optional here, so the
 * compiler does not enforce which property belongs to which `type`.
 */
interface LexicalElementNode {
type: 'element' | 'heading' | 'link' | 'list' | 'listitem' | 'quote' | 'paragraph'
version: 1
children: LexicalContent[]
direction?: 'ltr' | 'rtl' | null
format?: '' | 'left' | 'start' | 'center' | 'right' | 'end' | 'justify'
indent?: number
// Set by createHeading to 'h1'..'h6'.
tag?: string
// Set on list nodes: 'number' for <ol>, 'bullet' for <ul>.
listType?: 'bullet' | 'number'
// NOTE(review): rel/target/title/url are presumably meant for 'link' nodes;
// nothing in the visible code sets them (links are currently flattened to text).
rel?: null | string
target?: null | string
title?: null | string
url?: string
}
/**
 * Serialized line break node; carries no payload beyond its type tag and version.
 */
interface LexicalLinebreakNode {
type: 'linebreak'
version: 1
}
/**
 * Root node of a Lexical document. The exported converters wrap it as
 * `{ root: LexicalRoot }` before returning it.
 */
interface LexicalRoot {
type: 'root'
version: 1
// Top-level block nodes (paragraphs, headings, lists, quotes).
children: LexicalElementNode[]
// The converters in this file always write null here.
direction: 'ltr' | 'rtl' | null
}
// Anything that may appear in an element node's `children` array.
type LexicalContent = LexicalTextContent | LexicalElementNode | LexicalLinebreakNode
// ============================================================
// HTML TO LEXICAL CONVERTER
// ============================================================
/**
 * Convert an HTML string into a JSON-serialized Lexical document.
 *
 * The returned value is a JSON *string* whose top level is `{ "root": {...} }`,
 * the wrapper this module uses for Payload richText content.
 *
 * - Falsy or non-string input yields an empty document.
 * - If parsing or conversion throws, a warning is logged and the cleaned HTML
 *   is stored verbatim as plain text in a single paragraph.
 *
 * @param html - Raw HTML markup.
 * @returns JSON string encoding `{ root: LexicalRoot }`.
 */
export function htmlToLexical(html: string): string {
  const hasUsableInput = Boolean(html) && typeof html === 'string'
  if (!hasUsableInput) {
    return createEmptyLexical()
  }

  // Strip scripts, styles and noisy attributes before handing off to the parser.
  const sanitized = cleanHtml(html)

  try {
    // Parse, convert to block nodes, then drop the empty text nodes.
    const blocks = cleanEmptyTextNodes(convertNodes(parse(sanitized)))

    // Never emit a root without children.
    const rootChildren = blocks.length > 0 ? blocks : [createEmptyParagraph()]
    const root: LexicalRoot = {
      type: 'root',
      version: 1,
      children: rootChildren,
      direction: null,
    }

    return JSON.stringify({ root })
  } catch (error) {
    console.warn('Failed to parse HTML, using fallback:', error)
    return createTextLexical(sanitized)
  }
}
/**
 * Convert an HTML string into a Lexical document object
 * (`{ root: LexicalRoot }`), for callers that need an object rather than the
 * JSON string returned by `htmlToLexical`.
 *
 * The converted tree goes through the same `cleanEmptyTextNodes` pass as
 * `htmlToLexical`, so both entry points produce the same document for the same
 * input.
 *
 * - Falsy or non-string input yields an empty document.
 * - If parsing or conversion throws, a warning is logged and the cleaned HTML
 *   is stored verbatim as plain text in a single paragraph.
 *
 * @param html - Raw HTML markup.
 * @returns The Lexical document wrapped as `{ root }`.
 */
export function htmlToLexicalObject(html: string): { root: LexicalRoot } {
  if (!html || typeof html !== 'string') {
    return JSON.parse(createEmptyLexical())
  }

  // Clean the HTML first
  const cleanedHtml = cleanHtml(html)

  try {
    const ast = parse(cleanedHtml)
    // Same empty-text cleanup as htmlToLexical; previously skipped here.
    const children = cleanEmptyTextNodes(convertNodes(ast))

    return {
      root: {
        type: 'root',
        version: 1,
        children: children.length > 0 ? children : [createEmptyParagraph()],
        direction: null,
      },
    }
  } catch (error) {
    console.warn('Failed to parse HTML, using fallback:', error)
    return JSON.parse(createTextLexical(cleanedHtml))
  }
}
/**
 * Build the JSON string for an empty document: a root holding one empty paragraph.
 *
 * @returns JSON string encoding `{ root: ... }`.
 */
function createEmptyLexical(): string {
  const emptyRoot = {
    type: 'root',
    version: 1,
    children: [createEmptyParagraph()],
    direction: null,
  }
  return JSON.stringify({ root: emptyRoot })
}
/**
 * Fallback serializer: put the given text, untouched, into a single paragraph
 * and return the document as a JSON string.
 *
 * @param text - Text to store as the paragraph's only text node.
 * @returns JSON string encoding `{ root: ... }`.
 */
function createTextLexical(text: string): string {
  const paragraph = {
    type: 'paragraph',
    version: 1,
    children: [createTextNode(text)],
  }
  const wrapped = {
    root: {
      type: 'root',
      version: 1,
      children: [paragraph],
      direction: null,
    },
  }
  return JSON.stringify(wrapped)
}
/**
 * Build a paragraph whose only child is an empty text node.
 *
 * @returns A fresh paragraph node.
 */
function createEmptyParagraph(): LexicalElementNode {
  const placeholder = createTextNode('')
  const paragraph: LexicalElementNode = {
    type: 'paragraph',
    version: 1,
    children: [placeholder],
  }
  return paragraph
}
/**
 * Strip empty text nodes (`type: 'text'` with `text: ''`) from a list of block
 * nodes.
 *
 * Cleaning reaches two levels: a block's direct children, and the children of
 * those children (e.g. list -> list item -> text). A block left with no
 * children at all receives a single empty text node as a placeholder. Blocks
 * without a `children` array pass through untouched. Finally, any top-level
 * node typed 'linebreak' is discarded. Input nodes are not mutated; changed
 * nodes are shallow copies.
 *
 * @param nodes - Top-level block nodes.
 * @returns A new array of cleaned block nodes.
 */
function cleanEmptyTextNodes(nodes: LexicalElementNode[]): LexicalElementNode[] {
  const isBlankText = (candidate: any): boolean =>
    candidate.type === 'text' && candidate.text === ''

  const output: LexicalElementNode[] = []

  for (const block of nodes) {
    let cleaned: LexicalElementNode = block

    if (Array.isArray(block.children)) {
      const kept: any[] = []
      for (const child of block.children as any[]) {
        if (isBlankText(child)) {
          continue
        }
        if (Array.isArray(child.children)) {
          // Second level: copy the child with its own blank text nodes removed.
          const grandChildren = child.children.filter((grand: any) => !isBlankText(grand))
          kept.push({ ...child, children: grandChildren })
        } else {
          kept.push(child)
        }
      }

      // A block stripped of every child gets an empty text placeholder.
      cleaned = {
        ...block,
        children: kept.length === 0 ? [createTextNode('')] : kept,
      }
    }

    // Top-level linebreak nodes are not valid blocks; leave them out.
    if ((cleaned.type as string) !== 'linebreak') {
      output.push(cleaned)
    }
  }

  return output
}
/**
 * Pre-clean raw HTML before parsing.
 *
 * Applied in order: remove `<script>` and `<style>` elements, remove
 * double-quoted `data-*` and `class` attributes, remove `<p>` / `<div>`
 * elements that contain only whitespace, then trim the result.
 *
 * @param html - Raw HTML markup.
 * @returns The cleaned markup.
 */
function cleanHtml(html: string): string {
  // Order matters: attributes are stripped before the empty-tag checks so that
  // e.g. `<p class="x"></p>` is recognised as an empty `<p></p>`.
  const removals: RegExp[] = [
    /<script\b[^<]*(?:(?!<\/script>)<[^<]*)*<\/script>/gi,
    /<style\b[^<]*(?:(?!<\/style>)<[^<]*)*<\/style>/gi,
    /\sdata-[a-z-]+="[^"]*"/gi,
    /\sclass="[^"]*"/gi,
    /<p>\s*<\/p>/gi,
    /<div>\s*<\/div>/gi,
  ]

  let stripped = html
  for (const pattern of removals) {
    stripped = stripped.replace(pattern, '')
  }
  return stripped.trim()
}
/**
 * Convert top-level HTML AST nodes into Lexical block nodes, in document order.
 *
 * Mapping:
 * - text                -> paragraph (text trimmed; whitespace-only text skipped)
 * - h1..h6              -> heading
 * - p, div              -> paragraph of inline content
 * - ul, ol              -> list ('bullet' / 'number'); lists without <li> items are skipped
 * - blockquote          -> quote
 * - br                  -> paragraph holding a single linebreak node
 * - hr                  -> paragraph with the text '---'
 * - img                 -> placeholder paragraph (see createImageNode)
 *
 * NOTE: any other tag at this level is skipped together with its content.
 *
 * Text is read from `value` or `content`, and attributes from `attrs` or
 * `attributes`, matching what `convertInlineNodes` accepts. Lists are appended
 * as soon as they are met, so a text node, `<br>` or `<hr>` that follows a
 * list is emitted after it rather than before it.
 *
 * @param nodes - AST nodes as returned by the HTML parser.
 * @returns Block nodes; never empty (falls back to one empty paragraph).
 */
function convertNodes(nodes: any[]): LexicalElementNode[] {
  const result: LexicalElementNode[] = []

  for (const node of nodes) {
    // Handle text nodes
    if (node.type === 'text') {
      const text = String(node.value ?? node.content ?? '').trim()
      if (text) {
        result.push({
          type: 'paragraph',
          version: 1,
          children: [createTextNode(text)],
        })
      }
      continue
    }

    if (!node.name) continue
    const tag = node.name.toLowerCase()

    // Handle headings
    if (['h1', 'h2', 'h3', 'h4', 'h5', 'h6'].includes(tag)) {
      result.push(createHeading(tag, node.children || []))
      continue
    }

    // Handle paragraphs and divs (divs are treated as paragraphs)
    if (tag === 'p' || tag === 'div') {
      const content = convertInlineNodes(node.children || [])
      if (content.length > 0) {
        result.push({
          type: 'paragraph',
          version: 1,
          children: content,
        })
      }
      continue
    }

    // Handle lists
    if (tag === 'ul' || tag === 'ol') {
      const list: LexicalElementNode = {
        type: 'list',
        version: 1,
        listType: tag === 'ol' ? 'number' : 'bullet',
        children: [],
      }
      // Append right away (flushList skips lists with no items) so that the
      // list keeps its position relative to whatever follows it.
      flushList(result, list, convertListItems(node.children || []))
      continue
    }

    // Handle blockquotes
    if (tag === 'blockquote') {
      result.push({
        type: 'quote',
        version: 1,
        children: convertInlineNodes(node.children || []),
      })
      continue
    }

    // Handle line breaks
    if (tag === 'br') {
      result.push({
        type: 'paragraph',
        version: 1,
        children: [{ type: 'linebreak', version: 1 }],
      })
      continue
    }

    // Handle horizontal rules (rendered as a textual separator)
    if (tag === 'hr') {
      result.push({
        type: 'paragraph',
        version: 1,
        children: [createTextNode('---')],
      })
      continue
    }

    // Handle images
    if (tag === 'img') {
      const src = node.attrs?.src || node.attributes?.src || ''
      const alt = node.attrs?.alt || node.attributes?.alt || ''
      result.push(createImageNode(src, alt))
      continue
    }
  }

  return result.length > 0 ? result : [createEmptyParagraph()]
}
/**
 * Attach `items` to `list` and append the list to `result`.
 *
 * Does nothing when there is no list or when there are no items, so empty
 * lists never reach the output. Mutates both `list` and `result`.
 *
 * @param result - Output array that receives the list.
 * @param list - The list node being assembled, or null if none is pending.
 * @param items - List item nodes that become the list's children.
 */
function flushList(
  result: LexicalElementNode[],
  list: LexicalElementNode | null,
  items: LexicalElementNode[],
): void {
  if (!list) {
    return
  }
  if (!(items.length > 0)) {
    return
  }
  list.children = items
  result.push(list)
}
/**
 * Turn the `<li>` children of a list element into Lexical list item nodes.
 * Children that are not `<li>` elements (e.g. whitespace text) are ignored.
 *
 * @param items - Child AST nodes of a `<ul>` / `<ol>`.
 * @returns One 'listitem' node per `<li>`, in order.
 */
function convertListItems(items: any[]): LexicalElementNode[] {
  const listItems: LexicalElementNode[] = []
  for (const candidate of items) {
    if (candidate.name?.toLowerCase() !== 'li') {
      continue
    }
    listItems.push({
      type: 'listitem',
      version: 1,
      children: convertInlineNodes(candidate.children || []),
    })
  }
  return listItems
}
/**
 * Build a text node with every property this converter emits for text.
 *
 * @param text - The text content.
 * @param format - Optional format bitmask (callers in this file pass
 *   1 = bold, 2 = italic, 4 = underline); defaults to 0 when omitted.
 * @returns A fresh text node.
 */
function createTextNode(text: string, format?: number): LexicalTextContent {
  // Key order is kept stable because these nodes end up in JSON.stringify output.
  const node: LexicalTextContent = {
    type: 'text',
    version: 1,
    text,
    detail: 0,
    format: 0,
    mode: 'normal',
    style: '',
  }
  if (format != null) {
    node.format = format
  }
  return node
}
/**
 * Format bit applied to the text of simple inline formatting tags.
 */
const INLINE_FORMAT_BITS = new Map<string, number>([
  ['strong', 1], // bold
  ['b', 1],
  ['em', 2], // italic
  ['i', 2],
  ['u', 4], // underline
])

/**
 * Convert inline HTML AST nodes (text, links, formatting) into Lexical inline
 * content.
 *
 * Mapping:
 * - text                 -> text node (empty text skipped)
 * - br                   -> linebreak node
 * - a                    -> plain text, with ` (href)` appended unless href is empty or '#'
 * - strong/b, em/i, u    -> text node with the matching format bit; nested
 *                           markup inside is flattened to plain text
 * - img                  -> placeholder produced by createImageNode
 * - span                 -> plain text (empty spans skipped)
 * - code                 -> text node styled 'font-family: monospace;'
 * - any other element    -> its children, converted recursively
 *
 * Nodes that are neither text nor a named element (for example comments) are
 * skipped, so they cannot raise a TypeError on the tag-name lookup.
 *
 * @param nodes - Child AST nodes of a block element.
 * @returns Inline content; never empty (falls back to one empty text node).
 */
function convertInlineNodes(nodes: any[]): LexicalContent[] {
  const result: LexicalContent[] = []

  for (const node of nodes) {
    // Handle text nodes (text may be stored under `value` or `content`)
    if (node.type === 'text') {
      const text = (node.value || node.content || '') as string
      if (text) {
        result.push(createTextNode(text))
      }
      continue
    }

    // Skip anything without an element name (comments, unknown node kinds)
    if (!node.name) continue
    const tag = node.name.toLowerCase()

    // Handle line breaks inside a block
    if (tag === 'br') {
      result.push({ type: 'linebreak', version: 1 })
      continue
    }

    // Handle links
    // NOTE: Payload's Lexical link validation is very strict. For now, convert links to text
    // TODO: Implement proper link format after investigating Payload's link node requirements
    if (tag === 'a') {
      const text = extractText(node.children || [])
      const href = node.attrs?.href || node.attributes?.href || ''
      if (text) {
        // Include URL as text for now
        const linkText = href && href !== '#' ? `${text} (${href})` : text
        result.push(createTextNode(linkText))
      }
      continue
    }

    // Handle bold / italic / underline
    const formatBit = INLINE_FORMAT_BITS.get(tag)
    if (formatBit !== undefined) {
      result.push(createTextNode(extractText(node.children || []), formatBit))
      continue
    }

    // Handle images inline
    if (tag === 'img') {
      const src = node.attrs?.src || node.attributes?.src || ''
      const alt = node.attrs?.alt || node.attributes?.alt || ''
      result.push(createImageNode(src, alt))
      continue
    }

    // Handle spans (treat as text)
    if (tag === 'span') {
      const text = extractText(node.children || [])
      if (text) {
        result.push(createTextNode(text))
      }
      continue
    }

    // Handle code
    if (tag === 'code') {
      const text = extractText(node.children || [])
      result.push({
        ...createTextNode(text),
        style: 'font-family: monospace;',
      })
      continue
    }

    // Recursively handle other inline elements
    result.push(...convertInlineNodes(node.children || []))
  }

  return result.length > 0 ? result : [createTextNode('')]
}
/**
 * Build a heading node from an `h1`..`h6` tag name and its child AST nodes.
 *
 * @param tag - Lower-cased tag name; the digits after the first character give the level.
 * @param children - Child AST nodes, converted as inline content.
 * @returns A 'heading' node whose `tag` is `h<level>`.
 */
function createHeading(tag: string, children: any[]): LexicalElementNode {
  const level = parseInt(tag.slice(1), 10)
  const content = convertInlineNodes(children)

  const heading: LexicalElementNode = {
    type: 'heading',
    version: 1,
    tag: 'h' + level,
    children: content.length === 0 ? [createTextNode('')] : content,
  }
  return heading
}
/**
 * Build the textual stand-in for an image: a paragraph containing
 * `[Image: <alt or src>]` in italic style. No real image/upload node is created.
 *
 * @param src - Image URL; used as the label only when `alt` is empty.
 * @param alt - Alternative text; preferred as the label.
 * @returns A paragraph node holding the placeholder text.
 */
function createImageNode(src: string, alt: string): LexicalElementNode {
  const label = alt || src
  const placeholder = createTextNode(`[Image: ${label}]`)
  placeholder.style = 'font-style: italic;'

  return {
    type: 'paragraph',
    version: 1,
    children: [placeholder],
  }
}
/**
 * Concatenate the plain text found in a list of AST nodes, depth-first.
 *
 * Per node: a text node contributes `value` (or `content`); a node with a
 * `children` property contributes the text of its children; otherwise a
 * `content` property, if present, is appended as-is. Everything else
 * contributes nothing.
 *
 * @param nodes - AST nodes to read.
 * @returns The combined text, without any separators.
 */
function extractText(nodes: any[]): string {
  return nodes.reduce((collected: string, node: any): string => {
    if (node.type === 'text') {
      return collected + (node.value || node.content || '')
    }
    if (node.children) {
      return collected + extractText(node.children)
    }
    return node.content ? collected + node.content : collected
  }, '')
}
// ============================================================
// UTILITY FUNCTIONS
// ============================================================
/**
 * Check whether a string is JSON for a Lexical document.
 *
 * Accepts both shapes used around this module:
 * - the wrapped form `{ "root": { "type": "root", "children": [...] } }`,
 *   which is what `htmlToLexical` returns, and
 * - a bare root object `{ "type": "root", "children": [...] }`.
 *
 * Only the root's `type` and the presence of a `children` array are checked;
 * the children themselves are not validated.
 *
 * @param json - Candidate JSON string.
 * @returns true if the string parses and has a root in one of the shapes above;
 *   false otherwise (including when the string is not valid JSON).
 */
export function isValidLexical(json: string): boolean {
  const looksLikeRoot = (candidate: any): boolean =>
    candidate?.type === 'root' && Array.isArray(candidate?.children)

  try {
    const parsed = JSON.parse(json)
    return looksLikeRoot(parsed) || looksLikeRoot(parsed?.root)
  } catch {
    return false
  }
}
/**
 * Convert a list of HTML strings, one by one, with `htmlToLexical`.
 *
 * @param htmlArray - HTML strings to convert.
 * @returns One Lexical JSON string per input, in the same order.
 */
export function batchHtmlToLexical(htmlArray: string[]): string[] {
  // Wrapped so that map's extra (index, array) arguments are not forwarded.
  const convertOne = (html: string): string => htmlToLexical(html)
  return htmlArray.map(convertOne)
}