Update Payload CMS configuration, collections (Audit, Posts), and add migration scripts/reports.
573 lines
14 KiB
TypeScript
573 lines
14 KiB
TypeScript
/**
|
|
* HTML to Lexical JSON Converter
|
|
* Story 1.3: Content Migration Script
|
|
*
|
|
* Converts HTML content to Payload CMS Lexical editor format
|
|
*/
|
|
|
|
import { parse } from 'html-parse-stringify'
|
|
|
|
// ============================================================
|
|
// LEXICAL JSON TYPES
|
|
// ============================================================
|
|
|
|
/**
 * Loosest serialized-node shape: a `type`, a `version`, and any number of
 * node-specific keys.
 */
interface LexicalNode {
  type: string
  version: number
  [key: string]: any
}

/**
 * Serialized Lexical text node.
 */
interface LexicalTextContent {
  type: 'text'
  version: 1
  // Numeric flag field; createTextNode always writes 0 here, so the type must
  // be `number` (it was previously declared as an object with keys 0 and 1,
  // which that assignment does not satisfy).
  detail?: number
  // Numeric formatting bit-field (0 = plain text).
  format?: number
  mode?: string
  style?: string
  text: string
}

/**
 * Serialized Lexical element node (anything that owns `children`).
 * The optional fields only apply to some node types: `tag` to headings,
 * `listType` to lists, and `rel` / `target` / `title` / `url` to links.
 */
interface LexicalElementNode {
  type: 'element' | 'heading' | 'link' | 'list' | 'listitem' | 'quote' | 'paragraph'
  version: 1
  children: LexicalContent[]
  direction?: 'ltr' | 'rtl' | null
  format?: '' | 'left' | 'start' | 'center' | 'right' | 'end' | 'justify'
  indent?: number
  tag?: string
  listType?: 'bullet' | 'number'
  rel?: null | string
  target?: null | string
  title?: null | string
  url?: string
}

/**
 * Serialized Lexical line break (carries no content of its own).
 */
interface LexicalLinebreakNode {
  type: 'linebreak'
  version: 1
}

/**
 * Root of a serialized Lexical document; its children are block-level elements.
 */
interface LexicalRoot {
  type: 'root'
  version: 1
  children: LexicalElementNode[]
  direction: 'ltr' | 'rtl' | null
}

/** Anything that may appear inside an element's `children` array. */
type LexicalContent = LexicalTextContent | LexicalElementNode | LexicalLinebreakNode
|
|
|
|
// ============================================================
|
|
// HTML TO LEXICAL CONVERTER
|
|
// ============================================================
|
|
|
|
/**
 * Convert an HTML string into a serialized Lexical document.
 *
 * The return value is a JSON *string* of the form `{ "root": { ... } }`, the
 * wrapper shape Payload's richText field expects. Use `htmlToLexicalObject`
 * when the parsed object is needed instead.
 *
 * - Empty or non-string input yields an empty document.
 * - If parsing or conversion throws, a warning is logged and the cleaned HTML
 *   is stored verbatim as a single plain-text paragraph.
 */
export function htmlToLexical(html: string): string {
  const isUsableInput = typeof html === 'string' && html !== ''
  if (!isUsableInput) {
    return createEmptyLexical()
  }

  // Strip scripts, styles and noisy attributes before parsing
  const sanitized = cleanHtml(html)

  try {
    // Parse, convert to blocks, then drop the empty text nodes Payload rejects
    const blocks = cleanEmptyTextNodes(convertNodes(parse(sanitized)))

    const root: LexicalRoot = {
      type: 'root',
      version: 1,
      children: blocks.length > 0 ? blocks : [createEmptyParagraph()],
      direction: null,
    }

    // Payload stores Lexical content wrapped as { "root": ... }
    return JSON.stringify({ root })
  } catch (error) {
    console.warn('Failed to parse HTML, using fallback:', error)
    return createTextLexical(sanitized)
  }
}
|
|
|
|
/**
 * Convert an HTML string into a Lexical document object, for direct use with
 * the Payload local API.
 *
 * Returns `{ root: LexicalRoot }`. The conversion pipeline matches
 * `htmlToLexical`, including the `cleanEmptyTextNodes` pass, so both entry
 * points produce the same tree for the same input.
 *
 * - Empty or non-string input yields an empty document.
 * - If parsing or conversion throws, a warning is logged and the cleaned HTML
 *   is stored verbatim as a single plain-text paragraph.
 */
export function htmlToLexicalObject(html: string): { root: LexicalRoot } {
  if (!html || typeof html !== 'string') {
    return JSON.parse(createEmptyLexical())
  }

  // Clean the HTML first
  const cleanedHtml = cleanHtml(html)

  try {
    const ast = parse(cleanedHtml)

    // Remove empty text nodes exactly as htmlToLexical does; previously this
    // variant skipped the step and could return nodes the string variant
    // would have stripped.
    const children = cleanEmptyTextNodes(convertNodes(ast))

    return {
      root: {
        type: 'root',
        version: 1,
        children: children.length > 0 ? children : [createEmptyParagraph()],
        direction: null,
      },
    }
  } catch (error) {
    console.warn('Failed to parse HTML, using fallback:', error)
    return JSON.parse(createTextLexical(cleanedHtml))
  }
}
|
|
|
|
/**
 * Build the serialized (JSON string) form of an empty Lexical document:
 * a root wrapping one empty paragraph.
 */
function createEmptyLexical(): string {
  const emptyRoot = {
    type: 'root',
    version: 1,
    children: [createEmptyParagraph()],
    direction: null,
  }

  return JSON.stringify({ root: emptyRoot })
}
|
|
|
|
/**
 * Fallback serializer: wrap `text` in a single paragraph and return the
 * resulting Lexical document as a JSON string.
 */
function createTextLexical(text: string): string {
  const onlyParagraph = {
    type: 'paragraph',
    version: 1,
    children: [createTextNode(text)],
  }

  const document = {
    root: {
      type: 'root',
      version: 1,
      children: [onlyParagraph],
      direction: null,
    },
  }

  return JSON.stringify(document)
}
|
|
|
|
/**
 * Build a paragraph whose only child is an empty text node.
 */
function createEmptyParagraph(): LexicalElementNode {
  const placeholder = createTextNode('')
  const paragraph: LexicalElementNode = {
    type: 'paragraph',
    version: 1,
    children: [placeholder],
  }
  return paragraph
}
|
|
|
|
/**
 * Remove empty text nodes (`text === ''`) from a list of block nodes.
 * Payload's Lexical validator rejects empty text nodes.
 *
 * Cleaning recurses through every nested `children` array, so it no longer
 * stops two levels down. A top-level node that loses all of its children
 * receives a single empty text node as a placeholder (unchanged behaviour);
 * nested elements are simply left with whatever children remain.
 *
 * @param nodes Block-level nodes; the array and its nodes are not mutated.
 * @returns A new array of cleaned nodes.
 */
function cleanEmptyTextNodes(nodes: LexicalElementNode[]): LexicalElementNode[] {
  const isEmptyText = (node: LexicalContent): boolean =>
    node.type === 'text' && node.text === ''

  // Drops empty text nodes from `children` and from every element below them.
  const pruneChildren = (children: LexicalContent[]): LexicalContent[] =>
    children
      .filter((child) => !isEmptyText(child))
      .map((child) =>
        'children' in child && Array.isArray(child.children)
          ? { ...child, children: pruneChildren(child.children) }
          : child,
      )

  return nodes
    .map((node) => {
      if (!Array.isArray(node.children)) {
        return node
      }

      const cleanedChildren = pruneChildren(node.children)

      // If all children were removed, keep a single empty text node
      return {
        ...node,
        children: cleanedChildren.length > 0 ? cleanedChildren : [createTextNode('')],
      }
    })
    // Defensive: a bare linebreak is not a valid top-level block. Correctly
    // typed input can never contain one, hence the widening to string.
    .filter((node) => (node.type as string) !== 'linebreak')
}
|
|
|
|
/**
 * Strip markup the converter should never see. Applied in order:
 * script and style elements, double-quoted `data-*` attributes, double-quoted
 * `class` attributes, then `<p>` / `<div>` pairs containing only whitespace.
 * The result is trimmed.
 */
function cleanHtml(html: string): string {
  const removalPatterns: RegExp[] = [
    // Script and style elements, including their bodies
    /<script\b[^<]*(?:(?!<\/script>)<[^<]*)*<\/script>/gi,
    /<style\b[^<]*(?:(?!<\/style>)<[^<]*)*<\/style>/gi,
    // Webflow-specific attributes
    /\sdata-[a-z-]+="[^"]*"/gi,
    /\sclass="[^"]*"/gi,
    // Tags left empty once attributes are gone
    /<p>\s*<\/p>/gi,
    /<div>\s*<\/div>/gi,
  ]

  let cleaned = html
  for (const pattern of removalPatterns) {
    cleaned = cleaned.replace(pattern, '')
  }
  return cleaned.trim()
}
|
|
|
|
/**
 * Convert HTML AST nodes (as returned by html-parse-stringify's `parse`) into
 * block-level Lexical nodes, emitted in document order.
 *
 * Handling per node:
 * - text            -> paragraph (whitespace-only text is ignored)
 * - h1..h6          -> heading
 * - p, div          -> paragraph built from the inline content
 * - ul, ol          -> list (lists without any `li` are skipped)
 * - blockquote      -> quote
 * - br              -> paragraph containing a linebreak
 * - hr              -> paragraph containing the text `---`
 * - img             -> image placeholder paragraph
 * - inline tags at block level (a, strong, b, em, i, u, span, code)
 *                   -> paragraph, if they carry any content
 * - script, style   -> ignored
 * - any other tag   -> its children are converted in place, so content inside
 *                      wrappers such as section/article/figure is not lost
 *
 * Text is read from `content` (the field the parser emits) with `value` as a
 * fallback, and attributes from `attrs` with `attributes` as a fallback,
 * matching convertInlineNodes.
 *
 * @returns At least one node; an empty paragraph when nothing was converted.
 */
function convertNodes(nodes: any[]): LexicalElementNode[] {
  const headingTags = ['h1', 'h2', 'h3', 'h4', 'h5', 'h6']
  const inlineTags = ['a', 'strong', 'b', 'em', 'i', 'u', 'span', 'code']
  const skippedTags = ['script', 'style']
  const result: LexicalElementNode[] = []

  // Wraps inline content in a paragraph block.
  const paragraphOf = (children: LexicalContent[]): LexicalElementNode => ({
    type: 'paragraph',
    version: 1,
    children,
  })

  // True when the inline content holds something other than empty text nodes.
  const hasContent = (content: LexicalContent[]): boolean =>
    content.some((item) => item.type !== 'text' || item.text !== '')

  const visit = (siblings: any[]): void => {
    for (const node of siblings) {
      // Handle text nodes
      if (node.type === 'text') {
        const raw: unknown = node.content ?? node.value
        const text = typeof raw === 'string' ? raw.trim() : ''
        if (text) {
          result.push(paragraphOf([createTextNode(text)]))
        }
        continue
      }

      // Comments and other non-element nodes carry no tag name
      if (!node.name) continue

      const tag = String(node.name).toLowerCase()
      const children: any[] = node.children || []

      if (skippedTags.includes(tag)) continue

      // Handle headings
      if (headingTags.includes(tag)) {
        result.push(createHeading(tag, children))
        continue
      }

      // Handle paragraphs and divs (divs are treated as paragraphs)
      if (tag === 'p' || tag === 'div') {
        const content = convertInlineNodes(children)
        if (content.length > 0) {
          result.push(paragraphOf(content))
        }
        continue
      }

      // Handle lists. The list is emitted immediately so that it keeps its
      // position relative to whatever follows it; flushList skips empty lists.
      if (tag === 'ul' || tag === 'ol') {
        const list: LexicalElementNode = {
          type: 'list',
          version: 1,
          listType: tag === 'ol' ? 'number' : 'bullet',
          children: [],
        }
        flushList(result, list, convertListItems(children))
        continue
      }

      // Handle blockquotes
      if (tag === 'blockquote') {
        result.push({
          type: 'quote',
          version: 1,
          children: convertInlineNodes(children),
        })
        continue
      }

      // Handle line breaks and horizontal rules
      if (tag === 'br') {
        result.push(paragraphOf([{ type: 'linebreak', version: 1 }]))
        continue
      }

      if (tag === 'hr') {
        result.push(paragraphOf([createTextNode('---')]))
        continue
      }

      // Handle images
      if (tag === 'img') {
        const src = node.attrs?.src || node.attributes?.src || ''
        const alt = node.attrs?.alt || node.attributes?.alt || ''
        result.push(createImageNode(src, alt))
        continue
      }

      // Inline markup sitting directly at block level keeps its formatting by
      // being converted as the sole content of a paragraph.
      if (inlineTags.includes(tag)) {
        const content = convertInlineNodes([node])
        if (hasContent(content)) {
          result.push(paragraphOf(content))
        }
        continue
      }

      // Unknown wrapper: descend instead of silently discarding its content.
      visit(children)
    }
  }

  visit(nodes)

  return result.length > 0 ? result : [createEmptyParagraph()]
}
|
|
|
|
/**
 * Attach `items` to `list` and append the list to `result`.
 * Does nothing when there is no list or when `items` is empty.
 * Mutates both `list` (its `children`) and `result`.
 */
function flushList(
  result: LexicalElementNode[],
  list: LexicalElementNode | null,
  items: LexicalElementNode[],
): void {
  const nothingToFlush = !list || items.length === 0
  if (nothingToFlush) {
    return
  }

  list.children = items
  result.push(list)
}
|
|
|
|
/**
 * Turn the `li` children of a list element into Lexical `listitem` nodes.
 * Children that are not `li` elements are ignored; each item's content is
 * converted as inline content.
 */
function convertListItems(items: any[]): LexicalElementNode[] {
  const isListItem = (candidate: any): boolean => candidate.name?.toLowerCase() === 'li'

  const toListItem = (li: any): LexicalElementNode => ({
    type: 'listitem',
    version: 1,
    children: convertInlineNodes(li.children || []),
  })

  return items.filter(isListItem).map(toListItem)
}
|
|
|
|
/**
 * Build a Lexical text node with every standard property filled in.
 *
 * @param text   The text content.
 * @param format Optional formatting bit-field; defaults to 0 (plain).
 */
function createTextNode(text: string, format?: number): LexicalTextContent {
  const resolvedFormat = format ?? 0

  const node: LexicalTextContent = {
    type: 'text',
    version: 1,
    text,
    detail: 0,
    format: resolvedFormat,
    mode: 'normal',
    style: '',
  }

  return node
}
|
|
|
|
/**
 * Convert inline HTML AST nodes (text, links, formatting, ...) into Lexical
 * inline content.
 *
 * Handling per node:
 * - text                  -> text node (empty text is skipped)
 * - br                    -> linebreak node
 * - a                     -> plain text, followed by ` (href)` unless the href
 *                            is empty or `#` (see NOTE in the body)
 * - strong/b, em/i, u     -> text node with the Lexical format bit
 *                            (bold = 1, italic = 2, underline = 8)
 * - img                   -> the inline `[Image: ...]` placeholder text
 * - span                  -> plain text
 * - code                  -> text node styled with a monospace font
 * - nodes without a tag name (e.g. comments) -> ignored
 * - any other element     -> its children, converted recursively
 *
 * Formatting elements flatten their content to plain text, so nested
 * formatting (e.g. italic inside bold) keeps only the outer format.
 *
 * @returns At least one node; a single empty text node when nothing was
 *          produced.
 */
function convertInlineNodes(nodes: any[]): LexicalContent[] {
  // Lexical text-format bit flags. Underline is 8; 4 is strikethrough.
  const formatByTag = new Map<string, number>([
    ['strong', 1],
    ['b', 1],
    ['em', 2],
    ['i', 2],
    ['u', 8],
  ])

  const result: LexicalContent[] = []

  for (const node of nodes) {
    // Handle text nodes (html-parse-stringify uses type for text)
    if (node.type === 'text') {
      const text = (node.value || node.content || '') as string
      if (text) {
        result.push(createTextNode(text))
      }
      continue
    }

    // Skip anything that is not an element. Comment nodes have a `type` but
    // no `name`, so the name alone decides; calling toLowerCase() on a
    // missing name would throw.
    if (!node.name) continue

    const tag = String(node.name).toLowerCase()
    const children: any[] = node.children || []

    // Handle line breaks
    if (tag === 'br') {
      result.push({ type: 'linebreak', version: 1 })
      continue
    }

    // Handle links
    // NOTE: Payload's Lexical link validation is very strict. For now, convert links to text
    // TODO: Implement proper link format after investigating Payload's link node requirements
    if (tag === 'a') {
      const text = extractText(children)
      const href = node.attrs?.href || node.attributes?.href || ''
      if (text) {
        // Include URL as text for now
        const linkText = href && href !== '#' ? `${text} (${href})` : text
        result.push(createTextNode(linkText))
      }
      continue
    }

    // Handle bold (strong, b), italic (em, i) and underline (u)
    const format = formatByTag.get(tag)
    if (format !== undefined) {
      const text = extractText(children)
      if (text) {
        result.push(createTextNode(text, format))
      }
      continue
    }

    // Handle images inline. createImageNode returns a paragraph, which must
    // not be nested inside inline content, so only its children are used.
    if (tag === 'img') {
      const src = node.attrs?.src || node.attributes?.src || ''
      const alt = node.attrs?.alt || node.attributes?.alt || ''
      result.push(...createImageNode(src, alt).children)
      continue
    }

    // Handle spans (treat as text)
    if (tag === 'span') {
      const text = extractText(children)
      if (text) {
        result.push(createTextNode(text))
      }
      continue
    }

    // Handle code
    if (tag === 'code') {
      const text = extractText(children)
      if (text) {
        result.push({
          ...createTextNode(text),
          style: 'font-family: monospace;',
        })
      }
      continue
    }

    // Recursively handle other inline elements, leaving out the empty
    // placeholder the recursive call returns for elements without content.
    for (const child of convertInlineNodes(children)) {
      if (child.type !== 'text' || child.text !== '') {
        result.push(child)
      }
    }
  }

  return result.length > 0 ? result : [createTextNode('')]
}
|
|
|
|
/**
 * Build a Lexical heading node.
 *
 * @param tag      Heading tag name such as `h2`; the part after the first
 *                 character is parsed as the level.
 * @param children AST children, converted as inline content.
 */
function createHeading(tag: string, children: any[]): LexicalElementNode {
  const level = Number.parseInt(tag.slice(1), 10)
  const content = convertInlineNodes(children)
  const headingChildren = content.length === 0 ? [createTextNode('')] : content

  const heading: LexicalElementNode = {
    type: 'heading',
    version: 1,
    tag: 'h' + level,
    children: headingChildren,
  }
  return heading
}
|
|
|
|
/**
 * Build the placeholder used for images: a paragraph holding the italic text
 * `[Image: <alt>]`, falling back to the source URL when `alt` is empty.
 */
function createImageNode(src: string, alt: string): LexicalElementNode {
  const label = createTextNode(`[Image: ${alt || src}]`)
  const italicLabel = { ...label, style: 'font-style: italic;' }

  const placeholder: LexicalElementNode = {
    type: 'paragraph',
    version: 1,
    children: [italicLabel],
  }
  return placeholder
}
|
|
|
|
/**
 * Concatenate the plain text found in `nodes`, descending into children.
 * Text nodes contribute `value` or `content`; other nodes contribute their
 * children's text when they have children, otherwise their own `content` if
 * present.
 */
function extractText(nodes: any[]): string {
  let collected = ''

  for (let index = 0; index < nodes.length; index++) {
    const current = nodes[index]

    if (current.type === 'text') {
      collected += current.value || current.content || ''
      continue
    }

    if (current.children) {
      collected += extractText(current.children)
      continue
    }

    if (current.content) {
      collected += current.content
    }
  }

  return collected
}
|
|
|
|
// ============================================================
|
|
// UTILITY FUNCTIONS
|
|
// ============================================================
|
|
|
|
/**
 * Check whether a string is Lexical JSON.
 *
 * Accepts both shapes:
 * - the wrapped form `{ "root": { "type": "root", "children": [...] } }`
 *   produced by `htmlToLexical`, and
 * - a bare root node `{ "type": "root", "children": [...] }`.
 *
 * Only the root's `type` and the presence of a `children` array are checked;
 * the children themselves are not validated.
 *
 * @returns `false` for anything else, including text that is not valid JSON.
 */
export function isValidLexical(json: string): boolean {
  // True when `value` is an object shaped like a Lexical root node.
  const isRootNode = (value: unknown): boolean => {
    if (typeof value !== 'object' || value === null) return false
    const candidate = value as { type?: unknown; children?: unknown }
    return candidate.type === 'root' && Array.isArray(candidate.children)
  }

  try {
    const parsed: unknown = JSON.parse(json)
    if (isRootNode(parsed)) return true
    if (typeof parsed !== 'object' || parsed === null) return false
    // Wrapped form: the root node lives under the `root` key
    return isRootNode((parsed as { root?: unknown }).root)
  } catch {
    return false
  }
}
|
|
|
|
/**
 * Convert every HTML string in `htmlArray` with `htmlToLexical`, returning the
 * serialized documents in the same order.
 */
export function batchHtmlToLexical(htmlArray: string[]): string[] {
  const convertOne = (source: string): string => htmlToLexical(source)
  return htmlArray.map(convertOne)
}
|