/**
* HTML to Lexical JSON Converter
* Story 1.3: Content Migration Script
*
* Converts HTML content to Payload CMS Lexical editor format
*/
import { parse } from 'html-parse-stringify'
// ============================================================
// LEXICAL JSON TYPES
// ============================================================
/**
 * Loosest description of a Lexical node: a `type` tag and a `version`
 * number, plus any number of additional, untyped properties.
 */
interface LexicalNode {
type: string
version: number
// Index signature: every other property name is accepted with type `any`.
[key: string]: any
}
/**
 * A text leaf node. `type` is fixed to 'text' and `version` to 1; `text`
 * carries the string content and is the only other required field.
 */
interface LexicalTextContent {
type: 'text'
version: 1
detail?: { 0: any; 1: any }
// NOTE(review): presumably Lexical's numeric text-format flags — confirm against the editor docs.
format?: number
mode?: string
style?: string
text: string
}
/**
 * A container node that holds child content. One interface covers every
 * element kind listed in `type`; the optional fields below are shared across
 * all of them rather than being split per kind.
 */
interface LexicalElementNode {
type: 'element' | 'heading' | 'link' | 'list' | 'listitem' | 'quote' | 'paragraph'
version: 1
children: LexicalContent[]
direction?: 'ltr' | 'rtl' | null
format?: '' | 'left' | 'start' | 'center' | 'right' | 'end' | 'justify'
indent?: number
// NOTE(review): presumably the HTML tag name for headings/lists (e.g. 'h2', 'ul') — confirm against the converter.
tag?: string
listType?: 'bullet' | 'number'
// NOTE(review): `rel`, `target`, `title` and `url` look like link attributes — confirm they are only set on 'link' nodes.
rel?: null | string
target?: null | string
title?: null | string
url?: string
}
/**
 * A line-break node. It has no children and no payload beyond its `type`
 * tag and `version`.
 */
interface LexicalLinebreakNode {
type: 'linebreak'
version: 1
}
/**
 * The root of a Lexical document. Its children are restricted to element
 * nodes (no bare text or line-break nodes at the top level), and `direction`
 * is required but may be `null`.
 */
interface LexicalRoot {
type: 'root'
version: 1
children: LexicalElementNode[]
direction: 'ltr' | 'rtl' | null
}
/** Anything that may appear in an element node's `children` array: text, a nested element, or a line break. */
type LexicalContent = LexicalTextContent | LexicalElementNode | LexicalLinebreakNode
// ============================================================
// HTML TO LEXICAL CONVERTER
// ============================================================
/**
 * Convert an HTML string into a serialized Lexical document.
 *
 * The result is a JSON *string* (not an object) whose top level is
 * `{ "root": { ... } }`, the wrapper Payload's richText field expects.
 *
 * @param html - HTML markup to convert.
 * @returns JSON string of the wrapped Lexical document. Empty or non-string
 *   input yields the empty document from `createEmptyLexical()`. If parsing
 *   or conversion throws, a warning is logged and the cleaned HTML is emitted
 *   as a single plain-text paragraph via `createTextLexical()`.
 */
export function htmlToLexical(html: string): string {
  // Guard: anything that is not a non-empty string maps to the empty document.
  if (typeof html !== 'string' || html === '') {
    return createEmptyLexical()
  }

  // Cleaning happens outside the try block, so an error here propagates to the caller.
  const sanitized = cleanHtml(html)

  try {
    // parse -> convert -> drop the empty text nodes Payload does not accept
    const converted = cleanEmptyTextNodes(convertNodes(parse(sanitized)))

    const root: LexicalRoot = {
      type: 'root',
      version: 1,
      // Never emit a childless root: fall back to one empty paragraph.
      children: converted.length === 0 ? [createEmptyParagraph()] : converted,
      direction: null,
    }

    // Payload stores Lexical content under a top-level "root" key.
    return JSON.stringify({ root })
  } catch (failure) {
    console.warn('Failed to parse HTML, using fallback:', failure)
    return createTextLexical(sanitized)
  }
}
/**
 * Convert an HTML string into a Lexical document object, for direct use with
 * the Payload local API.
 *
 * Runs the same pipeline as `htmlToLexical` (clean HTML, parse, convert,
 * strip empty text nodes) but returns the `{ root }` object instead of a
 * JSON string.
 *
 * @param html - HTML markup to convert.
 * @returns `{ root: LexicalRoot }`. Empty or non-string input yields the
 *   empty document. If parsing or conversion throws, a warning is logged and
 *   the cleaned HTML is returned as a single plain-text paragraph.
 */
export function htmlToLexicalObject(html: string): { root: LexicalRoot } {
  if (!html || typeof html !== 'string') {
    // The factory returns a JSON string; parse it back into an object.
    return JSON.parse(createEmptyLexical())
  }

  // Clean the HTML first (outside the try block: errors here propagate).
  const cleanedHtml = cleanHtml(html)

  try {
    const ast = parse(cleanedHtml)
    // Apply the same empty-text-node cleanup as htmlToLexical so both entry
    // points produce the same tree for the same input.
    const children = cleanEmptyTextNodes(convertNodes(ast))

    return {
      root: {
        type: 'root',
        version: 1,
        // Never return a childless root: fall back to one empty paragraph.
        children: children.length > 0 ? children : [createEmptyParagraph()],
        direction: null,
      },
    }
  } catch (error) {
    console.warn('Failed to parse HTML, using fallback:', error)
    return JSON.parse(createTextLexical(cleanedHtml))
  }
}
/**
 * Build the serialized "empty document": a root whose only child is the
 * paragraph returned by `createEmptyParagraph()`, wrapped in `{ root }`.
 *
 * @returns JSON string of the wrapped empty document.
 */
function createEmptyLexical(): string {
  const root = {
    type: 'root',
    version: 1,
    children: [createEmptyParagraph()],
    direction: null,
  }
  return JSON.stringify({ root })
}
/**
 * Build a serialized Lexical document that holds `text` in one paragraph.
 * The converters use it as the fallback when HTML parsing fails.
 *
 * @param text - String handed to `createTextNode` as the paragraph's only child.
 * @returns JSON string of the wrapped `{ root }` document.
 */
function createTextLexical(text: string): string {
  const paragraph = {
    type: 'paragraph',
    version: 1,
    children: [createTextNode(text)],
  }
  const root = {
    type: 'root',
    version: 1,
    children: [paragraph],
    direction: null,
  }
  return JSON.stringify({ root })
}
/**
 * Build a paragraph element whose single child is the text node produced by
 * `createTextNode('')`.
 *
 * @returns A new paragraph node; each call creates a fresh object.
 */
function createEmptyParagraph(): LexicalElementNode {
  const placeholder = createTextNode('')
  const paragraph: LexicalElementNode = {
    type: 'paragraph',
    version: 1,
    children: [placeholder],
  }
  return paragraph
}
/**
 * Remove empty text nodes (`type === 'text'` with `text === ''`) from a list
 * of top-level Lexical nodes. Payload's Lexical validator rejects them.
 *
 * Behaviour:
 * - Descendants are cleaned at every depth, not just the first two levels,
 *   so structures such as list > listitem > link > text are fully covered.
 * - A top-level node whose direct children were all removed receives a single
 *   placeholder from `createTextNode('')`. Nested containers that lose all
 *   their children are left with an empty `children` array.
 * - Top-level nodes of type 'linebreak' are dropped.
 * - The input is not mutated; changed nodes are shallow copies.
 *
 * @param nodes - Top-level nodes (the future children of the root).
 * @returns A new array of cleaned top-level nodes.
 */
function cleanEmptyTextNodes(nodes: LexicalElementNode[]): LexicalElementNode[] {
  // Filter one level of children, then recurse into any child that has children.
  const pruneChildren = (children: any[]): any[] =>
    children
      .filter((child) => !(child.type === 'text' && child.text === ''))
      .map((child) =>
        Array.isArray(child.children)
          ? { ...child, children: pruneChildren(child.children) }
          : child,
      )

  return nodes
    .map((node) => {
      if (!Array.isArray(node.children)) {
        return node
      }
      const cleaned = pruneChildren(node.children)
      return {
        ...node,
        // If every direct child was removed, add a single placeholder text node.
        children: cleaned.length > 0 ? cleaned : [createTextNode('')],
      }
    })
    // A line break is not a valid direct child of the root.
    .filter((node) => (node.type as string) !== 'linebreak')
}
/**
* Clean HTML by removing unwanted elements
*/
function cleanHtml(html: string): string {
return html
// Remove script and style tags
.replace(/