feat(backend): update collections, config and migration tools
Update Payload CMS configuration, collections (Audit, Posts), and add migration scripts/reports.
This commit is contained in:
572
apps/backend/scripts/migration/lexicalConverter.ts
Normal file
572
apps/backend/scripts/migration/lexicalConverter.ts
Normal file
@@ -0,0 +1,572 @@
|
||||
/**
 * HTML to Lexical JSON Converter
 * Story 1.3: Content Migration Script
 *
 * Converts HTML content to the Lexical editor format used by Payload CMS.
 */
|
||||
|
||||
import { parse } from 'html-parse-stringify'
|
||||
|
||||
// ============================================================
// LEXICAL JSON TYPES
// ============================================================
|
||||
|
||||
/**
 * Loosest description of a serialized Lexical node: a type tag, a schema
 * version, and any number of additional node-specific properties.
 */
interface LexicalNode {
  /** Any further properties carried by the concrete node type. */
  [key: string]: any
  /** Schema version of the serialized node. */
  version: number
  /** Node type tag. */
  type: string
}
|
||||
|
||||
/**
 * Serialized Lexical text node.
 *
 * `detail` and `format` are numeric values; `createTextNode` in this file
 * writes `detail: 0` and a numeric `format`, so both are typed as numbers.
 */
interface LexicalTextContent {
  type: 'text'
  version: 1
  /** Numeric detail value; this file always writes 0. */
  detail?: number
  /** Numeric text-format value (0 = no formatting). */
  format?: number
  /** Text mode; this file always writes 'normal'. */
  mode?: string
  /** Inline CSS applied to the text; empty string for none. */
  style?: string
  /** The literal text content. */
  text: string
}
|
||||
|
||||
/**
 * Serialized element (container) node. One shape is shared by paragraphs,
 * headings, quotes, lists, list items and links; most fields are optional
 * because each builder in this file only sets the ones it needs.
 */
interface LexicalElementNode {
  type: 'paragraph' | 'heading' | 'quote' | 'list' | 'listitem' | 'link' | 'element'
  version: 1
  /** Nested content of this element. */
  children: LexicalContent[]

  // Layout
  direction?: null | 'ltr' | 'rtl'
  format?: '' | 'left' | 'start' | 'center' | 'right' | 'end' | 'justify'
  indent?: number

  // Set by the heading builder in this file (e.g. `h2`)
  tag?: string

  // Set by the list conversion in this file
  listType?: 'number' | 'bullet'

  // Not populated by the visible code; presumably intended for link nodes
  url?: string
  rel?: string | null
  target?: string | null
  title?: string | null
}
|
||||
|
||||
/** Serialized line break node; it carries nothing beyond its tag and version. */
interface LexicalLinebreakNode {
  version: 1
  type: 'linebreak'
}
|
||||
|
||||
/** Top-level node of a converted document; its children are element nodes. */
interface LexicalRoot {
  type: 'root'
  version: 1
  /** Text direction; the converters in this file always write `null`. */
  direction: null | 'ltr' | 'rtl'
  children: LexicalElementNode[]
}
|
||||
|
||||
/** Anything that may appear in an element's `children` array. */
type LexicalContent =
  | LexicalElementNode
  | LexicalTextContent
  | LexicalLinebreakNode
|
||||
|
||||
// ============================================================
// HTML TO LEXICAL CONVERTER
// ============================================================
|
||||
|
||||
/**
 * Convert an HTML string into a serialized (JSON string) Lexical document.
 *
 * The result is the JSON text of `{ "root": { ... } }`, the wrapper that
 * Payload's richText field expects when storing Lexical content.
 *
 * @param html - Source HTML; empty or non-string input yields an empty document.
 * @returns JSON string of the wrapped Lexical document.
 */
export function htmlToLexical(html: string): string {
  // Anything that is not a non-empty string maps to the empty document
  if (typeof html !== 'string' || html === '') {
    return createEmptyLexical()
  }

  // Strip scripts, styles and noisy attributes before parsing
  const sanitized = cleanHtml(html)

  try {
    // Parse, convert, then drop the empty text nodes Payload doesn't accept
    const blocks = cleanEmptyTextNodes(convertNodes(parse(sanitized)))

    const root = {
      type: 'root',
      version: 1,
      children: blocks.length === 0 ? [createEmptyParagraph()] : blocks,
      direction: null,
    } satisfies LexicalRoot

    // Payload stores Lexical content under a top-level "root" key
    return JSON.stringify({ root })
  } catch (error) {
    // Fall back to a single paragraph holding the cleaned markup as plain text
    console.warn('Failed to parse HTML, using fallback:', error)
    return createTextLexical(sanitized)
  }
}
|
||||
|
||||
/**
 * Convert an HTML string into a Lexical document object (for direct use with
 * the Payload local API).
 *
 * Runs the same pipeline as `htmlToLexical`, including the removal of empty
 * text nodes, so both entry points describe the same document for the same
 * input; only the return form (object vs. JSON string) differs.
 *
 * @param html - Source HTML; empty or non-string input yields an empty document.
 * @returns `{ root: LexicalRoot }`, the wrapper used for Payload richText fields.
 */
export function htmlToLexicalObject(html: string): { root: LexicalRoot } {
  if (!html || typeof html !== 'string') {
    return JSON.parse(createEmptyLexical())
  }

  // Clean the HTML first
  const cleanedHtml = cleanHtml(html)

  try {
    const ast = parse(cleanedHtml)
    // Drop empty text nodes exactly as the string-returning variant does
    const children = cleanEmptyTextNodes(convertNodes(ast))

    return {
      root: {
        type: 'root',
        version: 1,
        children: children.length > 0 ? children : [createEmptyParagraph()],
        direction: null,
      },
    }
  } catch (error) {
    // Fall back to a single paragraph holding the cleaned markup as plain text
    console.warn('Failed to parse HTML, using fallback:', error)
    return JSON.parse(createTextLexical(cleanedHtml))
  }
}
|
||||
|
||||
/**
 * Build the serialized form of an empty document: a root wrapping one
 * empty paragraph.
 *
 * @returns JSON string of `{ root: { ... } }`.
 */
function createEmptyLexical(): string {
  const emptyRoot = {
    type: 'root',
    version: 1,
    children: [createEmptyParagraph()],
    direction: null,
  }
  return JSON.stringify({ root: emptyRoot })
}
|
||||
|
||||
/**
 * Fallback builder: serialize a document made of one paragraph that holds
 * `text` verbatim in a single text node.
 *
 * @param text - Plain text to place in the paragraph.
 * @returns JSON string of `{ root: { ... } }`.
 */
function createTextLexical(text: string): string {
  const paragraph = {
    type: 'paragraph',
    version: 1,
    children: [createTextNode(text)],
  }
  const wrapped = {
    root: {
      type: 'root',
      version: 1,
      children: [paragraph],
      direction: null,
    },
  }
  return JSON.stringify(wrapped)
}
|
||||
|
||||
/**
 * Build a paragraph whose only child is an empty text node.
 *
 * @returns A fresh paragraph element node.
 */
function createEmptyParagraph(): LexicalElementNode {
  const placeholder = createTextNode('')
  const paragraph: LexicalElementNode = {
    type: 'paragraph',
    version: 1,
    children: [placeholder],
  }
  return paragraph
}
|
||||
|
||||
/**
 * Remove empty text nodes (`type: 'text'` with `text: ''`) from a Lexical tree.
 *
 * Descendants are cleaned at every depth. A top-level node whose children
 * were all removed receives a single empty text node as a placeholder so it
 * is never left childless; nested elements are left with whatever children
 * remain (possibly none). Top-level `linebreak` nodes are dropped. Input
 * nodes are not mutated; changed nodes are shallow copies.
 *
 * @param nodes - Top-level element nodes of the document.
 * @returns The cleaned top-level nodes.
 */
function cleanEmptyTextNodes(nodes: LexicalElementNode[]): LexicalElementNode[] {
  const isEmptyText = (candidate: any): boolean =>
    candidate.type === 'text' && candidate.text === ''

  // Drops empty text nodes from `children`, then repeats for every nested element
  const prune = (children: any[]): any[] =>
    children
      .filter((child) => !isEmptyText(child))
      .map((child) =>
        Array.isArray(child.children) ? { ...child, children: prune(child.children) } : child,
      )

  return nodes
    .map((node) => {
      if (!Array.isArray(node.children)) {
        return node
      }
      const kept = prune(node.children)
      // Keep top-level nodes non-empty: fall back to one empty text node
      return { ...node, children: kept.length > 0 ? kept : [createTextNode('')] }
    })
    .filter((node) => {
      // Remove nodes that became invalid after cleaning
      return node.type !== 'linebreak'
    })
}
|
||||
|
||||
/**
 * Strip markup the converter should not see. Each pattern is removed in
 * order and the result is trimmed.
 *
 * @param html - Raw HTML.
 * @returns The cleaned HTML string.
 */
function cleanHtml(html: string): string {
  const removals: RegExp[] = [
    // <script> and <style> elements, including their contents
    /<script\b[^<]*(?:(?!<\/script>)<[^<]*)*<\/script>/gi,
    /<style\b[^<]*(?:(?!<\/style>)<[^<]*)*<\/style>/gi,
    // Webflow-specific attributes: data-* and class (double-quoted values)
    /\sdata-[a-z-]+="[^"]*"/gi,
    /\sclass="[^"]*"/gi,
    // Paragraphs and divs left with nothing but whitespace
    /<p>\s*<\/p>/gi,
    /<div>\s*<\/div>/gi,
  ]

  let cleaned = html
  for (const pattern of removals) {
    cleaned = cleaned.replace(pattern, '')
  }
  return cleaned.trim()
}
|
||||
|
||||
/**
 * Convert top-level HTML AST nodes into block-level Lexical nodes.
 *
 * Handled input:
 * - text nodes         -> paragraph (whitespace-only text is skipped)
 * - h1..h6             -> heading
 * - p, div             -> paragraph
 * - ul, ol             -> list (skipped when it contains no `li`)
 * - blockquote         -> quote
 * - br                 -> paragraph containing a linebreak
 * - hr                 -> paragraph containing the text `---`
 * - img                -> image placeholder paragraph
 *
 * Output order always follows input order: every node, lists included, is
 * appended to the result as soon as it is converted.
 *
 * NOTE(review): any other top-level tag is skipped together with its content.
 *
 * @param nodes - AST nodes as produced by the HTML parser.
 * @returns Block nodes; a single empty paragraph when nothing was produced.
 */
function convertNodes(nodes: any[]): LexicalElementNode[] {
  const result: LexicalElementNode[] = []

  const paragraphOf = (children: LexicalContent[]): LexicalElementNode => ({
    type: 'paragraph',
    version: 1,
    children,
  })

  for (const node of nodes) {
    // Handle text nodes. Accept both `value` and `content`, matching the
    // inline converter, so top-level text is not lost.
    if (node.type === 'text') {
      const raw = node.value ?? node.content
      const text = typeof raw === 'string' ? raw.trim() : ''
      if (text) {
        result.push(paragraphOf([createTextNode(text)]))
      }
      continue
    }

    // Anything without an element name (e.g. comments) cannot be converted
    if (!node.name) continue

    const tag = node.name.toLowerCase()

    // Handle headings
    if (['h1', 'h2', 'h3', 'h4', 'h5', 'h6'].includes(tag)) {
      result.push(createHeading(tag, node.children || []))
      continue
    }

    // Handle paragraphs and divs (divs are treated as paragraphs)
    if (tag === 'p' || tag === 'div') {
      const content = convertInlineNodes(node.children || [])
      if (content.length > 0) {
        result.push(paragraphOf(content))
      }
      continue
    }

    // Handle lists: the items are known right away, so the list is emitted
    // immediately, which keeps it in document order relative to its siblings.
    if (tag === 'ul' || tag === 'ol') {
      const list: LexicalElementNode = {
        type: 'list',
        version: 1,
        listType: tag === 'ol' ? 'number' : 'bullet',
        children: [],
      }
      // flushList only appends the list when it has at least one item
      flushList(result, list, convertListItems(node.children || []))
      continue
    }

    // Handle blockquotes
    if (tag === 'blockquote') {
      result.push({
        type: 'quote',
        version: 1,
        children: convertInlineNodes(node.children || []),
      })
      continue
    }

    // Handle line breaks and horizontal rules
    if (tag === 'br') {
      const lineBreak: LexicalLinebreakNode = { type: 'linebreak', version: 1 }
      result.push(paragraphOf([lineBreak]))
      continue
    }

    if (tag === 'hr') {
      result.push(paragraphOf([createTextNode('---')]))
      continue
    }

    // Handle images. Read `attrs` as well as `attributes`, matching the
    // inline converter, so src/alt are actually picked up.
    if (tag === 'img') {
      const src = node.attrs?.src || node.attributes?.src || ''
      const alt = node.attrs?.alt || node.attributes?.alt || ''
      result.push(createImageNode(src, alt))
      continue
    }
  }

  return result.length > 0 ? result : [createEmptyParagraph()]
}
|
||||
|
||||
/**
 * Attach `items` to `list` and append the list to `result`.
 *
 * Does nothing when there is no list or when there are no items. Otherwise
 * `list.children` is overwritten with `items` and `list` is pushed onto
 * `result`; both arguments are mutated in place.
 *
 * @param result - Output array receiving the finished list.
 * @param list - List node being built, or `null` when none is open.
 * @param items - List items collected for `list`.
 */
function flushList(
  result: LexicalElementNode[],
  list: LexicalElementNode | null,
  items: LexicalElementNode[],
): void {
  const nothingToFlush = !list || items.length === 0
  if (nothingToFlush) {
    return
  }
  list.children = items
  result.push(list)
}
|
||||
|
||||
/**
 * Turn the `li` children of a list element into Lexical list items.
 *
 * Entries whose (case-insensitive) element name is not `li` are ignored;
 * each remaining item's children go through the inline converter.
 *
 * @param items - Child AST nodes of a `ul`/`ol` element.
 * @returns One `listitem` node per `li`, in source order.
 */
function convertListItems(items: any[]): LexicalElementNode[] {
  const listItems: LexicalElementNode[] = []

  items.forEach((candidate) => {
    if (candidate.name?.toLowerCase() !== 'li') {
      return
    }
    listItems.push({
      type: 'listitem',
      version: 1,
      children: convertInlineNodes(candidate.children || []),
    })
  })

  return listItems
}
|
||||
|
||||
/**
 * Build a text node with every property this converter emits for text.
 *
 * @param text - Text content of the node.
 * @param format - Numeric text format; 0 is used when omitted.
 * @returns A new text node with `detail: 0`, `mode: 'normal'` and an empty `style`.
 */
function createTextNode(text: string, format?: number): LexicalTextContent {
  const resolvedFormat = format === undefined || format === null ? 0 : format
  const node: LexicalTextContent = {
    type: 'text',
    version: 1,
    text,
    detail: 0,
    format: resolvedFormat,
    mode: 'normal',
    style: '',
  }
  return node
}
|
||||
|
||||
/**
 * Lexical text-format bit flags for the inline formatting tags handled below:
 * 1 = bold, 2 = italic, 8 = underline (4 is strikethrough in Lexical).
 */
const INLINE_TEXT_FORMATS: ReadonlyMap<string, number> = new Map([
  ['strong', 1],
  ['b', 1],
  ['em', 2],
  ['i', 2],
  ['u', 8],
])

/**
 * Convert inline HTML AST nodes (text, links, formatting, images) into
 * Lexical inline content.
 *
 * - text                 -> text node (empty text is skipped)
 * - a                    -> plain text, followed by ` (href)` unless the href is empty or `#`
 * - strong/b, em/i, u    -> formatted text node (flattened to plain text first)
 * - img                  -> italic `[Image: ...]` text placeholder
 * - br                   -> linebreak node
 * - span                 -> plain text
 * - code                 -> text with a monospace inline style
 * - anything else        -> its children, converted recursively
 *
 * Nodes without an element name (such as comments) are ignored, and no
 * branch emits an empty text node.
 *
 * @param nodes - Child AST nodes of a block element.
 * @returns Inline content; a single empty text node when nothing was
 *   produced, so the result is never an empty array.
 */
function convertInlineNodes(nodes: any[]): LexicalContent[] {
  const result: LexicalContent[] = []

  for (const node of nodes) {
    // Handle text nodes (html-parse-stringify uses type for text)
    if (node.type === 'text') {
      const text = (node.value || node.content || '') as string
      if (text) {
        result.push(createTextNode(text))
      }
      continue
    }

    // Skip anything that is not an element; comment nodes have a type but
    // no name and would otherwise crash on `node.name.toLowerCase()`.
    if (!node.name) continue

    const tag = node.name.toLowerCase()

    // Handle links
    // NOTE: Payload's Lexical link validation is very strict. For now, convert links to text
    // TODO: Implement proper link format after investigating Payload's link node requirements
    if (tag === 'a') {
      const text = extractText(node.children || [])
      const href = node.attrs?.href || node.attributes?.href || ''
      if (text) {
        // Include URL as text for now
        const linkText = href && href !== '#' ? `${text} (${href})` : text
        result.push(createTextNode(linkText))
      }
      continue
    }

    // Handle bold, italic and underline
    const format = INLINE_TEXT_FORMATS.get(tag)
    if (format !== undefined) {
      const text = extractText(node.children || [])
      if (text) {
        result.push(createTextNode(text, format))
      }
      continue
    }

    // Handle images inline: emit only the placeholder text node, never the
    // wrapping paragraph, so no block node ends up inside inline content.
    if (tag === 'img') {
      const src = node.attrs?.src || node.attributes?.src || ''
      const alt = node.attrs?.alt || node.attributes?.alt || ''
      result.push(...createImageNode(src, alt).children)
      continue
    }

    // Handle line breaks inside a block
    if (tag === 'br') {
      const lineBreak: LexicalLinebreakNode = { type: 'linebreak', version: 1 }
      result.push(lineBreak)
      continue
    }

    // Handle spans (treat as text)
    if (tag === 'span') {
      const text = extractText(node.children || [])
      if (text) {
        result.push(createTextNode(text))
      }
      continue
    }

    // Handle code
    if (tag === 'code') {
      const text = extractText(node.children || [])
      if (text) {
        result.push({
          ...createTextNode(text),
          style: 'font-family: monospace;',
        })
      }
      continue
    }

    // Recursively handle other inline elements. The recursive call returns an
    // empty-text placeholder when it finds nothing; drop it here.
    const nested = convertInlineNodes(node.children || [])
    for (const child of nested) {
      if (child.type === 'text' && child.text === '') continue
      result.push(child)
    }
  }

  return result.length > 0 ? result : [createTextNode('')]
}
|
||||
|
||||
/**
 * Build a heading node from an `hN` tag name and the element's children.
 *
 * The level is parsed from the characters after the first one in `tag` and
 * written back as `h<level>`.
 *
 * @param tag - Tag name such as `h2`.
 * @param children - Child AST nodes of the heading element.
 * @returns A heading node that always has at least one child.
 */
function createHeading(tag: string, children: any[]): LexicalElementNode {
  const level = Number.parseInt(tag.slice(1), 10)

  let content = convertInlineNodes(children)
  if (content.length === 0) {
    content = [createTextNode('')]
  }

  return {
    type: 'heading',
    version: 1,
    tag: `h${level}`,
    children: content,
  }
}
|
||||
|
||||
/**
 * Build a textual stand-in for an image: a paragraph containing the italic
 * text `[Image: <alt or src>]`.
 *
 * @param src - Image source URL, used as the label when `alt` is empty.
 * @param alt - Alternative text, preferred as the label.
 * @returns A paragraph node wrapping the placeholder text node.
 */
function createImageNode(src: string, alt: string): LexicalElementNode {
  const label = alt || src
  const placeholder = {
    ...createTextNode(`[Image: ${label}]`),
    style: 'font-style: italic;',
  }
  return {
    type: 'paragraph',
    version: 1,
    children: [placeholder],
  }
}
|
||||
|
||||
/**
 * Flatten AST nodes to plain text.
 *
 * Text nodes contribute their `value` (or `content`); nodes with a
 * `children` property contribute the text of those children, recursively;
 * any other node contributes its `content` when it has one.
 *
 * @param nodes - AST nodes to flatten.
 * @returns The concatenated text, in source order.
 */
function extractText(nodes: any[]): string {
  let collected = ''

  for (let index = 0; index < nodes.length; index++) {
    const current = nodes[index]

    if (current.type === 'text') {
      collected += current.value || current.content || ''
      continue
    }
    if (current.children) {
      collected += extractText(current.children)
      continue
    }
    if (current.content) {
      collected += current.content
    }
  }

  return collected
}
|
||||
|
||||
// ============================================================
// UTILITY FUNCTIONS
// ============================================================
|
||||
|
||||
/**
 * Check whether a string is Lexical JSON.
 *
 * Accepts both forms seen in this module: the wrapped document
 * `{ "root": { "type": "root", "children": [...] } }` produced by
 * `htmlToLexical`, and a bare root node `{ "type": "root", "children": [...] }`.
 * Only the root's `type` and the presence of a `children` array are checked;
 * the children themselves are not inspected.
 *
 * @param json - Candidate JSON text.
 * @returns `true` for a recognisable Lexical root, `false` otherwise
 *   (including text that is not valid JSON).
 */
export function isValidLexical(json: string): boolean {
  const isRootNode = (value: unknown): boolean => {
    if (typeof value !== 'object' || value === null) {
      return false
    }
    const candidate = value as { type?: unknown; children?: unknown }
    return candidate.type === 'root' && Array.isArray(candidate.children)
  }

  try {
    const parsed: unknown = JSON.parse(json)
    if (isRootNode(parsed)) {
      return true
    }
    // Wrapped form: the root node lives under the "root" key
    return (
      typeof parsed === 'object' &&
      parsed !== null &&
      isRootNode((parsed as { root?: unknown }).root)
    )
  } catch {
    return false
  }
}
|
||||
|
||||
/**
 * Convert a list of HTML strings, one by one, with `htmlToLexical`.
 *
 * @param htmlArray - HTML sources to convert.
 * @returns The serialized Lexical documents, in the same order as the input.
 */
export function batchHtmlToLexical(htmlArray: string[]): string[] {
  const convertOne = (source: string): string => htmlToLexical(source)
  return htmlArray.map(convertOne)
}
|
||||
Reference in New Issue
Block a user