Files
website-enchun-mgr/apps/backend/scripts/migration/migrate.ts
pkupuk be7fc902fb feat(backend): update collections, config and migration tools
Update Payload CMS configuration, collections (Audit, Posts), and add migration scripts/reports.
2026-02-11 11:50:23 +08:00

437 lines
13 KiB
TypeScript
Raw Blame History

This file contains invisible Unicode characters

This file contains invisible Unicode characters that are indistinguishable to humans but may be processed differently by a computer. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#!/usr/bin/env tsx
/**
* Webflow to Payload CMS Migration Script
* Story 1.3: Content Migration Script
*
* Usage:
* pnpm tsx scripts/migration/migrate.ts [options]
*
* Options:
* --dry-run, -n Run without making changes
* --verbose, -v Show detailed logging
* --force, -f Overwrite existing items
* --collection, -c Specific collection (categories|posts|portfolio|all)
* --source, -s Path to export file
* --batch-size Batch size for processing (default: 5)
* --help, -h Show help message
*/
import { config as dotenvConfig } from 'dotenv'
// Load .env before any other imports
// NOTE(review): if tsx executes this file as an ES module, static imports are
// hoisted and evaluated BEFORE this call, so payload.config below may be
// imported without .env loaded. This ordering only holds under CJS
// transpilation — confirm the tsx module mode, or switch to dynamic imports.
dotenvConfig({ path: '.env' })
// Ensure R2_BUCKET_NAME is set (from R2_BUCKET)
// Bridges an env-var naming mismatch: some environments define R2_BUCKET
// while the storage adapter reads R2_BUCKET_NAME.
if (!process.env.R2_BUCKET_NAME && process.env.R2_BUCKET) {
  process.env.R2_BUCKET_NAME = process.env.R2_BUCKET
}
import { getPayload } from 'payload'
import config from '../../src/payload.config'
import { parseCliArgs, Logger, colors } from './utils'
import { createReport, updateReport, saveReport, printReportSummary } from './reporter'
// NOTE(review): findBySlug / findBySlugAndDate are imported but not used in
// this file as visible here — confirm before removing.
import {
findBySlug,
findBySlugAndDate,
getAllSlugs,
getExistingPostIdentifiers,
} from './deduplicator'
import { transformCategories, transformPosts, transformPortfolios } from './transformers'
// getOrCreateMedia / parseWebflowHTML-family helpers for media + markup parsing
import { processMediaUrls, getOrCreateMedia } from './mediaHandler'
import { parseWebflowHTML, parseHTMLFile, extractMediaUrls } from './htmlParser'
import { parseWebflowCSV } from './csvParser'
import type { MigrationConfig, WebflowExportData, PayloadCategory } from './types'
import { readFileSync, existsSync } from 'fs'
import { extname } from 'path'
// ============================================================
// MAIN MIGRATION FUNCTION
// ============================================================
/**
 * Entry point: parse CLI flags, boot Payload, load the Webflow export,
 * then run the requested collection migrations in dependency order
 * (categories first, so posts can resolve category IDs), and finish
 * with a report summary (persisted to disk on live runs only).
 */
async function main() {
  // Parse command-line flags into a migration config
  const options = parseCliArgs(process.argv.slice(2))
  const logger = new Logger(options.verbose)
  const report = createReport(options.dryRun)

  logger.header('🚀 Webflow to Payload CMS Migration')
  const modeLabel = options.dryRun
    ? colors.yellow + 'DRY RUN' + colors.reset
    : colors.green + 'LIVE' + colors.reset
  logger.info(`Mode: ${modeLabel}`)
  logger.info(`Source: ${options.sourcePath}`)
  logger.info(`Collections: ${options.collections.join(', ')}`)

  // Boot the Payload CMS instance with the project config
  logger.info('\n📦 Initializing Payload CMS...')
  const payload = await getPayload({ config })

  // Read the Webflow export (CSV/JSON/HTML, auto-detected by extension)
  logger.info('\n📂 Loading source data...')
  const sourceData = await loadSourceData(options.sourcePath, logger)
  if (!sourceData) {
    logger.error('Failed to load source data')
    process.exit(1)
  }

  const targets = determineCollections(options.collections)

  // Categories migrate first: posts reference category IDs via this
  // slug -> id map. It stays empty when categories are not selected.
  let categoryMap = new Map<string, string>()
  if (targets.includes('categories')) {
    categoryMap = await migrateCategories(payload, sourceData, options, logger, report)
  }
  if (targets.includes('posts')) {
    await migratePosts(payload, sourceData, options, logger, report, categoryMap)
  }
  if (targets.includes('portfolio')) {
    await migratePortfolio(payload, sourceData, options, logger, report)
  }

  // Summarize results; only live runs write a report file.
  printReportSummary(report)
  if (!options.dryRun) {
    await saveReport(report, './apps/backend/reports')
  }
}
// ============================================================
// DATA LOADING
// ============================================================
/**
 * Load the Webflow export from disk, dispatching on file extension.
 *
 * - `.csv`          → Webflow CSV parser
 * - `.json`         → JSON.parse into WebflowExportData
 * - `.html`/`.htm`  → HTML parser (may not capture all data)
 * - anything else   → auto-detect: try CSV, then JSON, then HTML
 *
 * When the file does not exist, returns a seed structure with the known
 * site categories and empty posts/portfolio so manual entry can proceed.
 *
 * @param sourcePath Path to the export file.
 * @param logger     Logger for progress/error output.
 * @returns Parsed export data, or null on any parse/read failure.
 */
async function loadSourceData(
  sourcePath: string,
  logger: Logger,
): Promise<WebflowExportData | null> {
  // Missing file: fall back to a hand-maintained sample structure.
  if (!existsSync(sourcePath)) {
    logger.error(`Source file not found: ${sourcePath}`)
    logger.info('\nCreating sample data structure for manual entry...')
    return {
      posts: [],
      categories: [
        { name: 'Google小學堂', slug: 'google-workshop', colorHex: '#4285f4' },
        { name: 'Meta小學堂', slug: 'meta-workshop', colorHex: '#0668e1' },
        { name: '行銷時事最前線', slug: 'marketing-news', colorHex: '#34a853' },
        { name: '恩群數位最新公告', slug: 'enchun-announcements', colorHex: '#ea4335' },
      ],
      portfolio: [],
    }
  }
  const ext = extname(sourcePath).toLowerCase()
  try {
    if (ext === '.csv') {
      // Parse CSV export (Webflow format)
      logger.info('Parsing CSV file (Webflow format)...')
      return await parseWebflowCSV(sourcePath)
    } else if (ext === '.json') {
      // Parse JSON export.
      // NOTE(review): the parse result is cast without schema validation —
      // malformed exports surface later as runtime errors; consider validating.
      const content = readFileSync(sourcePath, 'utf-8')
      return JSON.parse(content) as WebflowExportData
    } else if (ext === '.html' || ext === '.htm') {
      // Parse HTML file
      logger.info('Parsing HTML file (this may not capture all data)...')
      return await parseHTMLFile(sourcePath)
    } else {
      // Auto-detect: try CSV first, then JSON, then HTML
      logger.info('Auto-detecting file format...')
      try {
        return await parseWebflowCSV(sourcePath)
      } catch {
        // FIX: read the file once and reuse the content for both the JSON
        // and HTML attempts (previously it was read from disk twice).
        const content = readFileSync(sourcePath, 'utf-8')
        try {
          return JSON.parse(content) as WebflowExportData
        } catch {
          return parseWebflowHTML(content)
        }
      }
    }
  } catch (error) {
    logger.error(`Error loading source data: ${error}`)
    return null
  }
}
// ============================================================
// COLLECTION MIGRATION
// ============================================================
/**
 * Migrate Webflow categories into the Payload `categories` collection.
 *
 * Deduplicates by slug (skipped entirely with --force), honors dry-run
 * mode, and returns a slug -> Payload ID map so later steps (posts) can
 * resolve category references. Dry runs hand back placeholder IDs.
 */
async function migrateCategories(
  payload: any,
  sourceData: WebflowExportData,
  config: MigrationConfig,
  logger: Logger,
  report: any,
): Promise<Map<string, string>> {
  logger.header('\n🏷 Migrating Categories')
  const categories = sourceData.categories || []
  if (categories.length === 0) {
    logger.warn('No categories found in source data')
    return new Map()
  }
  logger.info(`Found ${categories.length} categories`)

  const slugToId = new Map<string, string>()
  const results: any[] = []
  let created = 0
  let skipped = 0
  let failed = 0

  // --force treats nothing as pre-existing; otherwise fetch slugs once up front.
  const existingSlugs = config.force ? new Set<string>() : await getAllSlugs(payload, 'categories')

  for (const source of categories) {
    const [item] = transformCategories([source])

    // Skip duplicates unless --force was given.
    if (!config.force && existingSlugs.has(item.slug)) {
      logger.debug(`⏭️ Skipping existing category: ${item.title}`)
      skipped++
      results.push({ slug: item.slug, success: true, skipped: true })
      continue
    }

    // Dry run: record the would-be creation with a placeholder ID.
    if (config.dryRun) {
      logger.debug(`✓ Would create category: ${item.title}`)
      created++
      results.push({ slug: item.slug, success: true })
      slugToId.set(item.slug, `dry-run-id-${created}`)
      continue
    }

    try {
      const doc = await payload.create({ collection: 'categories', data: item })
      logger.success(`Created category: ${item.title}`)
      created++
      results.push({ slug: item.slug, success: true, id: doc.id })
      slugToId.set(item.slug, doc.id)
    } catch (error) {
      logger.error(`Failed to create category "${item.title}": ${error}`)
      failed++
      results.push({ slug: item.slug, success: false, error: String(error) })
    }
  }

  updateReport(report, { collection: 'categories', created, skipped, failed, results })
  logger.info(`Categories: ${created} created, ${skipped} skipped, ${failed} failed`)
  return slugToId
}
/**
 * Migrate Webflow blog posts into the Payload `posts` collection.
 *
 * Deduplicates by slug + published date (skipped with --force), uploads
 * featured-image media in batches up front, resolves category references
 * through `categoryMap`, and honors dry-run mode.
 *
 * @param payload     Payload instance (Local API).
 * @param sourceData  Parsed Webflow export.
 * @param config      Migration options (dryRun, force, batchSize, ...).
 * @param logger      Logger for progress output.
 * @param report      Mutable migration report, updated in place.
 * @param categoryMap Category slug -> Payload ID map from migrateCategories.
 */
async function migratePosts(
  payload: any,
  sourceData: WebflowExportData,
  config: MigrationConfig,
  logger: Logger,
  report: any,
  categoryMap: Map<string, string>,
): Promise<void> {
  logger.header('\n📝 Migrating Posts')
  const posts = sourceData.posts || []
  if (posts.length === 0) {
    logger.warn('No posts found in source data')
    return
  }
  logger.info(`Found ${posts.length} posts`)
  const results: any[] = []
  let created = 0,
    skipped = 0,
    failed = 0
  // Dedup identity is slug + publish date (a slug alone may recur across reposts).
  const existingIds = config.force ? new Map<string, Date>() : await getExistingPostIdentifiers(payload)

  // Collect featured-image URLs so media can be uploaded in batches up front.
  const mediaUrls = new Set<string>()
  for (const post of posts) {
    if (post.featuredImage) mediaUrls.add(post.featuredImage)
  }
  if (mediaUrls.size > 0 && !config.dryRun) {
    logger.info(`Processing ${mediaUrls.size} media files...`)
    // FIX: the URL -> media map returned here was previously bound to an
    // unused local (`mediaMap`); the binding is removed.
    // NOTE(review): the uploaded media IDs are never attached to the
    // transformed posts below — confirm transformPosts (or a Payload hook)
    // resolves featuredImage itself, otherwise posts lose their hero images.
    await processMediaUrls(payload, Array.from(mediaUrls), {
      batchSize: config.batchSize,
      onProgress: (current, total) => logger.progress(current, total, 'media'),
    })
    logger.success(`Media processing complete`)
  }
  for (const post of posts) {
    const transformed = transformPosts([post])[0]
    // Resolve category slug to the Payload ID created earlier.
    if (post.postCategory && categoryMap.has(post.postCategory)) {
      transformed.categories = [categoryMap.get(post.postCategory)!]
    }
    // Skip duplicates (slug + ISO publish date key) unless --force.
    const postKey = `${transformed.slug}-${transformed.publishedAt.toISOString()}`
    if (!config.force && existingIds.has(postKey)) {
      logger.debug(`⏭️ Skipping existing post: ${transformed.title}`)
      skipped++
      results.push({ slug: transformed.slug, success: true, skipped: true })
      continue
    }
    // Dry run: record the would-be creation only.
    if (config.dryRun) {
      logger.debug(`✓ Would create post: ${transformed.title}`)
      created++
      results.push({ slug: transformed.slug, success: true })
      continue
    }
    try {
      const result = await payload.create({
        collection: 'posts',
        data: transformed,
      })
      logger.success(`Created post: ${transformed.title}`)
      created++
      results.push({ slug: transformed.slug, success: true, id: result.id })
    } catch (error) {
      logger.error(`Failed to create post "${transformed.title}": ${error}`)
      failed++
      results.push({ slug: transformed.slug, success: false, error: String(error) })
    }
  }
  updateReport(report, {
    collection: 'posts',
    created,
    skipped,
    failed,
    results,
  })
  logger.info(`Posts: ${created} created, ${skipped} skipped, ${failed} failed`)
}
/**
 * Migrate Webflow portfolio items into the Payload `portfolio` collection.
 *
 * Deduplicates by slug (skipped entirely with --force), uploads preview
 * images in batches before item creation, and honors dry-run mode.
 * Results and counters are folded into the shared migration report.
 */
async function migratePortfolio(
  payload: any,
  sourceData: WebflowExportData,
  config: MigrationConfig,
  logger: Logger,
  report: any,
): Promise<void> {
  logger.header('\n💼 Migrating Portfolio')
  const portfolio = sourceData.portfolio || []
  if (portfolio.length === 0) {
    logger.warn('No portfolio items found in source data')
    return
  }
  logger.info(`Found ${portfolio.length} portfolio items`)

  const results: any[] = []
  let created = 0
  let skipped = 0
  let failed = 0

  // --force treats nothing as pre-existing; otherwise fetch slugs once.
  const existingSlugs = config.force ? new Set<string>() : await getAllSlugs(payload, 'portfolio')

  // Gather preview-image URLs so media uploads happen in batches up front.
  const mediaUrls = new Set<string>()
  for (const entry of portfolio) {
    if (entry.previewImage) mediaUrls.add(entry.previewImage)
  }
  if (mediaUrls.size > 0 && !config.dryRun) {
    logger.info(`Processing ${mediaUrls.size} media files...`)
    await processMediaUrls(payload, Array.from(mediaUrls), {
      batchSize: config.batchSize,
      onProgress: (current, total) => logger.progress(current, total, 'media'),
    })
    logger.success(`Media processing complete`)
  }

  for (const entry of portfolio) {
    const [item] = transformPortfolios([entry])

    // Skip duplicates unless --force was given.
    if (!config.force && existingSlugs.has(item.slug)) {
      logger.debug(`⏭️ Skipping existing portfolio: ${item.title}`)
      skipped++
      results.push({ slug: item.slug, success: true, skipped: true })
      continue
    }

    // Dry run: record the would-be creation only.
    if (config.dryRun) {
      logger.debug(`✓ Would create portfolio: ${item.title}`)
      created++
      results.push({ slug: item.slug, success: true })
      continue
    }

    try {
      const doc = await payload.create({ collection: 'portfolio', data: item })
      logger.success(`Created portfolio: ${item.title}`)
      created++
      results.push({ slug: item.slug, success: true, id: doc.id })
    } catch (error) {
      logger.error(`Failed to create portfolio "${item.title}": ${error}`)
      failed++
      results.push({ slug: item.slug, success: false, error: String(error) })
    }
  }

  updateReport(report, { collection: 'portfolio', created, skipped, failed, results })
  logger.info(`Portfolio: ${created} created, ${skipped} skipped, ${failed} failed`)
}
// ============================================================
// HELPER FUNCTIONS
// ============================================================
/**
 * Expand and validate the --collection CLI values.
 *
 * `'all'` expands to every migratable collection. Other values are kept
 * in caller order but validated: unknown names are dropped instead of
 * being smuggled through an unchecked type assertion (they were already
 * silent no-ops downstream, so valid input behaves exactly as before).
 *
 * @param collections Raw collection names from the CLI.
 * @returns The collections to migrate, in input order.
 */
function determineCollections(collections: string[]): Array<'categories' | 'posts' | 'portfolio'> {
  type Target = 'categories' | 'posts' | 'portfolio'
  const known: readonly Target[] = ['categories', 'posts', 'portfolio']
  if (collections.includes('all')) {
    return [...known]
  }
  // FIX: validate instead of `as Array<...>` — the cast asserted a shape
  // the data might not have.
  return collections.filter((c): c is Target => (known as readonly string[]).includes(c))
}
// ============================================================
// ENTRY POINT
// ============================================================
// Kick off the migration; any unhandled rejection is fatal and exits non-zero.
main().catch((error) => {
  const prefix = `${colors.red}Fatal error:${colors.reset}`
  console.error(prefix, error)
  process.exit(1)
})