#!/usr/bin/env tsx
/**
 * Webflow to Payload CMS Migration Script
 * Story 1.3: Content Migration Script
 *
 * Usage:
 *   pnpm tsx scripts/migration/migrate.ts [options]
 *
 * Options:
 *   --dry-run, -n       Run without making changes
 *   --verbose, -v       Show detailed logging
 *   --force, -f         Overwrite existing items
 *   --collection, -c    Specific collection (categories|posts|portfolio|all)
 *   --source, -s        Path to export file
 *   --batch-size        Batch size for processing (default: 5)
 *   --help, -h          Show help message
 */

import { config as dotenvConfig } from 'dotenv'
import { getPayload } from 'payload'
import { parseCliArgs, Logger, colors } from './utils'
import { createReport, updateReport, saveReport, printReportSummary } from './reporter'
import {
  findBySlug,
  findBySlugAndDate,
  getAllSlugs,
  getExistingPostIdentifiers,
} from './deduplicator'
import { transformCategories, transformPosts, transformPortfolios } from './transformers'
import { processMediaUrls, getOrCreateMedia } from './mediaHandler'
import { parseWebflowHTML, parseHTMLFile, extractMediaUrls } from './htmlParser'
import { parseWebflowCSV } from './csvParser'
import type { MigrationConfig, WebflowExportData, PayloadCategory } from './types'
import { readFileSync, existsSync } from 'fs'
import { extname } from 'path'

// Static `import` declarations are hoisted: every module imported above is
// evaluated BEFORE the statements below run. The Payload config is therefore
// loaded with a dynamic `import()` inside `main()`, after the environment has
// been prepared here.
// NOTE(review): if any of the local modules above reads `process.env` at module
// load time, it still sees the environment as it was before `.env` was loaded.
dotenvConfig({ path: '.env' })

// Ensure R2_BUCKET_NAME is set (from R2_BUCKET)
if (!process.env.R2_BUCKET_NAME && process.env.R2_BUCKET) {
  process.env.R2_BUCKET_NAME = process.env.R2_BUCKET
}

/** Collections this script knows how to migrate. */
type CollectionName = 'categories' | 'posts' | 'portfolio'

/** Migration order matters: categories must exist before posts reference them. */
const ALL_COLLECTIONS: readonly CollectionName[] = ['categories', 'posts', 'portfolio']

// ============================================================
// MAIN MIGRATION FUNCTION
// ============================================================

/**
 * Script entry point: parses CLI arguments, initializes Payload, loads the
 * export file, migrates the selected collections in dependency order and
 * prints the report (the report is saved to disk only outside dry-run mode).
 *
 * Exits the process with code 1 when the source data cannot be loaded.
 */
async function main(): Promise<void> {
  // Parse CLI arguments
  const options = parseCliArgs(process.argv.slice(2))
  const logger = new Logger(options.verbose)
  const report = createReport(options.dryRun)

  logger.header('🚀 Webflow to Payload CMS Migration')
  logger.info(
    `Mode: ${options.dryRun ? colors.yellow + 'DRY RUN' + colors.reset : colors.green + 'LIVE' + colors.reset}`,
  )
  logger.info(`Source: ${options.sourcePath}`)
  logger.info(`Collections: ${options.collections.join(', ')}`)

  // Initialize Payload. The config is imported dynamically so that it is
  // evaluated only after `.env` and the R2 bucket alias have been applied.
  logger.info('\n📦 Initializing Payload CMS...')
  const { default: payloadConfig } = await import('../../src/payload.config')
  const payload = await getPayload({ config: payloadConfig })

  // Load source data
  logger.info('\n📂 Loading source data...')
  const sourceData = await loadSourceData(options.sourcePath, logger)

  if (!sourceData) {
    logger.error('Failed to load source data')
    process.exit(1)
  }

  // Process based on collections
  const collectionsToProcess = determineCollections(options.collections)

  // Migration order: Categories first, then Posts/Portfolio
  let categoryMap = new Map<string, string>() // slug -> id

  if (collectionsToProcess.includes('categories')) {
    categoryMap = await migrateCategories(payload, sourceData, options, logger, report)
  } else if (collectionsToProcess.includes('posts')) {
    // Posts are being migrated on their own: resolve their categories against
    // whatever categories already exist instead of silently dropping the relation.
    categoryMap = await loadExistingCategoryIds(payload, logger)
  }

  if (collectionsToProcess.includes('posts')) {
    await migratePosts(payload, sourceData, options, logger, report, categoryMap)
  }

  if (collectionsToProcess.includes('portfolio')) {
    await migratePortfolio(payload, sourceData, options, logger, report)
  }

  // Generate and save report
  printReportSummary(report)

  if (!options.dryRun) {
    await saveReport(report, './apps/backend/reports')
  }
}

// ============================================================
// DATA LOADING
// ============================================================

/**
 * Loads the Webflow export from `sourcePath`.
 *
 * The parser is chosen by file extension (`.csv`, `.json`, `.html`/`.htm`);
 * for any other extension CSV, JSON and HTML parsing are attempted in that order.
 *
 * @param sourcePath Path to the export file.
 * @param logger Logger used for progress and error output.
 * @returns The parsed export data; a built-in sample structure (four
 *   categories, no posts, no portfolio) when the file does not exist; or
 *   `null` when reading or parsing fails.
 */
async function loadSourceData(
  sourcePath: string,
  logger: Logger,
): Promise<WebflowExportData | null> {
  // Check if file exists
  if (!existsSync(sourcePath)) {
    logger.error(`Source file not found: ${sourcePath}`)
    logger.info('\nCreating sample data structure for manual entry...')
    return {
      posts: [],
      categories: [
        { name: 'Google小學堂', slug: 'google-workshop', colorHex: '#4285f4' },
        { name: 'Meta小學堂', slug: 'meta-workshop', colorHex: '#0668e1' },
        { name: '行銷時事最前線', slug: 'marketing-news', colorHex: '#34a853' },
        { name: '恩群數位最新公告', slug: 'enchun-announcements', colorHex: '#ea4335' },
      ],
      portfolio: [],
    }
  }

  const ext = extname(sourcePath).toLowerCase()

  try {
    if (ext === '.csv') {
      // Parse CSV export (Webflow format)
      logger.info('Parsing CSV file (Webflow format)...')
      return await parseWebflowCSV(sourcePath)
    }

    if (ext === '.json') {
      // Parse JSON export
      // NOTE(review): the parsed JSON is trusted to match WebflowExportData; it is not validated.
      const content = readFileSync(sourcePath, 'utf-8')
      return JSON.parse(content) as WebflowExportData
    }

    if (ext === '.html' || ext === '.htm') {
      // Parse HTML file
      logger.info('Parsing HTML file (this may not capture all data)...')
      return await parseHTMLFile(sourcePath)
    }

    // Auto-detect: try CSV first, then JSON, then HTML
    logger.info('Auto-detecting file format...')
    try {
      return await parseWebflowCSV(sourcePath)
    } catch {
      // Read once and reuse the content for both remaining attempts.
      const content = readFileSync(sourcePath, 'utf-8')
      try {
        return JSON.parse(content) as WebflowExportData
      } catch {
        // Awaited so that a rejection (if the parser is async) is handled by
        // the outer catch like every other parser failure.
        return await parseWebflowHTML(content)
      }
    }
  } catch (error) {
    logger.error(`Error loading source data: ${error}`)
    return null
  }
}

// ============================================================
// COLLECTION MIGRATION
// ============================================================

/**
 * Builds a slug -> id map of the categories that already exist in Payload.
 *
 * Best effort: when the lookup fails a warning is logged and whatever was
 * collected so far (possibly nothing) is returned, so callers behave as if no
 * category could be resolved.
 *
 * @param payload Initialized Payload instance (Local API).
 * @param logger Logger used for the failure warning.
 * @returns Map from category slug to document id.
 */
async function loadExistingCategoryIds(
  payload: any,
  logger: Logger,
): Promise<Map<string, string>> {
  const idsBySlug = new Map<string, string>()

  try {
    const { docs } = await payload.find({
      collection: 'categories',
      depth: 0,
      pagination: false, // return every category in a single query
    })

    for (const doc of docs) {
      if (doc.slug) idsBySlug.set(doc.slug, doc.id)
    }
  } catch (error) {
    logger.warn(`Could not load existing category IDs: ${error}`)
  }

  return idsBySlug
}

/**
 * Migrates `sourceData.categories` into the `categories` collection.
 *
 * Unless `config.force` is set, categories whose slug already exists are
 * skipped; their existing ids are still added to the returned map so that
 * posts migrated afterwards can reference them. In dry-run mode nothing is
 * created and placeholder ids (`dry-run-id-N`) are returned for the
 * categories that would have been created.
 *
 * @param payload Initialized Payload instance (Local API).
 * @param sourceData Parsed export data.
 * @param config Migration options (`force`, `dryRun`).
 * @param logger Logger for progress output.
 * @param report Report object; updated with the per-collection outcome.
 * @returns Map from category slug to document id.
 */
async function migrateCategories(
  payload: any,
  sourceData: WebflowExportData,
  config: MigrationConfig,
  logger: Logger,
  report: any,
): Promise<Map<string, string>> {
  logger.header('\n🏷️ Migrating Categories')

  const categories = sourceData.categories || []
  if (categories.length === 0) {
    logger.warn('No categories found in source data')
    return new Map()
  }

  logger.info(`Found ${categories.length} categories`)

  const categoryMap = new Map<string, string>()
  const results: any[] = []
  let created = 0,
    skipped = 0,
    failed = 0

  // Get existing slugs for deduplication
  const existingSlugs = config.force ? new Set<string>() : await getAllSlugs(payload, 'categories')

  // Ids of already-existing categories, loaded lazily on the first skip.
  let existingIds: Map<string, string> | undefined

  for (const category of categories) {
    const transformed = transformCategories([category])[0]

    if (!config.force && existingSlugs.has(transformed.slug)) {
      logger.debug(`⏭️ Skipping existing category: ${transformed.title}`)
      skipped++
      results.push({ slug: transformed.slug, success: true, skipped: true })

      // A skipped category must still be resolvable by the posts migration.
      if (existingIds === undefined) {
        existingIds = await loadExistingCategoryIds(payload, logger)
      }
      const existingId = existingIds.get(transformed.slug)
      if (existingId !== undefined) {
        categoryMap.set(transformed.slug, existingId)
      }
      continue
    }

    if (config.dryRun) {
      logger.debug(`✓ Would create category: ${transformed.title}`)
      created++
      results.push({ slug: transformed.slug, success: true })
      categoryMap.set(transformed.slug, `dry-run-id-${created}`)
      continue
    }

    try {
      const result = await payload.create({
        collection: 'categories',
        data: transformed,
      })

      logger.success(`Created category: ${transformed.title}`)
      created++
      results.push({ slug: transformed.slug, success: true, id: result.id })
      categoryMap.set(transformed.slug, result.id)
    } catch (error) {
      logger.error(`Failed to create category "${transformed.title}": ${error}`)
      failed++
      results.push({ slug: transformed.slug, success: false, error: String(error) })
    }
  }

  updateReport(report, {
    collection: 'categories',
    created,
    skipped,
    failed,
    results,
  })

  logger.info(`Categories: ${created} created, ${skipped} skipped, ${failed} failed`)
  return categoryMap
}

/**
 * Migrates `sourceData.posts` into the `posts` collection.
 *
 * Featured images are pushed through `processMediaUrls` first (skipped in
 * dry-run mode). Each post is then transformed, linked to its category when
 * `post.postCategory` is a key of `categoryMap`, checked against the existing
 * `slug-publishedAt` identifiers (unless `config.force`), and created.
 *
 * @param payload Initialized Payload instance (Local API).
 * @param sourceData Parsed export data.
 * @param config Migration options (`force`, `dryRun`, `batchSize`).
 * @param logger Logger for progress output.
 * @param report Report object; updated with the per-collection outcome.
 * @param categoryMap Map from category slug to document id.
 */
async function migratePosts(
  payload: any,
  sourceData: WebflowExportData,
  config: MigrationConfig,
  logger: Logger,
  report: any,
  categoryMap: Map<string, string>,
): Promise<void> {
  logger.header('\n📝 Migrating Posts')

  const posts = sourceData.posts || []
  if (posts.length === 0) {
    logger.warn('No posts found in source data')
    return
  }

  logger.info(`Found ${posts.length} posts`)

  const results: any[] = []
  let created = 0,
    skipped = 0,
    failed = 0

  // Get existing identifiers for deduplication
  const existingIds = config.force ? new Map() : await getExistingPostIdentifiers(payload)

  // Extract media URLs for batch processing
  const mediaUrls = new Set<string>()
  for (const post of posts) {
    if (post.featuredImage) mediaUrls.add(post.featuredImage)
  }

  // Process media
  // NOTE(review): the value returned by processMediaUrls is not applied to the
  // transformed posts here — presumably media is resolved elsewhere; confirm.
  if (mediaUrls.size > 0 && !config.dryRun) {
    logger.info(`Processing ${mediaUrls.size} media files...`)
    await processMediaUrls(payload, Array.from(mediaUrls), {
      batchSize: config.batchSize,
      onProgress: (current, total) => logger.progress(current, total, 'media'),
    })
    logger.success(`Media processing complete`)
  }

  for (const post of posts) {
    const transformed = transformPosts([post])[0]

    // Resolve category IDs
    if (post.postCategory) {
      const categoryId = categoryMap.get(post.postCategory)
      if (categoryId !== undefined) {
        transformed.categories = [categoryId]
      }
    }

    // Check for duplicates
    // NOTE(review): assumes transformed.publishedAt is always a Date and that
    // getExistingPostIdentifiers uses the same `slug-ISO date` key — confirm.
    const postKey = `${transformed.slug}-${transformed.publishedAt.toISOString()}`
    if (!config.force && existingIds.has(postKey)) {
      logger.debug(`⏭️ Skipping existing post: ${transformed.title}`)
      skipped++
      results.push({ slug: transformed.slug, success: true, skipped: true })
      continue
    }

    if (config.dryRun) {
      logger.debug(`✓ Would create post: ${transformed.title}`)
      created++
      results.push({ slug: transformed.slug, success: true })
      continue
    }

    try {
      const result = await payload.create({
        collection: 'posts',
        data: transformed,
      })

      logger.success(`Created post: ${transformed.title}`)
      created++
      results.push({ slug: transformed.slug, success: true, id: result.id })
    } catch (error) {
      logger.error(`Failed to create post "${transformed.title}": ${error}`)
      failed++
      results.push({ slug: transformed.slug, success: false, error: String(error) })
    }
  }

  updateReport(report, {
    collection: 'posts',
    created,
    skipped,
    failed,
    results,
  })

  logger.info(`Posts: ${created} created, ${skipped} skipped, ${failed} failed`)
}

/**
 * Migrates `sourceData.portfolio` into the `portfolio` collection.
 *
 * Preview images are pushed through `processMediaUrls` first (skipped in
 * dry-run mode). Items whose slug already exists are skipped unless
 * `config.force` is set.
 *
 * @param payload Initialized Payload instance (Local API).
 * @param sourceData Parsed export data.
 * @param config Migration options (`force`, `dryRun`, `batchSize`).
 * @param logger Logger for progress output.
 * @param report Report object; updated with the per-collection outcome.
 */
async function migratePortfolio(
  payload: any,
  sourceData: WebflowExportData,
  config: MigrationConfig,
  logger: Logger,
  report: any,
): Promise<void> {
  logger.header('\n💼 Migrating Portfolio')

  const portfolio = sourceData.portfolio || []
  if (portfolio.length === 0) {
    logger.warn('No portfolio items found in source data')
    return
  }

  logger.info(`Found ${portfolio.length} portfolio items`)

  const results: any[] = []
  let created = 0,
    skipped = 0,
    failed = 0

  // Get existing slugs
  const existingSlugs = config.force ? new Set<string>() : await getAllSlugs(payload, 'portfolio')

  // Extract media URLs
  const mediaUrls = new Set<string>()
  for (const item of portfolio) {
    if (item.previewImage) mediaUrls.add(item.previewImage)
  }

  // Process media
  if (mediaUrls.size > 0 && !config.dryRun) {
    logger.info(`Processing ${mediaUrls.size} media files...`)
    await processMediaUrls(payload, Array.from(mediaUrls), {
      batchSize: config.batchSize,
      onProgress: (current, total) => logger.progress(current, total, 'media'),
    })
    logger.success(`Media processing complete`)
  }

  for (const item of portfolio) {
    const transformed = transformPortfolios([item])[0]

    if (!config.force && existingSlugs.has(transformed.slug)) {
      logger.debug(`⏭️ Skipping existing portfolio: ${transformed.title}`)
      skipped++
      results.push({ slug: transformed.slug, success: true, skipped: true })
      continue
    }

    if (config.dryRun) {
      logger.debug(`✓ Would create portfolio: ${transformed.title}`)
      created++
      results.push({ slug: transformed.slug, success: true })
      continue
    }

    try {
      const result = await payload.create({
        collection: 'portfolio',
        data: transformed,
      })

      logger.success(`Created portfolio: ${transformed.title}`)
      created++
      results.push({ slug: transformed.slug, success: true, id: result.id })
    } catch (error) {
      logger.error(`Failed to create portfolio "${transformed.title}": ${error}`)
      failed++
      results.push({ slug: transformed.slug, success: false, error: String(error) })
    }
  }

  updateReport(report, {
    collection: 'portfolio',
    created,
    skipped,
    failed,
    results,
  })

  logger.info(`Portfolio: ${created} created, ${skipped} skipped, ${failed} failed`)
}

// ============================================================
// HELPER FUNCTIONS
// ============================================================

/** Type guard: is `value` one of the collections this script can migrate? */
function isCollectionName(value: string): value is CollectionName {
  return (ALL_COLLECTIONS as readonly string[]).includes(value)
}

/**
 * Expands the CLI collection selection into concrete collection names.
 *
 * @param collections Names as given on the command line; may contain `all`.
 * @returns Every collection when `all` is present; otherwise the requested
 *   names with unknown entries dropped (they could never match a migration
 *   step anyway).
 */
function determineCollections(collections: string[]): Array<'categories' | 'posts' | 'portfolio'> {
  if (collections.includes('all')) {
    return [...ALL_COLLECTIONS]
  }
  return collections.filter(isCollectionName)
}

// ============================================================
// ENTRY POINT
// ============================================================

main().catch((error) => {
  console.error(`${colors.red}Fatal error:${colors.reset}`, error)
  process.exit(1)
})