/**
 * CSV Parser for Webflow Exports
 * Story 1.3: Content Migration Script
 *
 * Parses Webflow CSV export files and converts them to the WebflowExportData format.
 */

import type { WebflowExportData, WebflowPost, WebflowCategory } from './types'
import { readFile } from 'fs/promises'
import { parse } from 'csv-parse/sync'

// ============================================================
// CSV ROW INTERFACES
// ============================================================

/** One row of the posts collection export; keys are the CSV header names. */
interface WebflowPostCsvRow {
  '文章標題': string
  'Slug': string
  'Collection ID': string
  'Item ID': string
  'Archived': string
  'Draft': string
  'Created On': string
  'Updated On': string
  'Published On': string
  '強調圖片': string
  'Open Graph 顯示圖片': string
  '文章簡述': string
  '發文日期': string
  '文章分類': string
  '發文內容': string
  '是否放在頁尾': string
}

/** One row of the categories collection export. */
interface WebflowCategoryCsvRow {
  name: string
  slug: string
  [key: string]: string
}

/** One row of the portfolio collection export. */
interface WebflowPortfolioCsvRow {
  Name: string
  Slug: string
  'website-link': string
  'preview-image': string
  description: string
  'website-type': string
  tags: string
  [key: string]: string
}

// ============================================================
// SHARED LOOKUP TABLES
// ============================================================
// Maps/Sets are used instead of object literals so that CSV-supplied keys such
// as "constructor" or "toString" cannot resolve to inherited Object.prototype
// members.

/** Colour assigned to any category that has no entry in CATEGORY_COLORS. */
const DEFAULT_CATEGORY_COLOR = '#0066cc'

/**
 * Known category colours, keyed by slug (plus one entry keyed by display name).
 * Shared by both category sources so a slug gets the same colour no matter
 * which CSV it was discovered in.
 */
const CATEGORY_COLORS: ReadonlyMap<string, string> = new Map<string, string>([
  ['google-xiao-xue-tang', '#4285f4'], // Google blue
  ['google-workshop', '#4285f4'],
  ['meta-xiao-xue-tang', '#0668e1'], // Meta blue
  ['meta-workshop', '#0668e1'],
  ['xing-xiao-shi-shi-zui-qian-xian', '#34a853'], // Green
  ['marketing-news', '#34a853'],
  ['enchun-announcements', '#ea4335'], // Red
  ['恩群數位最新公告', '#ea4335'],
])

/** Fixed slugs for the category display names that appear in the posts CSV. */
const CATEGORY_NAME_TO_SLUG: ReadonlyMap<string, string> = new Map<string, string>([
  ['Google小學堂', 'google-xiao-xue-tang'],
  ['Meta小學堂', 'meta-xiao-xue-tang'],
  ['行銷時事最前線', 'xing-xiao-shi-shi-zui-qian-xian'],
  ['恩群數位最新公告', 'enchun-announcements'],
])

/** Website types passed through unchanged; anything else becomes 'other'. */
const KNOWN_WEBSITE_TYPES: ReadonlySet<string> = new Set<string>([
  'corporate',
  'ecommerce',
  'landing',
  'brand',
])

// ============================================================
// MAIN CSV PARSER
// ============================================================

/**
 * Parse a Webflow CSV file and convert it to WebflowExportData.
 *
 * The collection type is detected in this order:
 *   1. posts      - path contains "行銷放大鏡集" or the CSV has a "文章標題" column
 *   2. categories - path contains "Categories" or "分類"
 *   3. portfolio  - path contains "Portfolio" or "作品"
 *   4. posts      - the CSV has a "發文內容" column
 * If nothing matches, an export with three empty collections is returned.
 *
 * @param filePath - Path of the CSV file; it is read as UTF-8.
 * @returns The parsed export; only the detected collection is populated
 *   (a posts file also yields the categories derived from its rows).
 * @throws Whatever `readFile` or csv-parse throws for an unreadable or malformed file.
 */
export async function parseWebflowCSV(filePath: string): Promise<WebflowExportData> {
  const content = await readFile(filePath, 'utf-8')

  // Rows are untyped until the collection kind is known; each branch below
  // narrows them to the matching row interface.
  // eslint-disable-next-line @typescript-eslint/no-explicit-any
  const records: any[] = parse(content, {
    // Strip a leading UTF-8 byte order mark so it cannot end up glued to the
    // first header name (which would break column lookups such as 文章標題).
    bom: true,
    columns: true,
    skip_empty_lines: true,
    trim: true,
  })

  // Detect collection type from file name or headers
  if (filePath.includes('行銷放大鏡集') || hasColumn(records, '文章標題')) {
    return parsePostsCSV(records as WebflowPostCsvRow[])
  }

  if (filePath.includes('Categories') || filePath.includes('分類')) {
    return parseCategoriesCSV(records as WebflowCategoryCsvRow[])
  }

  if (filePath.includes('Portfolio') || filePath.includes('作品')) {
    return parsePortfolioCSV(records as WebflowPortfolioCsvRow[])
  }

  // Default: try to detect from structure
  if (hasColumn(records, '發文內容')) {
    return parsePostsCSV(records as WebflowPostCsvRow[])
  }

  return { posts: [], categories: [], portfolio: [] }
}

/**
 * Report whether the first parsed record carries the given column.
 *
 * Checks for the key rather than a truthy value, so an empty cell in the first
 * data row does not hide the column.
 */
function hasColumn(records: readonly unknown[], column: string): boolean {
  const first = records[0]
  return typeof first === 'object' && first !== null && column in first
}

// ============================================================
// POSTS CSV PARSER
// ============================================================

/**
 * Parse Posts collection CSV
 * Webflow CSV headers: 文章標題, Slug, ..., 強調圖片, 發文日期, 文章分類, 發文內容, ...
 *
 * Rows whose "Archived" cell is exactly 'true' are skipped; the "Draft" column
 * is not consulted. Categories are derived from the distinct non-empty
 * "文章分類" values of the rows that were kept.
 *
 * @param records - Rows of the posts CSV.
 * @returns Export with `posts` and derived `categories`; `portfolio` is empty.
 */
function parsePostsCSV(records: WebflowPostCsvRow[]): WebflowExportData {
  const posts: WebflowPost[] = []
  const categoryNames = new Set<string>()

  for (const row of records) {
    // Skip archived posts
    if (row.Archived === 'true') continue

    const categoryName = row['文章分類'] || ''
    if (categoryName) {
      categoryNames.add(categoryName)
    }

    // Prefer the editorial date, then Webflow's own timestamps.
    const publishedDate = parseWebflowDate(
      row['發文日期'] || row['Published On'] || row['Created On'],
    )

    posts.push({
      title: row['文章標題'] || '',
      slug: row.Slug || '',
      content: row['發文內容'] || '',
      publishedDate,
      postCategory: categoryName || undefined,
      featuredImage: row['強調圖片'] || undefined,
      seoTitle: undefined, // Could be extracted from content if needed
      seoDescription: row['文章簡述'] || undefined,
      excerpt: row['文章簡述'] || undefined,
    })
  }

  // Generate categories from posts
  const categories = generateCategoriesFromPosts(Array.from(categoryNames))

  return { posts, categories, portfolio: [] }
}

// ============================================================
// CATEGORIES CSV PARSER
// ============================================================

/**
 * Parse Categories collection CSV
 *
 * The colour is looked up by slug first, then by name, and falls back to
 * DEFAULT_CATEGORY_COLOR.
 *
 * @param records - Rows of the categories CSV.
 * @returns Export with `categories` populated; `posts` and `portfolio` are empty.
 */
function parseCategoriesCSV(records: WebflowCategoryCsvRow[]): WebflowExportData {
  const categories: WebflowCategory[] = records.map((row) => {
    const name = row.name || ''
    const slug = row.slug || ''
    const colorHex =
      CATEGORY_COLORS.get(slug) || CATEGORY_COLORS.get(name) || DEFAULT_CATEGORY_COLOR
    return { name, slug, colorHex }
  })

  return { posts: [], categories, portfolio: [] }
}

// ============================================================
// PORTFOLIO CSV PARSER
// ============================================================

/**
 * Parse Portfolio collection CSV
 *
 * The "website-type" cell is lower-cased and kept when it is one of
 * KNOWN_WEBSITE_TYPES; a missing or unrecognised value becomes 'other'.
 *
 * @param records - Rows of the portfolio CSV.
 * @returns Export with `portfolio` populated; `posts` and `categories` are empty.
 */
function parsePortfolioCSV(records: WebflowPortfolioCsvRow[]): WebflowExportData {
  // The portfolio item type is not among the types this module imports, so the
  // items stay loosely typed here.
  // eslint-disable-next-line @typescript-eslint/no-explicit-any
  const portfolio: any[] = []

  for (const row of records) {
    const rawType = row['website-type']
    const normalizedType = rawType ? rawType.toLowerCase() : ''
    const websiteType = KNOWN_WEBSITE_TYPES.has(normalizedType) ? normalizedType : 'other'

    portfolio.push({
      name: row.Name || '',
      slug: row.Slug || '',
      websiteLink: row['website-link'] || '',
      previewImage: row['preview-image'] || '',
      description: row.description || '',
      websiteType,
      tags: row.tags || '',
    })
  }

  return { posts: [], categories: [], portfolio }
}

// ============================================================
// HELPER FUNCTIONS
// ============================================================

/**
 * Parse Webflow date format to Date object
 * Webflow dates: "Thu Jan 20 2022 00:00:00 GMT+0000 (Coordinated Universal Time)"
 *
 * @param dateStr - Raw cell value; may be empty or missing.
 * @returns The parsed date, or the current date and time when the value is
 *   empty or cannot be parsed.
 */
function parseWebflowDate(dateStr: string | undefined): Date {
  if (!dateStr) return new Date()

  // Drop the trailing "(time zone name)" part; the GMT offset before it is kept.
  const withoutZoneName = dateStr.replace(/\(.*\)$/, '').trim()
  const parsed = new Date(withoutZoneName)

  return isNaN(parsed.getTime()) ? new Date() : parsed
}

/**
 * Generate category objects from category names found in posts
 *
 * Duplicate names are collapsed (first occurrence wins, order preserved).
 * Names listed in CATEGORY_NAME_TO_SLUG get their fixed slug; any other name
 * is slugified with `toSlug`.
 *
 * @param categoryNames - Category display names.
 * @returns One category per distinct name.
 */
function generateCategoriesFromPosts(categoryNames: string[]): WebflowCategory[] {
  // A Set iterates in insertion order, so this de-duplicates while keeping
  // the first-seen ordering.
  const distinctNames = Array.from(new Set(categoryNames))

  return distinctNames.map((name) => {
    const slug = CATEGORY_NAME_TO_SLUG.get(name) || toSlug(name)
    const colorHex = CATEGORY_COLORS.get(slug) || DEFAULT_CATEGORY_COLOR
    return { name, slug, colorHex }
  })
}

/**
 * Convert string to URL-friendly slug (Chinese-friendly)
 *
 * Lower-cases and trims the value, strips combining diacritical marks, and
 * replaces every character other than a-z, 0-9, U+4E00-U+9FA5, "/" and "-"
 * with "-". Runs of "-" are collapsed and leading/trailing "-" removed.
 */
function toSlug(value: string): string {
  return value
    .toString()
    .toLowerCase()
    .trim()
    .normalize('NFD')
    .replace(/[\u0300-\u036f]/g, '')
    .replace(/[^a-z0-9\u4e00-\u9fa5/-]/g, '-')
    .replace(/-+/g, '-')
    .replace(/^-+|-+$/g, '')
}

// ============================================================
// BATCH CSV PARSER
// ============================================================

/**
 * Parse multiple CSV files at once
 *
 * Files are processed one after another in the given order. A file that fails
 * to parse is logged with `console.error` and skipped, so the remaining files
 * are still merged. After merging, categories are de-duplicated by slug,
 * keeping the first occurrence.
 *
 * @param filePaths - Paths of the CSV files to merge.
 * @returns The combined export.
 */
export async function parseMultipleCSVs(filePaths: string[]): Promise<WebflowExportData> {
  const combined: WebflowExportData = {
    posts: [],
    categories: [],
    portfolio: [],
  }

  for (const filePath of filePaths) {
    try {
      const data = await parseWebflowCSV(filePath)

      if (data.posts) combined.posts?.push(...data.posts)
      if (data.categories) combined.categories?.push(...data.categories)
      if (data.portfolio) combined.portfolio?.push(...data.portfolio)
    } catch (error) {
      // Best effort: one bad file must not abort the whole batch.
      console.error(`Error parsing ${filePath}:`, error)
    }
  }

  // Deduplicate categories by slug
  if (combined.categories) {
    const seenSlugs = new Set<string>()
    combined.categories = combined.categories.filter((category) => {
      if (seenSlugs.has(category.slug)) return false
      seenSlugs.add(category.slug)
      return true
    })
  }

  return combined
}