/**
 * Deduplication Module
 * Story 1.3: Content Migration Script
 *
 * Checks for existing items to prevent duplicates
 */

import type { Payload } from 'payload'

export interface DuplicateCheckOptions {
  force?: boolean // Skip deduplication check
}

/** Number of documents requested per page by the bulk lookups. */
const BULK_PAGE_SIZE = 1000

/**
 * Calls `visit` for every document of `collection`, fetching one page of
 * `BULK_PAGE_SIZE` documents at a time until `hasNextPage` is false.
 *
 * Errors from `payload.find` propagate to the caller.
 */
async function visitAllDocuments(
  payload: Payload,
  collection: string,
  select: Record<string, true>,
  visit: (doc: object) => void,
): Promise<void> {
  let page = 1

  for (;;) {
    const result = await payload.find({
      collection,
      limit: BULK_PAGE_SIZE,
      page,
      depth: 0,
      select,
    })

    for (const doc of result.docs ?? []) {
      visit(doc)
    }

    if (!result.hasNextPage) {
      return
    }

    // Stop rather than loop forever if the reported next page does not advance.
    const next = typeof result.nextPage === 'number' ? result.nextPage : page + 1
    if (next <= page) {
      return
    }
    page = next
  }
}

/**
 * Converts a stored `publishedAt` value into a `Date` instance.
 *
 * `Date` instances are returned unchanged; strings and numbers are parsed.
 * Anything else (for example `null`) yields an Invalid Date, so the result is
 * always a `Date` object even when no usable timestamp is available.
 */
function toDate(value: unknown): Date {
  if (value instanceof Date) {
    return value
  }
  if (typeof value === 'string' || typeof value === 'number') {
    return new Date(value)
  }
  return new Date(Number.NaN)
}

// ============================================================
// FIND EXISTING BY SLUG
// ============================================================

/**
 * Check if a document exists by slug
 *
 * @param payload - Payload instance used to run the query.
 * @param collection - Slug of the collection to search.
 * @param slug - Slug value to match exactly.
 * @returns `{ exists: true, id }` for the first match, otherwise
 *   `{ exists: false }`. A failed query is logged and also reported as
 *   `{ exists: false }`, so callers cannot tell "not found" from "lookup failed".
 */
export async function findBySlug(
  payload: Payload,
  collection: string,
  slug: string,
): Promise<{ exists: boolean; id?: string }> {
  try {
    const result = await payload.find({
      collection,
      where: {
        slug: { equals: slug },
      },
      limit: 1,
      depth: 0,
    })

    const match = result.docs?.[0]
    if (match) {
      return { exists: true, id: match.id }
    }

    return { exists: false }
  } catch (error) {
    console.error(`Error checking for duplicate ${collection} with slug "${slug}":`, error)
    return { exists: false }
  }
}

/**
 * Check if post exists by slug and published date
 *
 * @param payload - Payload instance used to run the query.
 * @param slug - Slug value to match exactly.
 * @param publishedAt - Publication date that must also match exactly.
 * @returns `{ exists: true, id }` for the first matching post, otherwise
 *   `{ exists: false }`. A failed query is logged and also reported as
 *   `{ exists: false }`.
 */
export async function findBySlugAndDate(
  payload: Payload,
  slug: string,
  publishedAt: Date,
): Promise<{ exists: boolean; id?: string }> {
  try {
    const result = await payload.find({
      collection: 'posts',
      where: {
        and: [
          {
            slug: { equals: slug },
          },
          {
            publishedAt: { equals: publishedAt },
          },
        ],
      },
      limit: 1,
      depth: 0,
    })

    const match = result.docs?.[0]
    if (match) {
      return { exists: true, id: match.id }
    }

    return { exists: false }
  } catch (error) {
    console.error(`Error checking for duplicate post with slug "${slug}":`, error)
    return { exists: false }
  }
}

// ============================================================
// BULK EXISTENCE CHECK
// ============================================================

/**
 * Get all existing slugs for a collection
 *
 * Reads every page of the collection, so collections larger than one page are
 * fully covered. Documents whose `slug` is not a string are ignored.
 *
 * @param payload - Payload instance used to run the query.
 * @param collection - Slug of the collection to read.
 * @returns The set of slugs found. If any page fails to load, the error is
 *   logged and an empty set is returned.
 */
export async function getAllSlugs(payload: Payload, collection: string): Promise<Set<string>> {
  try {
    const slugs = new Set<string>()

    await visitAllDocuments(payload, collection, { slug: true }, (doc) => {
      if ('slug' in doc && typeof doc.slug === 'string') {
        slugs.add(doc.slug)
      }
    })

    return slugs
  } catch (error) {
    console.error(`Error getting existing slugs for ${collection}:`, error)
    return new Set<string>()
  }
}

/**
 * Get existing posts by slug + date combination
 *
 * Reads every page of the `posts` collection. Each entry is keyed by
 * `` `${slug}-${publishedAt}` `` using the stored values as returned by the
 * query, and maps to the publication date as a `Date` instance (an Invalid
 * Date when the stored value cannot be interpreted as a date).
 *
 * @param payload - Payload instance used to run the query.
 * @returns Map from identifier key to publication date. If any page fails to
 *   load, the error is logged and an empty map is returned.
 */
export async function getExistingPostIdentifiers(
  payload: Payload,
): Promise<Map<string, Date>> {
  try {
    const identifiers = new Map<string, Date>()

    await visitAllDocuments(payload, 'posts', { slug: true, publishedAt: true }, (doc) => {
      if ('slug' in doc && 'publishedAt' in doc) {
        // Key is built from the raw stored values so its format is unchanged.
        const key = `${String(doc.slug)}-${String(doc.publishedAt)}`
        identifiers.set(key, toDate(doc.publishedAt))
      }
    })

    return identifiers
  } catch (error) {
    console.error('Error getting existing post identifiers:', error)
    return new Map<string, Date>()
  }
}