From 7ac1d4c4f3e98bc00fb6d10f5c1124b72f4cd8f8 Mon Sep 17 00:00:00 2001 From: Josias Date: Thu, 2 Jan 2025 14:23:05 +0100 Subject: [PATCH] update posts in batches --- scripts/regenerate-og.js | 96 +++++++++++++++++++++------------------- 1 file changed, 50 insertions(+), 46 deletions(-) diff --git a/scripts/regenerate-og.js b/scripts/regenerate-og.js index 26026eb9..eab853c7 100644 --- a/scripts/regenerate-og.js +++ b/scripts/regenerate-og.js @@ -29,8 +29,8 @@ export function extractOgUrl(htmlDoc) { if (!result) return; let index = result.index; - for(;;) { - if (htmlDoc[index] === "/" && htmlDoc[index+1] === ">") break; + for (; ;) { + if (htmlDoc[index] === "/" && htmlDoc[index + 1] === ">") break; if (htmlDoc[index] === ">") break; index++; } @@ -92,7 +92,7 @@ async function processPosts() { gt: startDate } }, - }); + }); while (processed <= postsWithPotentiallyOGImages.length) { console.log("Processing posts", processed, "to", processed + 100); @@ -102,51 +102,55 @@ async function processPosts() { } async function regenerateOGImages(posts) { - // this is the date when fallbacks to OG images was originally introduced - - posts.forEach(async post => { - console.log("Working on post", post.id); - // check if the post has an image that is hosted on `imgutil.s3.us-east-2.amazonaws.com` and it's actually an image - const imageWasOnBucky = image => image.includes('imgutil.s3.us-east-2.amazonaws.com') && ["jpg", "jpeg", "png", "gif", "webp", "heic"].some(ext => image.toLowerCase().endsWith(ext)) - const attachmentsOnBucky = post.attachments.filter(attachment => imageWasOnBucky(attachment)); - const attachmentesNotOnBucky = post.attachments.filter(attachment => !imageWasOnBucky(attachment)); - - if (post.attachments.length > 0 && attachmentsOnBucky.length === 0) return; - - const urls = getUrls(post.text); - if (!urls || urls.length === 0) return; - - const regeneratedOGs = await Promise.all(urls.map(async url => { - try { - - const pageContent = await getPageContent(url); - const ogUrls = extractOgUrl(pageContent); - - if (ogUrls.length === 0) return null; - - let imageUrl = await getAndUploadOgImage(ogUrls); - return imageUrl; - } catch (error) { - console.log("Failed to update OG image", url, error); - return null; - } - })); - - const updatedAttachments = [...attachmentesNotOnBucky, ...regeneratedOGs.filter(a => a !== null)]; - - // update the attachments - await prismaClient.updates.update({ - where: { - id: post.id - }, - data: { - attachments: updatedAttachments - } + return new Promise((resolve, reject) => { + // this is the date when fallbacks to OG images was originally introduced + const prismaClient = new PrismaClient(); + Promise.all(posts.map(async post => { + console.log("Working on post", post.id); + // check if the post has an image that is hosted on `imgutil.s3.us-east-2.amazonaws.com` and it's actually an image + const imageWasOnBucky = image => image.includes('imgutil.s3.us-east-2.amazonaws.com') && ["jpg", "jpeg", "png", "gif", "webp", "heic"].some(ext => image.toLowerCase().endsWith(ext)) + const attachmentsOnBucky = post.attachments.filter(attachment => imageWasOnBucky(attachment)); + const attachmentesNotOnBucky = post.attachments.filter(attachment => !imageWasOnBucky(attachment)); + + if (post.attachments.length > 0 && attachmentsOnBucky.length === 0) return; + + const urls = getUrls(post.text); + if (!urls || urls.length === 0) return; + + const regeneratedOGs = await Promise.all(urls.map(async url => { + try { + + const pageContent = await getPageContent(url); + const ogUrls = extractOgUrl(pageContent); + + if (ogUrls.length === 0) return null; + + let imageUrl = await getAndUploadOgImage(ogUrls); + return imageUrl; + } catch (error) { + console.log("Failed to update OG image", url, error); + return null; + } + })); + + const updatedAttachments = [...attachmentesNotOnBucky, ...regeneratedOGs.filter(a => a !== null)]; + + // update the attachments + await prismaClient.updates.update({ + where: { + id: post.id + }, + data: { + attachments: updatedAttachments + } + }); + + console.log("Updated post attachments", post.id); + })).then(() => { + resolve(); }); - - console.log("Updated post attachments", post.id); + console.log("Done!"); }); - console.log("Done!"); } processPosts(); \ No newline at end of file