From e532f529a47135f174fce3ba79cc7041d15b3f74 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Uro=C5=A1=20Marolt?=
Date: Fri, 12 Jan 2024 13:25:37 +0100
Subject: [PATCH] make sure name-website combination is unique in the organizationCacheIdentities table

---
 ...60063__org-cache-identities-unique-key.sql |  0
 ...60063__org-cache-identities-unique-key.sql |  2 +
 services/apps/data_sink_worker/package.json   |  2 +-
 ...=> fix-org-cache-identities-duplicates.ts} | 72 +++++--------
 4 files changed, 19 insertions(+), 57 deletions(-)
 create mode 100644 backend/src/database/migrations/U1705060063__org-cache-identities-unique-key.sql
 create mode 100644 backend/src/database/migrations/V1705060063__org-cache-identities-unique-key.sql
 rename services/apps/data_sink_worker/src/bin/{migrate-org-cache-to-identities.ts => fix-org-cache-identities-duplicates.ts} (75%)

diff --git a/backend/src/database/migrations/U1705060063__org-cache-identities-unique-key.sql b/backend/src/database/migrations/U1705060063__org-cache-identities-unique-key.sql
new file mode 100644
index 0000000000..e69de29bb2
diff --git a/backend/src/database/migrations/V1705060063__org-cache-identities-unique-key.sql b/backend/src/database/migrations/V1705060063__org-cache-identities-unique-key.sql
new file mode 100644
index 0000000000..d43c4cd069
--- /dev/null
+++ b/backend/src/database/migrations/V1705060063__org-cache-identities-unique-key.sql
@@ -0,0 +1,2 @@
+alter table "organizationCacheIdentities"
+    add constraint ix_unique_website_name unique (name, website);
\ No newline at end of file
diff --git a/services/apps/data_sink_worker/package.json b/services/apps/data_sink_worker/package.json
index 6d806742d6..7881889fd0 100644
--- a/services/apps/data_sink_worker/package.json
+++ b/services/apps/data_sink_worker/package.json
@@ -18,7 +18,7 @@
     "script:restart-result": "SERVICE=script TS_NODE_TRANSPILE_ONLY=true node -r tsconfig-paths/register -r ts-node/register src/bin/restart-result.ts",
     "script:process-results": "SERVICE=script TS_NODE_TRANSPILE_ONLY=true node -r tsconfig-paths/register -r ts-node/register src/bin/process-results.ts",
     "script:map-tenant-members-to-org": "SERVICE=script TS_NODE_TRANSPILE_ONLY=true node -r tsconfig-paths/register -r ts-node/register src/bin/map-tenant-members-to-org.ts",
-    "script:migrate-org-caches": "SERVICE=script TS_NODE_TRANSPILE_ONLY=true node -r tsconfig-paths/register -r ts-node/register src/bin/migrate-org-cache-to-identities.ts",
+    "script:fix-org-cache-identities-duplicates": "SERVICE=script TS_NODE_TRANSPILE_ONLY=true node -r tsconfig-paths/register -r ts-node/register src/bin/fix-org-cache-identities-duplicates.ts",
     "script:map-member-to-org": "SERVICE=script TS_NODE_TRANSPILE_ONLY=true node -r tsconfig-paths/register -r ts-node/register src/bin/map-member-to-org.ts"
   },
   "dependencies": {
diff --git a/services/apps/data_sink_worker/src/bin/migrate-org-cache-to-identities.ts b/services/apps/data_sink_worker/src/bin/fix-org-cache-identities-duplicates.ts
similarity index 75%
rename from services/apps/data_sink_worker/src/bin/migrate-org-cache-to-identities.ts
rename to services/apps/data_sink_worker/src/bin/fix-org-cache-identities-duplicates.ts
index ca6495dab1..cbd3d6348d 100644
--- a/services/apps/data_sink_worker/src/bin/migrate-org-cache-to-identities.ts
+++ b/services/apps/data_sink_worker/src/bin/fix-org-cache-identities-duplicates.ts
@@ -8,24 +8,22 @@ import {
 } from '@crowd/database'
 import { getServiceLogger } from '@crowd/logging'
 import { DB_CONFIG } from '../conf'
-import { distinctBy } from '@crowd/common'
 
 /* eslint-disable @typescript-eslint/no-explicit-any */
 
 interface IOrgCacheToMerge {
-  website: string
+  name: string
   ids: string[]
 }
 
-async function getOrganizationCachesToMigrate(db: DbConnection): Promise<IOrgCacheToMerge[]> {
+async function getOrganizationsToFix(db: DbConnection): Promise<IOrgCacheToMerge[]> {
   const results = await db.any(
     `
-select oc."oldWebsite", array_agg(oc.id) as ids
-from "organizationCaches" oc
-where oc."oldWebsite" is not null
-  and not exists (select 1 from "organizationCacheIdentities" oci where oci.website = oc."oldWebsite")
-group by oc."oldWebsite"
-limit 100;
+    select name, array_agg(id) as ids
+    from "organizationCacheIdentities" where website is null
+    group by name
+    having count(*) > 1
+    limit 100
     `,
   )
 
@@ -45,24 +43,6 @@ where id in ($(ids:csv))
   return results
 }
 
-async function createOrgCacheIdentity(
-  db: DbTransaction,
-  dbInstance: DbInstance,
-  id: string,
-  identities: { name: string; website: string }[],
-): Promise<void> {
-  const prepared = identities.map((i) => {
-    return { ...i, id }
-  })
-
-  const query = dbInstance.helpers.insert(
-    prepared,
-    ['id', 'name', 'website'],
-    'organizationCacheIdentities',
-  )
-  await db.none(query)
-}
-
 async function moveLinksToNewCacheId(
   db: DbTransaction,
   fromIds: string[],
@@ -120,6 +100,10 @@ async function removeCaches(db: DbTransaction, ids: string[]): Promise<void> {
   await db.none(`delete from "organizationCaches" where id in ($(ids:csv))`, { ids })
 }
 
+async function removeCacheIdentities(db: DbTransaction, ids: string[]): Promise<void> {
+  await db.none(`delete from "organizationCacheIdentities" where id in ($(ids:csv))`, { ids })
+}
+
 const columnsToIgnore = [
   'id',
   'createdAt',
@@ -201,47 +185,23 @@ async function processOrgCache(
       }
     }
 
-    let identities = caches.map((c) => {
-      return {
-        name: c.oldName,
-        website: c.oldWebsite,
-      }
-    })
-
-    identities = distinctBy(identities, (i) => `${i.name}:${i.website}`)
-
     try {
       await db.tx(async (tx) => {
-        await createOrgCacheIdentity(tx, dbInstance, data.id, identities)
         if (Object.keys(toUpdate).length > 0) {
          await updateOrganizationCacheData(dbInstance, tx, data.id, toUpdate)
        }

         const cacheIdsToRemove = caches.filter((c) => c.id !== data.id).map((c) => c.id)
 
+        await moveLinksToNewCacheId(tx, cacheIdsToRemove, data.id)
+        await removeCacheIdentities(tx, cacheIdsToRemove)
         await removeCaches(tx, cacheIdsToRemove)
       })
     } catch (err) {
       log.error(err, { id: data.id }, 'Error while processing organization caches!')
       throw err
     }
-  } else if (ids.length === 1) {
-    if (caches.length !== 1) {
-      throw new Error(`Did not find org cache for id: ${ids[0]}`)
-    }
-
-    // no need to merge datapoints just extract identity into organizationCacheIdentities
-    const data = caches[0]
-    try {
-      await db.tx(async (tx) => {
-        await createOrgCacheIdentity(tx, dbInstance, data.id, [
-          { name: data.oldName, website: data.oldWebsite },
-        ])
-      })
-    } catch (err) {
-      log.error(err, { id: data.id }, 'Error while processing organization cache!')
-      throw err
-    }
   } else {
-    throw new Error(`No ids found!`)
+    throw new Error('should not happen')
   }
 }
@@ -249,7 +209,7 @@ setImmediate(async () => {
   const db = await getDbConnection(DB_CONFIG())
   const dbInstance = getDbInstance()
 
-  let results = await getOrganizationCachesToMigrate(db)
+  let results = await getOrganizationsToFix(db)
 
   let count = 0
   while (results.length > 0) {
@@ -259,7 +219,7 @@ setImmediate(async () => {
       log.info({ count }, `Processed org cache!`)
     }
 
-    results = await getOrganizationCachesToMigrate(db)
+    results = await getOrganizationsToFix(db)
   }
 
   process.exit(0)
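
With the unique key in place, code that writes to "organizationCacheIdentities" can lean on it for conflict handling instead of re-creating the duplicates this script cleans up. A minimal sketch, not part of this patch: it assumes the same pg-promise style DbConnection exported by '@crowd/database' that the script above uses, and the helper name upsertOrgCacheIdentity is illustrative only.

    import { DbConnection } from '@crowd/database'

    // Hypothetical helper (illustration only): insert a cache identity and let the
    // ix_unique_website_name constraint added by V1705060063 skip rows that already
    // exist for the same (name, website) pair.
    export async function upsertOrgCacheIdentity(
      db: DbConnection,
      id: string,
      name: string,
      website: string | null,
    ): Promise<void> {
      await db.none(
        `
        insert into "organizationCacheIdentities" (id, name, website)
        values ($(id), $(name), $(website))
        on conflict (name, website) do nothing
        `,
        { id, name, website },
      )
    }

The cleanup itself runs through the renamed package.json entry, script:fix-org-cache-identities-duplicates. It targets duplicate name rows whose website is null, which the new constraint alone does not prevent, since Postgres unique constraints treat null values as distinct.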