From 6987580dd4239d43668a6889201bfd10f38c8396 Mon Sep 17 00:00:00 2001 From: Mariano Goldman Date: Tue, 27 Feb 2024 12:23:07 -0300 Subject: [PATCH] chore: script to garbage collect unused files in the storage (#271) --- src/adapters/aws-config.ts | 1 + src/components.ts | 11 ++- .../handlers/garbage-collection.ts | 79 +++++++++++++++++++ src/controllers/routes.ts | 2 + src/types.ts | 3 +- test/integration/garbage-collection.spec.ts | 77 ++++++++++++++++++ test/mocks/world-creator.ts | 6 ++ 7 files changed, 174 insertions(+), 5 deletions(-) create mode 100644 src/controllers/handlers/garbage-collection.ts create mode 100644 test/integration/garbage-collection.spec.ts diff --git a/src/adapters/aws-config.ts b/src/adapters/aws-config.ts index e31810e6..672914a5 100644 --- a/src/adapters/aws-config.ts +++ b/src/adapters/aws-config.ts @@ -16,6 +16,7 @@ export async function createAwsConfig({ config }: Pick) if (awsEndpoint) { awsConfig.endpoint = awsEndpoint awsConfig.forcePathStyle = true + awsConfig.s3ForcePathStyle = true } return awsConfig diff --git a/src/components.ts b/src/components.ts index e3370fb6..a015f180 100644 --- a/src/components.ts +++ b/src/components.ts @@ -8,9 +8,9 @@ import { AppComponents, GlobalContext, ICommsAdapter, INameDenyListChecker, IWor import { metricDeclarations } from './metrics' import { HTTPProvider } from 'eth-connect' import { - createAwsS3BasedFileSystemContentStorage, createFolderBasedFileSystemContentStorage, - createFsComponent + createFsComponent, + createS3BasedFileSystemContentStorage } from '@dcl/catalyst-storage' import { createStatusComponent } from './adapters/status' import { createLimitsManagerComponent } from './adapters/limits-manager' @@ -30,10 +30,12 @@ import { createWalletStatsComponent } from './adapters/wallet-stats' import { createUpdateOwnerJob } from './adapters/update-owner-job' import { createSnsClient } from './adapters/sns-client' import { createAwsConfig } from './adapters/aws-config' +import { S3 } from 'aws-sdk' // Initialize all the components of the app export async function initComponents(): Promise { const config = await createDotEnvConfigComponent({ path: ['.env.default', '.env'] }) + const awsConfig = await createAwsConfig({ config }) const logs = await createLogComponent({ config }) const logger = logs.getLogger('components') @@ -65,14 +67,15 @@ export async function initComponents(): Promise { const fs = createFsComponent() const storage = bucket - ? await createAwsS3BasedFileSystemContentStorage({ config, logs }, bucket) + ? await createS3BasedFileSystemContentStorage({ logs }, new S3(awsConfig), { + Bucket: bucket + }) : await createFolderBasedFileSystemContentStorage({ fs, logs }, storageFolder) const subGraphUrl = await config.requireString('MARKETPLACE_SUBGRAPH_URL') const marketplaceSubGraph = await createSubgraphComponent({ config, logs, metrics, fetch }, subGraphUrl) const status = await createStatusComponent({ logs, fetch, config }) - const awsConfig = await createAwsConfig({ config }) const snsClient = await createSnsClient({ awsConfig }) const nameDenyListChecker: INameDenyListChecker = await createNameDenyListChecker({ diff --git a/src/controllers/handlers/garbage-collection.ts b/src/controllers/handlers/garbage-collection.ts new file mode 100644 index 00000000..a83c403c --- /dev/null +++ b/src/controllers/handlers/garbage-collection.ts @@ -0,0 +1,79 @@ +import { HandlerContextWithPath, WorldRecord } from '../../types' +import SQL from 'sql-template-strings' +import { IHttpServerComponent } from '@well-known-components/interfaces' + +function formatSecs(millis: number): string { + return `${(millis / 1000).toFixed(2)} secs` +} + +export async function garbageCollectionHandler( + context: HandlerContextWithPath<'database' | 'logs' | 'storage', '/gc'> +): Promise { + const { database, logs, storage } = context.components + const logger = logs.getLogger('garbage-collection') + + async function getAllActiveKeys() { + const start = Date.now() + logger.info('Getting all keys active in the database...') + + const activeKeys = new Set() + const result = await database.query( + SQL`SELECT * + FROM worlds + WHERE worlds.entity IS NOT NULL` + ) + result.rows.forEach((row) => { + // Add entity file and deployment auth-chain + activeKeys.add(row.entity_id) + activeKeys.add(`${row.entity_id}.auth`) + + // Add all referenced content files + for (const file of row.entity.content) { + activeKeys.add(file.hash) + } + }) + + logger.info(`Done in ${formatSecs(Date.now() - start)}. Database contains ${activeKeys.size} active keys.`) + + return activeKeys + } + + logger.info('Starting garbage collection...') + + const activeKeys = await getAllActiveKeys() + + logger.info('Getting keys from storage that are not currently active...') + const start = Date.now() + let totalRemovedKeys = 0 + const batch = new Set() + for await (const key of storage.allFileIds()) { + if (!activeKeys.has(key)) { + batch.add(key) + } + + if (batch.size === 1000) { + logger.info(`Deleting a batch of ${batch.size} keys from storage...`) + await storage.delete([...batch]) + totalRemovedKeys += batch.size + batch.clear() + } + } + + if (batch.size > 0) { + logger.info(`Deleting a batch of ${batch.size} keys from storage...`) + await storage.delete([...batch]) + totalRemovedKeys += batch.size + } + logger.info( + `Done in ${formatSecs(Date.now() - start)}. Deleted ${totalRemovedKeys} keys that are not active in the storage.` + ) + + logger.info('Garbage collection finished.') + + return { + status: 200, + body: { + message: `Garbage collection removed ${totalRemovedKeys} unused keys.` + } + } +} diff --git a/src/controllers/routes.ts b/src/controllers/routes.ts index 143613e7..0237a9ed 100644 --- a/src/controllers/routes.ts +++ b/src/controllers/routes.ts @@ -22,6 +22,7 @@ import { walletStatsHandler } from './handlers/wallet-stats-handler' import { undeployEntity } from './handlers/undeploy-entity-handler' import { bearerTokenMiddleware, errorHandler } from '@dcl/platform-server-commons' import { reprocessABHandler } from './handlers/reprocess-ab-handler' +import { garbageCollectionHandler } from './handlers/garbage-collection' export async function setupRouter(globalContext: GlobalContext): Promise> { const router = new Router() @@ -76,6 +77,7 @@ export async function setupRouter(globalContext: GlobalContext): Promise { + const { localFetch, storage, worldCreator } = components + + const worldName = worldCreator.randomWorldName() + + // deploy an initial version of the scene + const files = new Map() + files.set('abc.png', stringToUtf8Bytes(makeid(150))) + files.set('abc.txt', stringToUtf8Bytes(makeid(50))) + + const { entityId, entity } = await worldCreator.createWorldWithScene({ + worldName, + metadata: { + main: 'abc.txt', + scene: { + base: '20,24', + parcels: ['20,24'] + }, + worldConfiguration: { + name: worldName + } + }, + files + }) + + expect(await storage.exist(entityId)).toBeTruthy() + expect(await storage.exist(`${entityId}.auth`)).toBeTruthy() + expect(await storage.exist(entity.content[0].hash)).toBeTruthy() + expect(await storage.exist(entity.content[1].hash)).toBeTruthy() + + // deploy a new version of the scene + const newFiles = new Map() + newFiles.set('abc.png', stringToUtf8Bytes(makeid(150))) + newFiles.set('abc.txt', stringToUtf8Bytes(makeid(50))) + + const { entityId: entityId2, entity: entity2 } = await worldCreator.createWorldWithScene({ + worldName, + metadata: { + main: 'abc.txt', + scene: { + base: '20,24', + parcels: ['20,24'] + }, + worldConfiguration: { + name: worldName + } + }, + files: newFiles + }) + + expect(await storage.exist(entityId2)).toBeTruthy() + expect(await storage.exist(`${entityId2}.auth`)).toBeTruthy() + expect(await storage.exist(entity2.content[0].hash)).toBeTruthy() + expect(await storage.exist(entity2.content[1].hash)).toBeTruthy() + + // run garbage collection + const response = await localFetch.fetch('/gc', { + method: 'POST', + headers: { + Authorization: 'Bearer setup_some_secret_here' + } + }) + + // Check old files have been removed + expect(response.status).toEqual(200) + expect(await response.json()).toMatchObject({ message: 'Garbage collection removed 4 unused keys.' }) + expect(await storage.exist(entityId)).toBeFalsy() + expect(await storage.exist(`${entityId}.auth`)).toBeFalsy() + expect(await storage.exist(entity.content[0].hash)).toBeFalsy() + expect(await storage.exist(entity.content[1].hash)).toBeFalsy() + }) +}) diff --git a/test/mocks/world-creator.ts b/test/mocks/world-creator.ts index c0655965..e52abff9 100644 --- a/test/mocks/world-creator.ts +++ b/test/mocks/world-creator.ts @@ -5,6 +5,8 @@ import { TextDecoder } from 'util' import { getIdentity, makeid, storeJson } from '../utils' import { Authenticator, AuthIdentity } from '@dcl/crypto' import { defaultPermissions } from '../../src/logic/permissions-checker' +import { hashV1 } from '@dcl/hashing' +import { bufferToStream } from '@dcl/catalyst-storage' export function createWorldCreator({ storage, @@ -44,6 +46,10 @@ export function createWorldCreator({ const authChain = Authenticator.signPayload(signer, entityId) await storeJson(storage, entityId + '.auth', authChain) + for (const [filename, file] of files) { + await storage.storeStream(filename, bufferToStream(file)) + } + const entity = { id: entityId, ...entityWithoutId } await worldsManager.deployScene(worldName, entity, signer.authChain[0].payload)