Skip to content

Commit

Permalink
chore: script to garbage collect unused files in the storage (#271)
Browse files: browse the repository at this point in the history
  • Loading branch information
marianogoldman authored Feb 27, 2024
1 parent 5083c36 commit 6987580
Show file tree
Hide file tree
Showing 7 changed files with 174 additions and 5 deletions.
1 change: 1 addition & 0 deletions src/adapters/aws-config.ts
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ export async function createAwsConfig({ config }: Pick<AppComponents, 'config'>)
if (awsEndpoint) {
awsConfig.endpoint = awsEndpoint
awsConfig.forcePathStyle = true
awsConfig.s3ForcePathStyle = true
}

return awsConfig
Expand Down
11 changes: 7 additions & 4 deletions src/components.ts
Original file line number Diff line number Diff line change
Expand Up @@ -8,9 +8,9 @@ import { AppComponents, GlobalContext, ICommsAdapter, INameDenyListChecker, IWor
import { metricDeclarations } from './metrics'
import { HTTPProvider } from 'eth-connect'
import {
createAwsS3BasedFileSystemContentStorage,
createFolderBasedFileSystemContentStorage,
createFsComponent
createFsComponent,
createS3BasedFileSystemContentStorage
} from '@dcl/catalyst-storage'
import { createStatusComponent } from './adapters/status'
import { createLimitsManagerComponent } from './adapters/limits-manager'
Expand All @@ -30,10 +30,12 @@ import { createWalletStatsComponent } from './adapters/wallet-stats'
import { createUpdateOwnerJob } from './adapters/update-owner-job'
import { createSnsClient } from './adapters/sns-client'
import { createAwsConfig } from './adapters/aws-config'
import { S3 } from 'aws-sdk'

// Initialize all the components of the app
export async function initComponents(): Promise<AppComponents> {
const config = await createDotEnvConfigComponent({ path: ['.env.default', '.env'] })
const awsConfig = await createAwsConfig({ config })
const logs = await createLogComponent({ config })

const logger = logs.getLogger('components')
Expand Down Expand Up @@ -65,14 +67,15 @@ export async function initComponents(): Promise<AppComponents> {
const fs = createFsComponent()

const storage = bucket
? await createAwsS3BasedFileSystemContentStorage({ config, logs }, bucket)
? await createS3BasedFileSystemContentStorage({ logs }, new S3(awsConfig), {
Bucket: bucket
})
: await createFolderBasedFileSystemContentStorage({ fs, logs }, storageFolder)

const subGraphUrl = await config.requireString('MARKETPLACE_SUBGRAPH_URL')
const marketplaceSubGraph = await createSubgraphComponent({ config, logs, metrics, fetch }, subGraphUrl)

const status = await createStatusComponent({ logs, fetch, config })
const awsConfig = await createAwsConfig({ config })
const snsClient = await createSnsClient({ awsConfig })

const nameDenyListChecker: INameDenyListChecker = await createNameDenyListChecker({
Expand Down
79 changes: 79 additions & 0 deletions src/controllers/handlers/garbage-collection.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,79 @@
import { HandlerContextWithPath, WorldRecord } from '../../types'
import SQL from 'sql-template-strings'
import { IHttpServerComponent } from '@well-known-components/interfaces'

/**
 * Renders a duration expressed in milliseconds as a human-readable number of
 * seconds with exactly two decimals, e.g. `1500` becomes `"1.50 secs"`.
 *
 * @param millis - Elapsed time in milliseconds.
 * @returns The duration in seconds followed by the `secs` suffix.
 */
function formatSecs(millis: number): string {
  const seconds = millis / 1000
  return seconds.toFixed(2) + ' secs'
}

/**
 * Handler for `POST /gc`: deletes from the storage every key that is not
 * referenced by a world currently deployed in the database.
 *
 * A key is considered active when it is the entity id of a deployed world, the
 * matching `<entity id>.auth` key, or the hash of any file listed in the
 * entity content. Every other key yielded by `storage.allFileIds()` is deleted,
 * in batches.
 *
 * Errors thrown by the database or the storage are not caught here and
 * propagate to the caller.
 *
 * NOTE(review): the set of active keys is computed once, before the storage is
 * listed. Keys written by a deployment that happens in between would look
 * unused - confirm this endpoint is not run concurrently with deployments.
 *
 * @param context - Handler context exposing the `database`, `logs` and `storage` components.
 * @returns A 200 response whose body message reports how many keys were removed.
 */
export async function garbageCollectionHandler(
  context: HandlerContextWithPath<'database' | 'logs' | 'storage', '/gc'>
): Promise<IHttpServerComponent.IResponse> {
  const { database, logs, storage } = context.components
  const logger = logs.getLogger('garbage-collection')

  // Upper bound of keys sent to the storage in a single delete call.
  const deleteBatchSize = 1000

  // Collects every storage key referenced by a world that has a scene deployed.
  async function getAllActiveKeys(): Promise<Set<string>> {
    const start = Date.now()
    logger.info('Getting all keys active in the database...')

    const activeKeys = new Set<string>()
    // Only the two columns read below are fetched, instead of the whole row.
    const result = await database.query<Pick<WorldRecord, 'entity_id' | 'entity'>>(
      SQL`SELECT entity_id, entity
          FROM worlds
          WHERE worlds.entity IS NOT NULL`
    )
    for (const row of result.rows) {
      // Add entity file and deployment auth-chain
      activeKeys.add(row.entity_id)
      activeKeys.add(`${row.entity_id}.auth`)

      // Add all referenced content files
      for (const file of row.entity.content) {
        activeKeys.add(file.hash)
      }
    }

    logger.info(`Done in ${formatSecs(Date.now() - start)}. Database contains ${activeKeys.size} active keys.`)

    return activeKeys
  }

  // Deletes the pending keys from the storage, empties the batch and returns
  // how many keys were removed. Does nothing for an empty batch.
  async function flushBatch(pending: Set<string>): Promise<number> {
    const count = pending.size
    if (count === 0) {
      return 0
    }

    logger.info(`Deleting a batch of ${count} keys from storage...`)
    await storage.delete([...pending])
    pending.clear()
    return count
  }

  logger.info('Starting garbage collection...')

  const activeKeys = await getAllActiveKeys()

  logger.info('Getting keys from storage that are not currently active...')
  const start = Date.now()
  let totalRemovedKeys = 0
  const batch = new Set<string>()
  for await (const key of storage.allFileIds()) {
    if (activeKeys.has(key)) {
      continue
    }

    batch.add(key)
    if (batch.size >= deleteBatchSize) {
      const removed = await flushBatch(batch)
      totalRemovedKeys += removed
    }
  }

  // Remove whatever is left once the listing is exhausted.
  const removedInLastBatch = await flushBatch(batch)
  totalRemovedKeys += removedInLastBatch

  logger.info(
    `Done in ${formatSecs(Date.now() - start)}. Deleted ${totalRemovedKeys} keys that are not active in the storage.`
  )

  logger.info('Garbage collection finished.')

  return {
    status: 200,
    body: {
      message: `Garbage collection removed ${totalRemovedKeys} unused keys.`
    }
  }
}
2 changes: 2 additions & 0 deletions src/controllers/routes.ts
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ import { walletStatsHandler } from './handlers/wallet-stats-handler'
import { undeployEntity } from './handlers/undeploy-entity-handler'
import { bearerTokenMiddleware, errorHandler } from '@dcl/platform-server-commons'
import { reprocessABHandler } from './handlers/reprocess-ab-handler'
import { garbageCollectionHandler } from './handlers/garbage-collection'

export async function setupRouter(globalContext: GlobalContext): Promise<Router<GlobalContext>> {
const router = new Router<GlobalContext>()
Expand Down Expand Up @@ -76,6 +77,7 @@ export async function setupRouter(globalContext: GlobalContext): Promise<Router<
const secret = await globalContext.components.config.requireString('AUTH_SECRET')
if (secret) {
router.post('/reprocess-ab', bearerTokenMiddleware(secret), reprocessABHandler)
router.post('/gc', bearerTokenMiddleware(secret), garbageCollectionHandler)
}
return router
}
3 changes: 2 additions & 1 deletion src/types.ts
Original file line number Diff line number Diff line change
Expand Up @@ -237,7 +237,8 @@ export type AwsConfig = {
region: string
credentials?: { accessKeyId: string; secretAccessKey: string }
endpoint?: string
forcePathStyle?: boolean
forcePathStyle?: boolean // for SDK v3
s3ForcePathStyle?: boolean // for SDK v2
}

export type SnsClient = {
Expand Down
77 changes: 77 additions & 0 deletions test/integration/garbage-collection.spec.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,77 @@
import { test } from '../components'
import { stringToUtf8Bytes } from 'eth-connect'
import { makeid } from '../utils'

// Integration test: deploys two versions of a scene to the same world and
// verifies that the garbage collection endpoint removes the keys of the
// first (replaced) deployment.
test('garbage collection works', function ({ components }) {
  it('cleans up all unused files', async () => {
    const { localFetch, storage, worldCreator } = components

    const worldName = worldCreator.randomWorldName()

    // Two files with random content, so each deployment gets its own contents.
    const randomSceneFiles = (): Map<string, Uint8Array> =>
      new Map<string, Uint8Array>([
        ['abc.png', stringToUtf8Bytes(makeid(150))],
        ['abc.txt', stringToUtf8Bytes(makeid(50))]
      ])

    // Deploys a scene with fresh random files to the world under test.
    const deployScene = () =>
      worldCreator.createWorldWithScene({
        worldName,
        metadata: {
          main: 'abc.txt',
          scene: {
            base: '20,24',
            parcels: ['20,24']
          },
          worldConfiguration: {
            name: worldName
          }
        },
        files: randomSceneFiles()
      })

    // deploy an initial version of the scene
    const firstDeployment = await deployScene()
    const oldKeys = [
      firstDeployment.entityId,
      `${firstDeployment.entityId}.auth`,
      firstDeployment.entity.content[0].hash,
      firstDeployment.entity.content[1].hash
    ]
    for (const key of oldKeys) {
      expect(await storage.exist(key)).toBeTruthy()
    }

    // deploy a new version of the scene
    const secondDeployment = await deployScene()
    const newKeys = [
      secondDeployment.entityId,
      `${secondDeployment.entityId}.auth`,
      secondDeployment.entity.content[0].hash,
      secondDeployment.entity.content[1].hash
    ]
    for (const key of newKeys) {
      expect(await storage.exist(key)).toBeTruthy()
    }

    // run garbage collection
    const response = await localFetch.fetch('/gc', {
      method: 'POST',
      headers: {
        Authorization: 'Bearer setup_some_secret_here'
      }
    })

    // Check old files have been removed
    expect(response.status).toEqual(200)
    expect(await response.json()).toMatchObject({ message: 'Garbage collection removed 4 unused keys.' })
    for (const key of oldKeys) {
      expect(await storage.exist(key)).toBeFalsy()
    }
  })
})
6 changes: 6 additions & 0 deletions test/mocks/world-creator.ts
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@ import { TextDecoder } from 'util'
import { getIdentity, makeid, storeJson } from '../utils'
import { Authenticator, AuthIdentity } from '@dcl/crypto'
import { defaultPermissions } from '../../src/logic/permissions-checker'
import { hashV1 } from '@dcl/hashing'

Check warning on line 8 in test/mocks/world-creator.ts (GitHub Actions / build / validations; reported twice): 'hashV1' is defined but never used. Allowed unused vars must match /^_/u
import { bufferToStream } from '@dcl/catalyst-storage'

export function createWorldCreator({
storage,
Expand Down Expand Up @@ -44,6 +46,10 @@ export function createWorldCreator({
const authChain = Authenticator.signPayload(signer, entityId)
await storeJson(storage, entityId + '.auth', authChain)

for (const [filename, file] of files) {
await storage.storeStream(filename, bufferToStream(file))
}

const entity = { id: entityId, ...entityWithoutId }

await worldsManager.deployScene(worldName, entity, signer.authChain[0].payload)
Expand Down

0 comments on commit 6987580

Please sign in to comment.