Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

chore: script to garbage collect unused files in the storage #271

Merged
merged 7 commits into from
Feb 27, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions src/adapters/aws-config.ts
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ export async function createAwsConfig({ config }: Pick<AppComponents, 'config'>)
if (awsEndpoint) {
awsConfig.endpoint = awsEndpoint
awsConfig.forcePathStyle = true
awsConfig.s3ForcePathStyle = true
}

return awsConfig
Expand Down
11 changes: 7 additions & 4 deletions src/components.ts
Original file line number Diff line number Diff line change
Expand Up @@ -8,9 +8,9 @@ import { AppComponents, GlobalContext, ICommsAdapter, INameDenyListChecker, IWor
import { metricDeclarations } from './metrics'
import { HTTPProvider } from 'eth-connect'
import {
createAwsS3BasedFileSystemContentStorage,
createFolderBasedFileSystemContentStorage,
createFsComponent
createFsComponent,
createS3BasedFileSystemContentStorage
} from '@dcl/catalyst-storage'
import { createStatusComponent } from './adapters/status'
import { createLimitsManagerComponent } from './adapters/limits-manager'
Expand All @@ -30,10 +30,12 @@ import { createWalletStatsComponent } from './adapters/wallet-stats'
import { createUpdateOwnerJob } from './adapters/update-owner-job'
import { createSnsClient } from './adapters/sns-client'
import { createAwsConfig } from './adapters/aws-config'
import { S3 } from 'aws-sdk'

// Initialize all the components of the app
export async function initComponents(): Promise<AppComponents> {
const config = await createDotEnvConfigComponent({ path: ['.env.default', '.env'] })
const awsConfig = await createAwsConfig({ config })
const logs = await createLogComponent({ config })

const logger = logs.getLogger('components')
Expand Down Expand Up @@ -65,14 +67,15 @@ export async function initComponents(): Promise<AppComponents> {
const fs = createFsComponent()

const storage = bucket
? await createAwsS3BasedFileSystemContentStorage({ config, logs }, bucket)
? await createS3BasedFileSystemContentStorage({ logs }, new S3(awsConfig), {
Bucket: bucket
})
: await createFolderBasedFileSystemContentStorage({ fs, logs }, storageFolder)

const subGraphUrl = await config.requireString('MARKETPLACE_SUBGRAPH_URL')
const marketplaceSubGraph = await createSubgraphComponent({ config, logs, metrics, fetch }, subGraphUrl)

const status = await createStatusComponent({ logs, fetch, config })
const awsConfig = await createAwsConfig({ config })
const snsClient = await createSnsClient({ awsConfig })

const nameDenyListChecker: INameDenyListChecker = await createNameDenyListChecker({
Expand Down
79 changes: 79 additions & 0 deletions src/controllers/handlers/garbage-collection.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,79 @@
import { HandlerContextWithPath, WorldRecord } from '../../types'
import SQL from 'sql-template-strings'
import { IHttpServerComponent } from '@well-known-components/interfaces'

/**
 * Renders a duration expressed in milliseconds as seconds with exactly two
 * decimal places, followed by the " secs" suffix (e.g. 1500 -> "1.50 secs").
 *
 * @param millis - elapsed time in milliseconds
 * @returns the human-readable duration used in log messages
 */
function formatSecs(millis: number): string {
  const seconds = millis / 1000
  return seconds.toFixed(2) + ' secs'
}

/**
 * Handler for the `/gc` path: removes from storage every key that is not
 * referenced by a world currently recorded in the database.
 *
 * Steps:
 *  1. Build the set of active keys from all `worlds` rows with a non-null
 *     entity: the entity id, its `.auth` companion key and every content hash.
 *  2. Walk all ids reported by the storage and delete the ones that are not in
 *     that set, in batches of 1000 keys.
 *
 * @param context - handler context exposing the `database`, `logs` and `storage` components
 * @returns a 200 response whose body message reports how many keys were removed
 */
export async function garbageCollectionHandler(
  context: HandlerContextWithPath<'database' | 'logs' | 'storage', '/gc'>
): Promise<IHttpServerComponent.IResponse> {
  const { database, logs, storage } = context.components
  const logger = logs.getLogger('garbage-collection')

  // Collects every storage key that is still referenced by a deployed world.
  async function getAllActiveKeys() {
    const startedAt = Date.now()
    logger.info('Getting all keys active in the database...')

    const { rows } = await database.query<WorldRecord>(
      SQL`SELECT *
FROM worlds
WHERE worlds.entity IS NOT NULL`
    )

    const keys = new Set<string>()
    for (const row of rows) {
      // Entity file and deployment auth-chain
      keys.add(row.entity_id)
      keys.add(`${row.entity_id}.auth`)

      // Every content file referenced by the entity
      row.entity.content.forEach((file) => {
        keys.add(file.hash)
      })
    }

    logger.info(`Done in ${formatSecs(Date.now() - startedAt)}. Database contains ${keys.size} active keys.`)

    return keys
  }

  logger.info('Starting garbage collection...')

  const activeKeys = await getAllActiveKeys()

  logger.info('Getting keys from storage that are not currently active...')
  const sweepStartedAt = Date.now()
  let totalRemovedKeys = 0
  const pending = new Set<string>()

  // Deletes the keys accumulated so far, counts them and empties the batch.
  const flushPending = async (): Promise<void> => {
    logger.info(`Deleting a batch of ${pending.size} keys from storage...`)
    await storage.delete([...pending])
    totalRemovedKeys += pending.size
    pending.clear()
  }

  for await (const key of storage.allFileIds()) {
    if (activeKeys.has(key)) {
      continue
    }

    pending.add(key)
    if (pending.size === 1000) {
      await flushPending()
    }
  }

  // Remove whatever is left over from the last, partially filled batch.
  if (pending.size > 0) {
    await flushPending()
  }
  logger.info(
    `Done in ${formatSecs(Date.now() - sweepStartedAt)}. Deleted ${totalRemovedKeys} keys that are not active in the storage.`
  )

  logger.info('Garbage collection finished.')

  return {
    status: 200,
    body: {
      message: `Garbage collection removed ${totalRemovedKeys} unused keys.`
    }
  }
}
2 changes: 2 additions & 0 deletions src/controllers/routes.ts
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ import { walletStatsHandler } from './handlers/wallet-stats-handler'
import { undeployEntity } from './handlers/undeploy-entity-handler'
import { bearerTokenMiddleware, errorHandler } from '@dcl/platform-server-commons'
import { reprocessABHandler } from './handlers/reprocess-ab-handler'
import { garbageCollectionHandler } from './handlers/garbage-collection'

export async function setupRouter(globalContext: GlobalContext): Promise<Router<GlobalContext>> {
const router = new Router<GlobalContext>()
Expand Down Expand Up @@ -76,6 +77,7 @@ export async function setupRouter(globalContext: GlobalContext): Promise<Router<
const secret = await globalContext.components.config.requireString('AUTH_SECRET')
if (secret) {
router.post('/reprocess-ab', bearerTokenMiddleware(secret), reprocessABHandler)
router.post('/gc', bearerTokenMiddleware(secret), garbageCollectionHandler)
}
return router
}
3 changes: 2 additions & 1 deletion src/types.ts
Original file line number Diff line number Diff line change
Expand Up @@ -237,7 +237,8 @@ export type AwsConfig = {
region: string
credentials?: { accessKeyId: string; secretAccessKey: string }
endpoint?: string
forcePathStyle?: boolean
forcePathStyle?: boolean // for SDK v3
s3ForcePathStyle?: boolean // for SDK v2
}

export type SnsClient = {
Expand Down
77 changes: 77 additions & 0 deletions test/integration/garbage-collection.spec.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,77 @@
import { test } from '../components'
import { stringToUtf8Bytes } from 'eth-connect'
import { makeid } from '../utils'

test('garbage collection works', function ({ components }) {
  it('cleans up all unused files', async () => {
    const { localFetch, storage, worldCreator } = components

    const worldName = worldCreator.randomWorldName()

    // Fresh scene metadata per deployment: same world, same parcel.
    const buildMetadata = () => ({
      main: 'abc.txt',
      scene: {
        base: '20,24',
        parcels: ['20,24']
      },
      worldConfiguration: {
        name: worldName
      }
    })

    // Two files with the same names but newly generated content on each call.
    const buildFiles = () =>
      new Map<string, Uint8Array>([
        ['abc.png', stringToUtf8Bytes(makeid(150))],
        ['abc.txt', stringToUtf8Bytes(makeid(50))]
      ])

    const expectStored = async (keys: string[]) => {
      for (const key of keys) {
        expect(await storage.exist(key)).toBeTruthy()
      }
    }

    const expectRemoved = async (keys: string[]) => {
      for (const key of keys) {
        expect(await storage.exist(key)).toBeFalsy()
      }
    }

    // deploy an initial version of the scene
    const firstDeployment = await worldCreator.createWorldWithScene({
      worldName,
      metadata: buildMetadata(),
      files: buildFiles()
    })
    const firstKeys = [
      firstDeployment.entityId,
      `${firstDeployment.entityId}.auth`,
      firstDeployment.entity.content[0].hash,
      firstDeployment.entity.content[1].hash
    ]
    await expectStored(firstKeys)

    // deploy a new version of the scene
    const secondDeployment = await worldCreator.createWorldWithScene({
      worldName,
      metadata: buildMetadata(),
      files: buildFiles()
    })
    const secondKeys = [
      secondDeployment.entityId,
      `${secondDeployment.entityId}.auth`,
      secondDeployment.entity.content[0].hash,
      secondDeployment.entity.content[1].hash
    ]
    await expectStored(secondKeys)

    // run garbage collection
    const response = await localFetch.fetch('/gc', {
      method: 'POST',
      headers: {
        Authorization: 'Bearer setup_some_secret_here'
      }
    })

    // Check old files have been removed
    expect(response.status).toEqual(200)
    expect(await response.json()).toMatchObject({ message: 'Garbage collection removed 4 unused keys.' })
    await expectRemoved(firstKeys)
  })
})
6 changes: 6 additions & 0 deletions test/mocks/world-creator.ts
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@
import { getIdentity, makeid, storeJson } from '../utils'
import { Authenticator, AuthIdentity } from '@dcl/crypto'
import { defaultPermissions } from '../../src/logic/permissions-checker'
import { hashV1 } from '@dcl/hashing'

Check warning on line 8 in test/mocks/world-creator.ts

View workflow job for this annotation

GitHub Actions / build / validations

'hashV1' is defined but never used. Allowed unused vars must match /^_/u

Check warning on line 8 in test/mocks/world-creator.ts

View workflow job for this annotation

GitHub Actions / build / validations

'hashV1' is defined but never used. Allowed unused vars must match /^_/u
import { bufferToStream } from '@dcl/catalyst-storage'

export function createWorldCreator({
storage,
Expand Down Expand Up @@ -44,6 +46,10 @@
const authChain = Authenticator.signPayload(signer, entityId)
await storeJson(storage, entityId + '.auth', authChain)

for (const [filename, file] of files) {
await storage.storeStream(filename, bufferToStream(file))
}

const entity = { id: entityId, ...entityWithoutId }

await worldsManager.deployScene(worldName, entity, signer.authChain[0].payload)
Expand Down
Loading