Skip to content

Commit

Permalink
Features #710: Purge entities
Browse files Browse the repository at this point in the history
  • Loading branch information
sadiqkhoja committed Dec 30, 2024
1 parent 90f985c commit 0713461
Show file tree
Hide file tree
Showing 10 changed files with 725 additions and 15 deletions.
4 changes: 3 additions & 1 deletion lib/bin/purge.js
Original file line number Diff line number Diff line change
Expand Up @@ -21,11 +21,13 @@ const { purgeTask } = require('../task/purge');

const { program } = require('commander');
program.option('-f, --force', 'Force any soft-deleted form to be purged right away.');
program.option('-m, --mode <value>', 'Mode of purging. Can be "forms", "submissions", or "all". Default is "all".', 'all');
program.option('-m, --mode <value>', 'Mode of purging. Can be "forms", "submissions", "entities" or "all". Default is "all".', 'all');
program.option('-i, --formId <integer>', 'Purge a specific form based on its id.', parseInt);
program.option('-p, --projectId <integer>', 'Restrict purging to a specific project.', parseInt);
program.option('-x, --xmlFormId <value>', 'Restrict purging to specific form based on xmlFormId (must be used with projectId).');
program.option('-s, --instanceId <value>', 'Restrict purging to a specific submission based on instanceId (use with projectId and xmlFormId).');
program.option('-d, --datasetName <value>', 'Restrict purging to specific dataset/entity-list based on its name (must be used with projectId).');
program.option('-e, --entityUuid <value>', 'Restrict purging to a specific entitiy based on its UUID (use with projectId and datasetName).');

program.parse();

Expand Down
20 changes: 20 additions & 0 deletions lib/model/migrations/20241224-02-cascade-entity-purge.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
// Copyright 2024 ODK Central Developers
// See the NOTICE file at the top-level directory of this distribution and at
// https://github.com/getodk/central-backend/blob/master/NOTICE.
// This file is part of ODK Central. It is subject to the license terms in
// the LICENSE file found in the top-level directory of this distribution and at
// https://www.apache.org/licenses/LICENSE-2.0. No part of ODK Central,
// including this file, may be copied, modified, propagated, or distributed
// except according to the terms contained in the LICENSE file.

const up = (db) => db.raw(`
ALTER TABLE public.entity_defs DROP CONSTRAINT entity_defs_entityid_foreign;
ALTER TABLE public.entity_defs ADD CONSTRAINT entity_defs_entityid_foreign FOREIGN KEY ("entityId") REFERENCES public.entities(id) ON DELETE CASCADE;
`);

const down = ((db) => db.raw(`
ALTER TABLE public.entity_defs DROP CONSTRAINT entity_defs_entityid_foreign;
ALTER TABLE public.entity_defs ADD CONSTRAINT entity_defs_entityid_foreign FOREIGN KEY ("entityId") REFERENCES public.entities(id);
`));

module.exports = { up, down };
24 changes: 24 additions & 0 deletions lib/model/migrations/20241226-01-indices-for-purging-entities.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
// Copyright 2024 ODK Central Developers
// See the NOTICE file at the top-level directory of this distribution and at
// https://github.com/getodk/central-backend/blob/master/NOTICE.
// This file is part of ODK Central. It is subject to the license terms in
// the LICENSE file found in the top-level directory of this distribution and at
// https://www.apache.org/licenses/LICENSE-2.0. No part of ODK Central,
// including this file, may be copied, modified, propagated, or distributed
// except according to the terms contained in the LICENSE file.

const up = (db) => db.raw(`
CREATE INDEX audits_details_entity_uuid ON public.audits USING hash ((details->'entity'->>'uuid'))
WHERE ACTION IN ('entity.create', 'entity.update', 'entity.update.version', 'entity.update.resolve', 'entity.delete', 'entity.restore');
CREATE INDEX audits_details_entityUuids ON audits USING gin ((details -> 'entityUuids') jsonb_path_ops)
WHERE ACTION = 'entities.purge';
`);

const down = ((db) => db.raw(`
DROP INDEX audits_details_entity_uuid;
DROP INDEX audits_details_entityUuids;
`));

module.exports = { up, down };

17 changes: 17 additions & 0 deletions lib/model/migrations/20241227-01-backfill-audit-entity-uuid.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
// Copyright 2024 ODK Central Developers
// See the NOTICE file at the top-level directory of this distribution and at
// https://github.com/getodk/central-backend/blob/master/NOTICE.
// This file is part of ODK Central. It is subject to the license terms in
// the LICENSE file found in the top-level directory of this distribution and at
// https://www.apache.org/licenses/LICENSE-2.0. No part of ODK Central,
// including this file, may be copied, modified, propagated, or distributed
// except according to the terms contained in the LICENSE file.

const up = (db) => db.raw(`
UPDATE audits SET details = ('{ "entity":{ "uuid":"' || (details->>'uuid') || '"}}')::JSONB
WHERE action = 'entity.delete' AND details \\? 'uuid';
`);

const down = () => {};

module.exports = { up, down };
111 changes: 107 additions & 4 deletions lib/model/query/entities.js
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,18 @@ const { isTrue } = require('../../util/http');
const Problem = require('../../util/problem');
const { getOrReject, runSequentially } = require('../../util/promise');

/////////////////////////////////////////////////////////////////////////////////
// Check if provided UUIDs (array) were used by purged entities.

const _getPurgedUuids = (all, uuids) => all(sql`
SELECT jsonb_array_elements_text(details -> 'entityUuids') AS uuid FROM audits a WHERE action = 'entities.purge'
INTERSECT
SELECT jsonb_array_elements_text(${JSON.stringify(uuids)})`)
.then(all.map(r => r.uuid));

const _isPurgedUuid = (maybeOneFirst, uuid) => maybeOneFirst(sql`
SELECT TRUE FROM audits WHERE action = 'entities.purge' AND details -> 'entityUuids' @> ${JSON.stringify([uuid])}
`);

/////////////////////////////////////////////////////////////////////////////////
// ENTITY DEF SOURCES
Expand Down Expand Up @@ -65,10 +77,16 @@ const nextval = sql`nextval(pg_get_serial_sequence('entities', 'id'))`;
// standard authenticated API request. The worker has better access to the event
// actor/initiator and actee/target so it will do the logging itself (including
// error logging).
const createNew = (dataset, partial, subDef, sourceId, userAgentIn) => ({ one, context }) => {
const createNew = (dataset, partial, subDef, sourceId, userAgentIn) => async ({ one, context, maybeOneFirst }) => {
let creatorId;
let userAgent;

// Validate UUID against purged entities
(await _isPurgedUuid(maybeOneFirst, partial.uuid))
.ifDefined(() => {
throw Problem.user.uniquenessViolation({ fields: 'uuid', values: partial.uuid });
});

// Set creatorId and userAgent from submission def if it exists
if (subDef != null) {
({ submitterId: creatorId, userAgent } = subDef);
Expand Down Expand Up @@ -111,6 +129,12 @@ const createMany = (dataset, rawEntities, sourceId, userAgentIn) => async ({ all
const creatorId = context.auth.actor.map((actor) => actor.id).orNull();
const userAgent = blankStringToNull(userAgentIn);

// Validate UUID with the purged entities
const purgedUuids = await _getPurgedUuids(all, rawEntities.map(e => e.uuid));
if (purgedUuids.length > 0) {
throw Problem.user.uniquenessViolation({ fields: 'uuid', values: purgedUuids.join(', ') });
}

// Augment parsed entity data with dataset and creator IDs
const entitiesForInsert = rawEntities.map(e => new Entity({ datasetId: dataset.id, creatorId, ...e }));
const entities = await all(sql`${insertMany(entitiesForInsert)} RETURNING id`);
Expand Down Expand Up @@ -849,15 +873,94 @@ CROSS JOIN
const del = (entity) => ({ run }) =>
run(markDeleted(entity));

del.audit = (entity, dataset) => (log) => log('entity.delete', entity.with({ acteeId: dataset.acteeId }), { uuid: entity.uuid });
del.audit = (entity, dataset) => (log) =>
log(
'entity.delete',
entity.with({ acteeId: dataset.acteeId }),
{ entity: { uuid: entity.uuid } }
);

////////////////////////////////////////////////////////////////////////////////
// RESTORE ENTITY

const restore = (entity) => ({ run }) =>
run(markUndeleted(entity));

restore.audit = (entity, dataset) => (log) => log('entity.restore', entity.with({ acteeId: dataset.acteeId }), { uuid: entity.uuid });
restore.audit = (entity, dataset) => (log) =>
log(
'entity.restore',
entity.with({ acteeId: dataset.acteeId }),
{ entity: { uuid: entity.uuid } }
);

////////////////////////////////////////////////////////////////////////////////
// PURGE DELETED ENTITIES

const PURGE_DAY_RANGE = config.has('default.taskSchedule.purge')
? config.get('default.taskSchedule.purge')
: 30; // Default is 30 days

const _trashedFilter = (force, projectId, datasetName, entityUuid) => {
const idFilters = [sql`TRUE`];
if (projectId) idFilters.push(sql`datasets."projectId" = ${projectId}`);
if (datasetName) idFilters.push(sql`datasets."name" = ${datasetName}`);
if (entityUuid) idFilters.push(sql`entities."uuid" = ${entityUuid}`);

const idFilter = sql.join(idFilters, sql` AND `);

return (force
? sql`entities."deletedAt" IS NOT NULL AND ${idFilter}`
: sql`entities."deletedAt" < CURRENT_DATE - CAST(${PURGE_DAY_RANGE} AS INT) AND ${idFilter}`);
};

const purge = (force = false, projectId = null, datasetName = null, entityUuid = null) => ({ oneFirst }) => {
if (entityUuid && (!projectId || !datasetName)) {
throw Problem.internal.unknown({ error: 'Must specify projectId and datasetName to purge a specify entity.' });
}
if (datasetName && !projectId) {
throw Problem.internal.unknown({ error: 'Must specify projectId to purge all entities of a dataset/entity-list.' });
}

// Reminder: 'notes' in audit table is set by 'x-action-notes' header of all
// APIs with side-effects. Although we don't provide any way to set it from
// the frontend (as of 2024.3), it is a good idea to clear it for purged
// entities.
return oneFirst(sql`
WITH redacted_audits AS (
UPDATE audits SET notes = NULL
FROM entities
JOIN datasets ON entities."datasetId" = datasets.id
WHERE (audits.details->'entity'->>'uuid') = entities.uuid
AND ${_trashedFilter(force, projectId, datasetName, entityUuid)}
), purge_audit AS (
INSERT INTO audits ("action", "loggedAt", "processed", "details")
SELECT 'entities.purge', clock_timestamp(), clock_timestamp(), jsonb_build_object('entitiesDeleted', COUNT(*), 'entityUuids', ARRAY_AGG(uuid))
FROM entities
JOIN datasets ON entities."datasetId" = datasets.id
WHERE ${_trashedFilter(force, projectId, datasetName, entityUuid)}
HAVING count(*) > 0
), delete_entity_sources AS (
DELETE FROM entity_def_sources
USING entity_defs, entities, datasets
WHERE entity_def_sources.id = entity_defs."sourceId"
AND entity_defs."entityId" = entities.id
AND entities."datasetId" = datasets.id
AND ${_trashedFilter(force, projectId, datasetName, entityUuid)}
), delete_submission_backlog AS (
DELETE FROM entity_submission_backlog
USING entities, datasets
WHERE entity_submission_backlog."entityUuid"::varchar = entities.uuid
AND entities."datasetId" = datasets.id
AND ${_trashedFilter(force, projectId, datasetName, entityUuid)}
), deleted_entities AS (
DELETE FROM entities
USING datasets
WHERE entities."datasetId" = datasets.id
AND ${_trashedFilter(force, projectId, datasetName, entityUuid)}
RETURNING 1
)
SELECT COUNT(*) FROM deleted_entities`);
};

module.exports = {
createNew, _processSubmissionEvent,
Expand All @@ -875,5 +978,5 @@ module.exports = {
countByDatasetId, getById, getDef,
getAll, getAllDefs, del,
createEntitiesFromPendingSubmissions,
resolveConflict, restore
resolveConflict, restore, purge
};
11 changes: 8 additions & 3 deletions lib/task/purge.js
Original file line number Diff line number Diff line change
Expand Up @@ -12,23 +12,28 @@ const { task } = require('./task');
const purgeTask = task.withContainer((container) => async (options = {}) => {
// Form/submission purging will happen within its own transaction
const message = await container.db.transaction(async trxn => {
const { Forms, Submissions } = container.with({ db: trxn });
const { Forms, Submissions, Entities } = container.with({ db: trxn });
try {
if (options.mode === 'entities' || options.datasetName || options.entityUuid) {
const count = await Entities.purge(options.force, options.projectId, options.datasetName, options.entityUuid);
return `Entities purged: ${count}`;
}
if (options.mode === 'submissions' || options.instanceId) {
const count = await Submissions.purge(options.force, options.projectId, options.xmlFormId, options.instanceId);
return `Submissions purged: ${count}`;
} else if (options.mode === 'forms' || (options.formId || options.xmlFormId)) {
const count = await Forms.purge(options.force, options.formId, options.projectId, options.xmlFormId);
return `Forms purged: ${count}`;
} else {
// Purge both Forms and Submissions according to options
// Purge all Forms, Submissions and Entities according to options
const formCount = await Forms.purge(options.force, options.formId, options.projectId, options.xmlFormId);
const submissionCount = await Submissions.purge(options.force, options.projectId, options.xmlFormId, options.instanceId);
const entitiesCount = await Entities.purge(options.force, options.projectId, options.datasetName, options.entityUuid);

// Related to form purging: deletes draft form defs that are not in use by any form and have no associated submission defs
await Forms.clearUnneededDrafts();

return `Forms purged: ${formCount}, Submissions purged: ${submissionCount}`;
return `Forms purged: ${formCount}, Submissions purged: ${submissionCount}, Entities purged: ${entitiesCount}`;
}
} catch (error) {
return error?.problemDetails?.error;
Expand Down
4 changes: 2 additions & 2 deletions test/integration/api/entities.js
Original file line number Diff line number Diff line change
Expand Up @@ -2105,7 +2105,7 @@ describe('Entities API', () => {
.then(o => o.get())
.then(audit => {
audit.acteeId.should.not.be.null();
audit.details.uuid.should.be.eql('12345678-1234-4123-8234-123456789abc');
audit.details.entity.uuid.should.be.eql('12345678-1234-4123-8234-123456789abc');
});

await asAlice.get('/v1/projects/1/datasets/people/entities/12345678-1234-4123-8234-123456789abc')
Expand Down Expand Up @@ -2169,7 +2169,7 @@ describe('Entities API', () => {
.then(o => o.get())
.then(audit => {
audit.acteeId.should.not.be.null();
audit.details.uuid.should.be.eql('12345678-1234-4123-8234-123456789abc');
audit.details.entity.uuid.should.be.eql('12345678-1234-4123-8234-123456789abc');
});

await asAlice.get('/v1/projects/1/datasets/people/entities/12345678-1234-4123-8234-123456789abc')
Expand Down
Loading

0 comments on commit 0713461

Please sign in to comment.