Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

#710: Restore Entities, Purge Entities, ability to see deleted Entities #1349

Open
wants to merge 6 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion lib/bin/purge.js
Original file line number Diff line number Diff line change
Expand Up @@ -21,11 +21,13 @@ const { purgeTask } = require('../task/purge');

const { program } = require('commander');
program.option('-f, --force', 'Force any soft-deleted form to be purged right away.');
program.option('-m, --mode <value>', 'Mode of purging. Can be "forms", "submissions", or "all". Default is "all".', 'all');
program.option('-m, --mode <value>', 'Mode of purging. Can be "forms", "submissions", "entities" or "all". Default is "all".', 'all');
program.option('-i, --formId <integer>', 'Purge a specific form based on its id.', parseInt);
program.option('-p, --projectId <integer>', 'Restrict purging to a specific project.', parseInt);
program.option('-x, --xmlFormId <value>', 'Restrict purging to specific form based on xmlFormId (must be used with projectId).');
program.option('-s, --instanceId <value>', 'Restrict purging to a specific submission based on instanceId (use with projectId and xmlFormId).');
program.option('-d, --datasetName <value>', 'Restrict purging to specific dataset/entity-list based on its name (must be used with projectId).');
program.option('-e, --entityUuid <value>', 'Restrict purging to a specific entitiy based on its UUID (use with projectId and datasetName).');

program.parse();

Expand Down
2 changes: 2 additions & 0 deletions lib/data/entity.js
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ const { sanitizeOdataIdentifier, blankStringToNull, isBlank } = require('../util
const odataToColumnMap = new Map([
['__system/createdAt', 'entities.createdAt'],
['__system/updatedAt', 'entities.updatedAt'],
['__system/deletedAt', 'entities.deletedAt'],
['__system/creatorId', 'entities.creatorId'],
['__system/conflict', 'entities.conflict']
]);
Expand Down Expand Up @@ -284,6 +285,7 @@ const selectFields = (entity, properties, selectedProperties) => {
creatorName: entity.aux.creator.displayName,
updates: entity.updates,
updatedAt: entity.updatedAt,
deletedAt: entity.deletedAt,
version: entity.def.version,
conflict: entity.conflict
};
Expand Down
1 change: 1 addition & 0 deletions lib/formats/odata.js
Original file line number Diff line number Diff line change
Expand Up @@ -233,6 +233,7 @@ const edmxTemplaterForEntities = template(`<?xml version="1.0" encoding="UTF-8"?
<Property Name="creatorName" Type="Edm.String"/>
<Property Name="updates" Type="Edm.Int64"/>
<Property Name="updatedAt" Type="Edm.DateTimeOffset"/>
<Property Name="deletedAt" Type="Edm.DateTimeOffset"/>
<Property Name="version" Type="Edm.Int64"/>
<Property Name="conflict" Type="Edm.String"/>
</ComplexType>
Expand Down
20 changes: 20 additions & 0 deletions lib/model/migrations/20241224-01-entity-restore-verb.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
// Copyright 2024 ODK Central Developers
// See the NOTICE file at the top-level directory of this distribution and at
// https://github.com/getodk/central-backend/blob/master/NOTICE.
// This file is part of ODK Central. It is subject to the license terms in
// the LICENSE file found in the top-level directory of this distribution and at
// https://www.apache.org/licenses/LICENSE-2.0. No part of ODK Central,
// including this file, may be copied, modified, propagated, or distributed
// except according to the terms contained in the LICENSE file.

const up = (db) => db.raw(`
UPDATE roles
SET verbs = verbs || '["entity.restore"]'::jsonb
WHERE system IN ('admin', 'manager')`);

const down = (db) => db.raw(`
UPDATE roles
SET verbs = verbs - 'entity.restore'
WHERE system IN ('admin', 'manager')`);

module.exports = { up, down };
20 changes: 20 additions & 0 deletions lib/model/migrations/20241224-02-cascade-entity-purge.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
// Copyright 2024 ODK Central Developers
// See the NOTICE file at the top-level directory of this distribution and at
// https://github.com/getodk/central-backend/blob/master/NOTICE.
// This file is part of ODK Central. It is subject to the license terms in
// the LICENSE file found in the top-level directory of this distribution and at
// https://www.apache.org/licenses/LICENSE-2.0. No part of ODK Central,
// including this file, may be copied, modified, propagated, or distributed
// except according to the terms contained in the LICENSE file.

const up = (db) => db.raw(`
ALTER TABLE public.entity_defs DROP CONSTRAINT entity_defs_entityid_foreign;
ALTER TABLE public.entity_defs ADD CONSTRAINT entity_defs_entityid_foreign FOREIGN KEY ("entityId") REFERENCES public.entities(id) ON DELETE CASCADE;
`);

const down = ((db) => db.raw(`
ALTER TABLE public.entity_defs DROP CONSTRAINT entity_defs_entityid_foreign;
ALTER TABLE public.entity_defs ADD CONSTRAINT entity_defs_entityid_foreign FOREIGN KEY ("entityId") REFERENCES public.entities(id);
`));

module.exports = { up, down };
24 changes: 24 additions & 0 deletions lib/model/migrations/20241226-01-indices-for-purging-entities.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
// Copyright 2024 ODK Central Developers
// See the NOTICE file at the top-level directory of this distribution and at
// https://github.com/getodk/central-backend/blob/master/NOTICE.
// This file is part of ODK Central. It is subject to the license terms in
// the LICENSE file found in the top-level directory of this distribution and at
// https://www.apache.org/licenses/LICENSE-2.0. No part of ODK Central,
// including this file, may be copied, modified, propagated, or distributed
// except according to the terms contained in the LICENSE file.

const up = (db) => db.raw(`
CREATE INDEX audits_details_entity_uuid ON public.audits USING hash ((details->'entity'->>'uuid'))
WHERE ACTION IN ('entity.create', 'entity.update', 'entity.update.version', 'entity.update.resolve', 'entity.delete', 'entity.restore');

CREATE INDEX audits_details_entityUuids ON audits USING gin ((details -> 'entityUuids') jsonb_path_ops)
WHERE ACTION = 'entities.purge';
`);

const down = ((db) => db.raw(`
DROP INDEX audits_details_entity_uuid;
DROP INDEX audits_details_entityUuids;
`));

module.exports = { up, down };

17 changes: 17 additions & 0 deletions lib/model/migrations/20241227-01-backfill-audit-entity-uuid.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
// Copyright 2024 ODK Central Developers
// See the NOTICE file at the top-level directory of this distribution and at
// https://github.com/getodk/central-backend/blob/master/NOTICE.
// This file is part of ODK Central. It is subject to the license terms in
// the LICENSE file found in the top-level directory of this distribution and at
// https://www.apache.org/licenses/LICENSE-2.0. No part of ODK Central,
// including this file, may be copied, modified, propagated, or distributed
// except according to the terms contained in the LICENSE file.

const up = (db) => db.raw(`
UPDATE audits SET details = ('{ "entity":{ "uuid":"' || (details->>'uuid') || '"}}')::JSONB
WHERE action = 'entity.delete' AND details \\? 'uuid';
`);

const down = () => {};

module.exports = { up, down };
35 changes: 23 additions & 12 deletions lib/model/query/audits.js
Original file line number Diff line number Diff line change
Expand Up @@ -150,11 +150,19 @@ const getBySubmissionId = (submissionId, options) => ({ all }) => _getBySubmissi
// There is a separate query below to assemble full submission details for non-deleted
// submissions, but it was far too slow to have be part of this query.
const _getByEntityId = (fields, options, entityId) => sql`
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Objective of the change in this file is to get entity.delete and entity.restore logs as well

SELECT ${fields} FROM entity_defs

LEFT JOIN entity_def_sources on entity_def_sources.id = entity_defs."sourceId"
INNER JOIN audits ON ((audits.details->>'entityDefId')::INTEGER = entity_defs.id OR (audits.details->>'sourceId')::INTEGER = entity_def_sources.id)

SELECT ${fields} FROM (
SELECT audits.* FROM audits
JOIN entities ON (audits.details->'entity'->>'uuid') = entities.uuid
WHERE entities.id = ${entityId}
Comment on lines +154 to +156
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The fetches all logs of the entity (create, update, delete, restore)

UNION ALL
SELECT audits.* FROM audits
JOIN entity_def_sources ON (audits.details->>'sourceId')::INTEGER = entity_def_sources.id
JOIN entity_defs ON entity_def_sources.id = entity_defs."sourceId"
WHERE entity_defs."entityId" = ${entityId} AND audits.action = 'entity.bulk.create'
Comment on lines +158 to +161
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This fetches only entity.bulk.create event

) audits

LEFT JOIN entity_defs ON (audits.details->>'entityDefId')::INTEGER = entity_defs.id
LEFT JOIN entity_def_sources ON entity_defs."sourceId" = entity_def_sources.id OR (audits.details->>'sourceId')::INTEGER = entity_def_sources.id
LEFT JOIN actors ON actors.id=audits."actorId"

LEFT JOIN audits triggering_event ON entity_def_sources."auditId" = triggering_event.id
Expand All @@ -165,7 +173,6 @@ SELECT ${fields} FROM entity_defs
LEFT JOIN audits submission_create_event ON (triggering_event.details->'submissionId')::INTEGER = (submission_create_event.details->'submissionId')::INTEGER AND submission_create_event.action = 'submission.create'
LEFT JOIN actors submission_create_event_actor ON submission_create_event_actor.id = submission_create_event."actorId"

WHERE entity_defs."entityId" = ${entityId}
ORDER BY audits."loggedAt" DESC, audits.id DESC
${page(options)}`;

Expand Down Expand Up @@ -253,13 +260,17 @@ const getByEntityId = (entityId, options) => ({ all }) => {

// Look up the full Submission information and attempt to merge it in if it exists.
const subOption = entityDefDict[audit.aux.def.id];
const fullSubmission = subOption.aux.submission
.map(s => s.withAux('submitter', subOption.aux.submissionActor.orNull()))
.map(s => s.withAux('currentVersion', subOption.aux.currentVersion.map(v => v.withAux('submitter', subOption.aux.currentSubmissionActor.orNull()))))
.map(s => s.forApi())
.map(s => mergeLeft(s, { xmlFormId: subOption.aux.form.map(f => f.xmlFormId).orNull() }));
let submission;

if (subOption) {
const fullSubmission = subOption.aux.submission
.map(s => s.withAux('submitter', subOption.aux.submissionActor.orNull()))
.map(s => s.withAux('currentVersion', subOption.aux.currentVersion.map(v => v.withAux('submitter', subOption.aux.currentSubmissionActor.orNull()))))
.map(s => s.forApi())
.map(s => mergeLeft(s, { xmlFormId: subOption.aux.form.map(f => f.xmlFormId).orNull() }));

const submission = mergeLeft(baseSubmission, fullSubmission.orElse(undefined));
submission = mergeLeft(baseSubmission, fullSubmission.orElse(undefined));
}

// Note: The source added to each audit event represents the source of the
// corresponding entity _version_, rather than the source of the event.
Expand Down
127 changes: 119 additions & 8 deletions lib/model/query/entities.js
Original file line number Diff line number Diff line change
Expand Up @@ -10,16 +10,28 @@
const config = require('config');
const { sql } = require('slonik');
const { Actor, Audit, Entity, Submission, Form } = require('../frames');
const { sqlEquals, extender, unjoiner, page, markDeleted, insertMany } = require('../../util/db');
const { sqlEquals, extender, unjoiner, page, markDeleted, insertMany, markUndeleted } = require('../../util/db');
const { map, mergeRight, pickAll } = require('ramda');
const { blankStringToNull, construct } = require('../../util/util');
const { QueryOptions } = require('../../util/db');
const { odataFilter, odataOrderBy } = require('../../data/odata-filter');
const { odataFilter, odataOrderBy, odataExcludeDeleted } = require('../../data/odata-filter');
const { odataToColumnMap, parseSubmissionXml, getDiffProp, ConflictType, normalizeUuid } = require('../../data/entity');
const { isTrue } = require('../../util/http');
const Problem = require('../../util/problem');
const { getOrReject, runSequentially } = require('../../util/promise');

/////////////////////////////////////////////////////////////////////////////////
// Check if provided UUIDs (array) were used by purged entities.

const _getPurgedUuids = (all, uuids) => all(sql`
SELECT jsonb_array_elements_text(details -> 'entityUuids') AS uuid FROM audits a WHERE action = 'entities.purge'
INTERSECT
SELECT jsonb_array_elements_text(${JSON.stringify(uuids)})`)
.then(all.map(r => r.uuid));

const _isPurgedUuid = (maybeOneFirst, uuid) => maybeOneFirst(sql`
SELECT TRUE FROM audits WHERE action = 'entities.purge' AND details -> 'entityUuids' @> ${JSON.stringify([uuid])}
`);
Comment on lines +26 to +34
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I can pass a single uuid in _getPurgedUuuids but my gut feeling is that _isPurgedUuid would be faster. 🤷‍♂️

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

verified that _isPurgedUuid is using the index and is much quicker.


/////////////////////////////////////////////////////////////////////////////////
// ENTITY DEF SOURCES
Expand Down Expand Up @@ -65,10 +77,16 @@ const nextval = sql`nextval(pg_get_serial_sequence('entities', 'id'))`;
// standard authenticated API request. The worker has better access to the event
// actor/initiator and actee/target so it will do the logging itself (including
// error logging).
const createNew = (dataset, partial, subDef, sourceId, userAgentIn) => ({ one, context }) => {
const createNew = (dataset, partial, subDef, sourceId, userAgentIn) => async ({ one, context, maybeOneFirst }) => {
let creatorId;
let userAgent;

// Validate UUID against purged entities
(await _isPurgedUuid(maybeOneFirst, partial.uuid))
.ifDefined(() => {
throw Problem.user.uniquenessViolation({ fields: 'uuid', values: partial.uuid });
});

// Set creatorId and userAgent from submission def if it exists
if (subDef != null) {
({ submitterId: creatorId, userAgent } = subDef);
Expand Down Expand Up @@ -111,6 +129,12 @@ const createMany = (dataset, rawEntities, sourceId, userAgentIn) => async ({ all
const creatorId = context.auth.actor.map((actor) => actor.id).orNull();
const userAgent = blankStringToNull(userAgentIn);

// Validate UUID with the purged entities
const purgedUuids = await _getPurgedUuids(all, rawEntities.map(e => e.uuid));
if (purgedUuids.length > 0) {
throw Problem.user.uniquenessViolation({ fields: 'uuid', values: purgedUuids.join(', ') });
}

// Augment parsed entity data with dataset and creator IDs
const entitiesForInsert = rawEntities.map(e => new Entity({ datasetId: dataset.id, creatorId, ...e }));
const entities = await all(sql`${insertMany(entitiesForInsert)} RETURNING id`);
Expand Down Expand Up @@ -810,7 +834,7 @@ ${options.skiptoken ? sql`
`: sql``}
WHERE
entities."datasetId" = ${datasetId}
AND entities."deletedAt" IS NULL
AND ${odataExcludeDeleted(options.filter, odataToColumnMap)}
AND entity_defs.current=true
AND ${odataFilter(options.filter, odataToColumnMap)}
${options.orderby ? sql`
Expand All @@ -825,7 +849,7 @@ SELECT * FROM
(
SELECT count(*) count FROM entities
WHERE "datasetId" = ${datasetId}
AND "deletedAt" IS NULL
AND ${odataExcludeDeleted(options.filter, odataToColumnMap)}
AND ${odataFilter(options.filter, odataToColumnMap)}
) AS "all"

Expand All @@ -838,7 +862,7 @@ CROSS JOIN
ON entities."createdAt" <= cursor."createdAt" AND entities.id < cursor.id
`: sql``}
WHERE "datasetId" = ${datasetId}
AND "deletedAt" IS NULL
AND ${odataExcludeDeleted(options.filter, odataToColumnMap)}
AND ${odataFilter(options.filter, odataToColumnMap)}
) AS skiptoken`);

Expand All @@ -849,7 +873,94 @@ CROSS JOIN
const del = (entity) => ({ run }) =>
run(markDeleted(entity));

del.audit = (entity, dataset) => (log) => log('entity.delete', entity.with({ acteeId: dataset.acteeId }), { uuid: entity.uuid });
del.audit = (entity, dataset) => (log) =>
log(
'entity.delete',
entity.with({ acteeId: dataset.acteeId }),
{ entity: { uuid: entity.uuid } }
);
Comment on lines +877 to +881
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Made it consistent with other entity logs where uuid is under the entity object


////////////////////////////////////////////////////////////////////////////////
// RESTORE ENTITY

const restore = (entity) => ({ run }) =>
run(markUndeleted(entity));

restore.audit = (entity, dataset) => (log) =>
log(
'entity.restore',
entity.with({ acteeId: dataset.acteeId }),
{ entity: { uuid: entity.uuid } }
);

////////////////////////////////////////////////////////////////////////////////
// PURGE DELETED ENTITIES

const PURGE_DAY_RANGE = config.has('default.taskSchedule.purge')
? config.get('default.taskSchedule.purge')
: 30; // Default is 30 days
Comment on lines +899 to +901
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Maybe we should move this to central place, this is present in three file at the moment.


const _trashedFilter = (force, projectId, datasetName, entityUuid) => {
const idFilters = [sql`TRUE`];
if (projectId) idFilters.push(sql`datasets."projectId" = ${projectId}`);
if (datasetName) idFilters.push(sql`datasets."name" = ${datasetName}`);
if (entityUuid) idFilters.push(sql`entities."uuid" = ${entityUuid}`);

const idFilter = sql.join(idFilters, sql` AND `);

return (force
? sql`entities."deletedAt" IS NOT NULL AND ${idFilter}`
: sql`entities."deletedAt" < CURRENT_DATE - CAST(${PURGE_DAY_RANGE} AS INT) AND ${idFilter}`);
};

const purge = (force = false, projectId = null, datasetName = null, entityUuid = null) => ({ oneFirst }) => {
if (entityUuid && (!projectId || !datasetName)) {
throw Problem.internal.unknown({ error: 'Must specify projectId and datasetName to purge a specify entity.' });
}
if (datasetName && !projectId) {
throw Problem.internal.unknown({ error: 'Must specify projectId to purge all entities of a dataset/entity-list.' });
}

// Reminder: 'notes' in audit table is set by 'x-action-notes' header of all
// APIs with side-effects. Although we don't provide any way to set it from
// the frontend (as of 2024.3), it is a good idea to clear it for purged
// entities.
return oneFirst(sql`
WITH redacted_audits AS (
UPDATE audits SET notes = NULL
FROM entities
JOIN datasets ON entities."datasetId" = datasets.id
WHERE (audits.details->'entity'->>'uuid') = entities.uuid
AND ${_trashedFilter(force, projectId, datasetName, entityUuid)}
), purge_audit AS (
INSERT INTO audits ("action", "loggedAt", "processed", "details")
sadiqkhoja marked this conversation as resolved.
Show resolved Hide resolved
SELECT 'entities.purge', clock_timestamp(), clock_timestamp(), jsonb_build_object('entitiesDeleted', COUNT(*), 'entityUuids', ARRAY_AGG(uuid))
FROM entities
JOIN datasets ON entities."datasetId" = datasets.id
WHERE ${_trashedFilter(force, projectId, datasetName, entityUuid)}
HAVING count(*) > 0
), delete_entity_sources AS (
DELETE FROM entity_def_sources
USING entity_defs, entities, datasets
WHERE entity_def_sources.id = entity_defs."sourceId"
AND entity_defs."entityId" = entities.id
AND entities."datasetId" = datasets.id
AND ${_trashedFilter(force, projectId, datasetName, entityUuid)}
), delete_submission_backlog AS (
DELETE FROM entity_submission_backlog
USING entities, datasets
WHERE entity_submission_backlog."entityUuid"::varchar = entities.uuid
AND entities."datasetId" = datasets.id
AND ${_trashedFilter(force, projectId, datasetName, entityUuid)}
), deleted_entities AS (
DELETE FROM entities
USING datasets
WHERE entities."datasetId" = datasets.id
AND ${_trashedFilter(force, projectId, datasetName, entityUuid)}
RETURNING 1
)
SELECT COUNT(*) FROM deleted_entities`);
};

module.exports = {
createNew, _processSubmissionEvent,
Expand All @@ -867,5 +978,5 @@ module.exports = {
countByDatasetId, getById, getDef,
getAll, getAllDefs, del,
createEntitiesFromPendingSubmissions,
resolveConflict
resolveConflict, restore, purge
};
13 changes: 13 additions & 0 deletions lib/resources/entities.js
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

await Entities.createMany(dataset, partials, sourceId, userAgent);

Should catch UUID conflict error if it is not provided in the request body (means backend has generated it and there is a collision) and generate a new one.

Original file line number Diff line number Diff line change
Expand Up @@ -169,4 +169,17 @@ module.exports = (service, endpoint) => {
return success();

}));

service.post('/projects/:projectId/datasets/:name/entities/:uuid/restore', endpoint(async ({ Datasets, Entities }, { auth, params }) => {

const dataset = await Datasets.get(params.projectId, params.name, true).then(getOrNotFound);

await auth.canOrReject('entity.restore', dataset);

const entity = await Entities.getById(dataset.id, params.uuid, new QueryOptions({ argData: { deleted: 'true' } })).then(getOrNotFound);

await Entities.restore(entity, dataset);

return success();
}));
};
Loading
Loading