From 31b46f8a97f60cd9c2be5cd86b6e16fedcb7e86a Mon Sep 17 00:00:00 2001 From: alxndrsn Date: Mon, 13 Jan 2025 05:58:51 +0000 Subject: [PATCH 01/10] blobs: prevent default contentType column Set blob."contentType" values to application/octet-stream if no mime type is supplied on upload. Related: #1351 --- lib/model/query/blobs.js | 2 +- test/integration/api/submissions.js | 18 ++++++++---------- 2 files changed, 9 insertions(+), 11 deletions(-) diff --git a/lib/model/query/blobs.js b/lib/model/query/blobs.js index 6808cce58..81e476089 100644 --- a/lib/model/query/blobs.js +++ b/lib/model/query/blobs.js @@ -22,7 +22,7 @@ const { construct } = require('../../util/util'); const ensure = (blob) => ({ oneFirst }) => oneFirst(sql` with ensured as (insert into blobs (sha, md5, content, "contentType") values - (${blob.sha}, ${blob.md5}, ${sql.binary(blob.content)}, ${blob.contentType || null}) + (${blob.sha}, ${blob.md5}, ${sql.binary(blob.content)}, ${blob.contentType || sql`DEFAULT`}) on conflict (sha) do update set sha = ${blob.sha} returning id) select id from ensured`); diff --git a/test/integration/api/submissions.js b/test/integration/api/submissions.js index 6a9580c0b..0c21d0076 100644 --- a/test/integration/api/submissions.js +++ b/test/integration/api/submissions.js @@ -4377,8 +4377,7 @@ one,h,/data/h,2000-01-01T00:06,2000-01-01T00:07,-5,-6,,ee,ff body.toString().should.equal('testvideo'); }))))))); - // Ref https://github.com/getodk/central-backend/issues/1351 - it('should attach a given file with empty Content-Type', testService((service) => + it('should attach a given file with empty Content-Type and serve it with default mime type', testService((service) => service.login('alice', (asAlice) => asAlice.post('/v1/projects/1/forms?publish=true') .set('Content-Type', 'application/xml') @@ -4394,13 +4393,12 @@ one,h,/data/h,2000-01-01T00:06,2000-01-01T00:07,-5,-6,,ee,ff .expect(200) .then(() => asAlice.get('/v1/projects/1/forms/binaryType/submissions/both/attachments/my_file1.mp4') .expect(200) - .then(({ headers, text }) => { - headers['content-type'].should.equal('null'); - text.toString().should.equal('testvideo'); // use 'text' instead of 'body' to avoid supertest response parsing + .then(({ headers, body }) => { + headers['content-type'].should.equal('application/octet-stream'); + body.toString().should.equal('testvideo'); }))))))); - // Ref https://github.com/getodk/central-backend/issues/1351 - it('should attach a given file with missing Content-Type', testService((service) => + it('should attach a given file with missing Content-Type and serve it with default mime type', testService((service) => service.login('alice', (asAlice) => asAlice.post('/v1/projects/1/forms?publish=true') .set('Content-Type', 'application/xml') @@ -4416,9 +4414,9 @@ one,h,/data/h,2000-01-01T00:06,2000-01-01T00:07,-5,-6,,ee,ff .expect(200) .then(() => asAlice.get('/v1/projects/1/forms/binaryType/submissions/both/attachments/my_file1.mp4') .expect(200) - .then(({ headers, text }) => { - headers['content-type'].should.equal('null'); - text.toString().should.equal('testvideo'); // use 'text' instead of 'body' to avoid supertest response parsing + .then(({ headers, body }) => { + headers['content-type'].should.equal('application/octet-stream'); + body.toString().should.equal('testvideo'); }))))))); it('should log an audit entry about initial attachment', testService((service, { Audits, Forms, Submissions, SubmissionAttachments }) => From 80344f6099b3e8b3c4257b68a87ff6b1f0b7af25 Mon Sep 17 00:00:00 2001 From: alxndrsn Date: Mon, 13 Jan 2025 06:24:09 +0000 Subject: [PATCH 02/10] update docs --- docs/api.yaml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/docs/api.yaml b/docs/api.yaml index c25a55925..68b9e4460 100644 --- a/docs/api.yaml +++ b/docs/api.yaml @@ -3885,7 +3885,7 @@ paths: - Individual Form summary: Downloading a Form Attachment description: |- - To download a single file, use this endpoint. The appropriate `Content-Disposition` (attachment with a filename) and `Content-Type` (based on the type supplied at upload time) will be given. + To download a single file, use this endpoint. The appropriate `Content-Disposition` (attachment with a filename) and `Content-Type` (based on the type supplied at upload time, or `application/octet-stream` if none was supplied) will be given. This endpoint supports `ETag`, which can be used to avoid downloading the same content more than once. When an API consumer calls this endpoint, it returns a value in `ETag` header, you can pass this value in the header `If-None-Match` of subsequent requests. If the file has not been changed since the previous request, you will receive `304 Not Modified` response otherwise you'll get the latest file. operationId: Downloading a Form Attachment @@ -4415,7 +4415,7 @@ paths: summary: Downloading a Draft Form Attachment description: To download a single file, use this endpoint. The appropriate `Content-Disposition` (attachment with a filename or Dataset name) and `Content-Type` (based on - the type supplied at upload time or `text/csv` in the case of a linked Dataset) + the type supplied at upload time, or `text/csv` in the case of a linked Dataset, or `application/octet-stream` otherwise) will be given. This endpoint supports `ETag`, which can be used to avoid downloading the same content more than once. When an API consumer calls this endpoint, it returns a value in `ETag` header, you can pass this value in the header `If-None-Match` of subsequent requests. If the file has not been changed since the previous request, you will receive `304 Not Modified` response otherwise you'll get the latest file. @@ -5116,7 +5116,7 @@ paths: - Published Form Versions summary: Downloading a Form Version Attachment description: |- - To download a single file, use this endpoint. The appropriate `Content-Disposition` (attachment with a filename) and `Content-Type` (based on the type supplied at upload time) will be given. + To download a single file, use this endpoint. The appropriate `Content-Disposition` (attachment with a filename) and `Content-Type` (based on the type supplied at upload time, or `application/octet-stream` if none was supplied) will be given. This endpoint supports `ETag` header, which can be used to avoid downloading the same content more than once. When an API consumer calls this endpoint, the endpoint returns a value in `ETag` header. If you pass that value in the `If-None-Match` header of a subsequent request, then if the file has not been changed since the previous request, you will receive `304 Not Modified` response; otherwise you'll get the latest file. operationId: Downloading a Form Version Attachment From d90d79eed3ddaf29186aaa770b27ab5280fb1ac6 Mon Sep 17 00:00:00 2001 From: alxndrsn Date: Mon, 13 Jan 2025 06:27:45 +0000 Subject: [PATCH 03/10] add the migration! --- ...-01-disable-nullable-blob-content-types.js | 22 +++++++++++++++++++ 1 file changed, 22 insertions(+) create mode 100644 lib/model/migrations/20250113-01-disable-nullable-blob-content-types.js diff --git a/lib/model/migrations/20250113-01-disable-nullable-blob-content-types.js b/lib/model/migrations/20250113-01-disable-nullable-blob-content-types.js new file mode 100644 index 000000000..0b33c9249 --- /dev/null +++ b/lib/model/migrations/20250113-01-disable-nullable-blob-content-types.js @@ -0,0 +1,22 @@ +// Copyright 2025 ODK Central Developers +// See the NOTICE file at the top-level directory of this distribution and at +// https://github.com/getodk/central-backend/blob/master/NOTICE. +// This file is part of ODK Central. It is subject to the license terms in +// the LICENSE file found in the top-level directory of this distribution and at +// https://www.apache.org/licenses/LICENSE-2.0. No part of ODK Central, +// including this file, may be copied, modified, propagated, or distributed +// except according to the terms contained in the LICENSE file. + +const up = (db) => db.raw(` + ALTER TABLE blobs + ALTER COLUMN "contentType" SET NOT NULL, + ALTER COLUMN "contentType" SET DEFAULT 'application/octet-stream' +`); + +const down = (db) => db.raw(` + ALTER TABLE blobs + ALTER COLUMN "contentType" DROP NOT NULL, + ALTER COLUMN "contentType" DROP DEFAULT +`); + +module.exports = { up, down }; From cdfc4c6893b4d60595d056e796f10ed6e47b6713 Mon Sep 17 00:00:00 2001 From: alxndrsn Date: Mon, 13 Jan 2025 06:47:10 +0000 Subject: [PATCH 04/10] test/e2e: expect default mime type --- test/e2e/s3/test.js | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/test/e2e/s3/test.js b/test/e2e/s3/test.js index b94c2e75c..c599a44bb 100644 --- a/test/e2e/s3/test.js +++ b/test/e2e/s3/test.js @@ -379,9 +379,7 @@ describe('s3 support', () => { const filepath = `${attDir}/${name}`; - // "null" is a questionable content-type, but matches current central behaviour - // See: https://github.com/getodk/central-backend/pull/1352 - const expectedContentType = mimetypeFor(name) ?? 'null'; + const expectedContentType = mimetypeFor(name) ?? 'application/octet-stream'; const actualContentType = res.headers.get('content-type'); should.equal(actualContentType, expectedContentType); From 29210023a10596fdb16374c10906f98133c877d1 Mon Sep 17 00:00:00 2001 From: alxndrsn Date: Mon, 13 Jan 2025 06:48:53 +0000 Subject: [PATCH 05/10] task test: set default contentType for aBlobExistsWith() --- test/integration/task/s3.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/integration/task/s3.js b/test/integration/task/s3.js index 0c3b07285..f9d58d874 100644 --- a/test/integration/task/s3.js +++ b/test/integration/task/s3.js @@ -11,7 +11,7 @@ const aBlobExistsWith = async (container, { status }) => { const blob = await Blob.fromBuffer(crypto.randomBytes(100)); await container.run(sql` INSERT INTO BLOBS (sha, md5, content, "contentType", s3_status) - VALUES (${blob.sha}, ${blob.md5}, ${sql.binary(blob.content)}, ${blob.contentType || null}, ${status}) + VALUES (${blob.sha}, ${blob.md5}, ${sql.binary(blob.content)}, ${blob.contentType || sql`DEFAULT`}, ${status}) `); }; From 44ba1c7c0756156a4f9a856dc20f324927e6ca96 Mon Sep 17 00:00:00 2001 From: alxndrsn Date: Tue, 14 Jan 2025 06:32:24 +0000 Subject: [PATCH 06/10] add migration test; fix existing NULL value handling --- .eslintrc.json | 2 +- ...-01-disable-nullable-blob-content-types.js | 5 +- ...isable-nullable-blob-content-types.spec.js | 58 +++++++++++++++ test/db-migrations/utils.js | 73 +++++++++++++++++-- 4 files changed, 130 insertions(+), 8 deletions(-) create mode 100644 test/db-migrations/20250113-01-disable-nullable-blob-content-types.spec.js diff --git a/.eslintrc.json b/.eslintrc.json index d1a9a5c6c..c64a114ea 100644 --- a/.eslintrc.json +++ b/.eslintrc.json @@ -28,7 +28,7 @@ "no-else-return": "off", "no-multiple-empty-lines": "off", "no-nested-ternary": "off", - "no-only-tests/no-only-tests": "error", + "no-only-tests/no-only-tests": [ "error", { "block": [ "describe", "it", "describeMigration" ] } ], "no-restricted-syntax": "off", "no-underscore-dangle": "off", "nonblock-statement-body-position": "off", diff --git a/lib/model/migrations/20250113-01-disable-nullable-blob-content-types.js b/lib/model/migrations/20250113-01-disable-nullable-blob-content-types.js index 0b33c9249..b90d8b8e0 100644 --- a/lib/model/migrations/20250113-01-disable-nullable-blob-content-types.js +++ b/lib/model/migrations/20250113-01-disable-nullable-blob-content-types.js @@ -9,8 +9,9 @@ const up = (db) => db.raw(` ALTER TABLE blobs - ALTER COLUMN "contentType" SET NOT NULL, - ALTER COLUMN "contentType" SET DEFAULT 'application/octet-stream' + ALTER COLUMN "contentType" TYPE TEXT USING(COALESCE("contentType", 'application/octet-stream')), + ALTER COLUMN "contentType" SET DEFAULT 'application/octet-stream', + ALTER COLUMN "contentType" SET NOT NULL `); const down = (db) => db.raw(` diff --git a/test/db-migrations/20250113-01-disable-nullable-blob-content-types.spec.js b/test/db-migrations/20250113-01-disable-nullable-blob-content-types.spec.js new file mode 100644 index 000000000..3fbee0f35 --- /dev/null +++ b/test/db-migrations/20250113-01-disable-nullable-blob-content-types.spec.js @@ -0,0 +1,58 @@ +const assert = require('node:assert/strict'); +const { hash, randomBytes } = require('node:crypto'); + +const { // eslint-disable-line object-curly-newline + assertTableContents, + describeMigration, + rowsExistFor, +} = require('./utils'); // eslint-disable-line object-curly-newline + +describeMigration('20250113-01-disable-nullable-blob-content-types', ({ runMigrationBeingTested }) => { + const aBlobWith = props => { + const randomContent = randomBytes(100); + const md5 = hash('md5', randomContent); // eslint-disable-line no-multi-spaces + const sha = hash('sha1', randomContent); + return { md5, sha, ...props }; + }; + const aBlob = () => aBlobWith({}); + + const blob1 = aBlobWith({ contentType: null }); + const blob2 = aBlobWith({ contentType: 'text/plain' }); + + before(async () => { + await rowsExistFor('blobs', blob1, blob2); + + await runMigrationBeingTested(); + }); + + it('should change NULL contentType values to application/octet-stream, and preserve non-NULL values', async () => { + await assertTableContents('blobs', + { ...blob1, contentType: 'application/octet-stream' }, + { ...blob2, contentType: 'text/plain' }, + ); + }); + + it(`should create new blobs with contentType 'application/octet-stream' (contentType not supplied)`, async () => { + const { md5, sha } = aBlob(); + + const created = await db.oneFirst(sql` + INSERT INTO blobs (md5, sha, "contentType") + VALUES(${md5}, ${sha}, DEFAULT) + RETURNING "contentType" + `); + + assert.equal(created, 'application/octet-stream'); + }); + + it(`should create new blobs with contentType 'application/octet-stream' (supplied DEFAULT contentType)`, async () => { + const { md5, sha } = aBlob(); + + const created = await db.oneFirst(sql` + INSERT INTO blobs (md5, sha, "contentType") + VALUES(${md5}, ${sha}, DEFAULT) + RETURNING "contentType" + `); + + assert.equal(created, 'application/octet-stream'); + }); +}); diff --git a/test/db-migrations/utils.js b/test/db-migrations/utils.js index 85788ea01..90d9b7af2 100644 --- a/test/db-migrations/utils.js +++ b/test/db-migrations/utils.js @@ -1,4 +1,4 @@ -const assert = require('node:assert'); +const assert = require('node:assert/strict'); const _ = require('lodash'); const migrator = require('./migrator'); @@ -77,22 +77,22 @@ function assertRowsMatch(actualRows, expectedRows) { const remainingRows = [...actualRows]; for (let i=0; i _.pick(r, Object.keys(x))); + const filteredRemainingRows = remainingRows.map(r => _.pick(r, Object.keys(expectedRow))); assert.fail( `Expected row ${i} not found:\njson=` + - JSON.stringify({ remainingRows, filteredRemainingRows, expectedRow: x }), + JSON.stringify({ remainingRows, filteredRemainingRows, expectedRow }), ); } } @@ -115,9 +115,72 @@ function assertIncludes(actual, expected) { } } +async function rowsExistFor(tableName, ...rows) { + if (!rows.length) throw new Error(`Attempted to insert 0 rows into table ${tableName}`); + + assertAllHaveSameProps(rows); // eslint-disable-line no-use-before-define + const colNames = Object.keys(rows[0]); + if (!colNames.length) throw new Error(`Attempted to insert data with 0 defined columns`); + + const table = sql.identifier([tableName]); + const cols = sql.join(colNames.map(k => sql.identifier([k])), sql`,`); + + return db.query( + sql` + INSERT INTO ${table} (${cols}) + SELECT ${cols} + FROM JSON_POPULATE_RECORDSET(NULL::${table}, ${JSON.stringify(rows)}) + `, + ); +} + +async function assertTableContents(tableName, ...expected) { + const { rows: actual } = await db.query(sql`SELECT * FROM ${sql.identifier([tableName])}`); + + assert.equal( + actual.length, + expected.length, + `Unexpected number of rows in table '${tableName}'. ` + + `Expected ${expected.length} but got ${actual.length}. ` + + `DB returned: ${JSON.stringify(actual, null, 2)}`, + ); + + const remainingRows = [ ...actual ]; + for (let i=0; i _.pick(r, Object.keys(x))); + assert.fail(`Expected row ${i} not found in table '${tableName}':\n json=${JSON.stringify({ remainingRows, filteredRemainingRows, expectedRow: x })}`); + } + } +} + +function assertAllHaveSameProps(list) { + if (list.length < 2) return; + const [ first, ...rest ] = list.map(Object.keys); + + rest.forEach((v, i) => { + assert.deepEqual(v, first, `Row #${i+1} has different props to row #0. All supplied rows must have the same props.`); + }); +} + module.exports = { assertIndexExists, + assertTableContents, assertTableDoesNotExist, assertTableSchema, + describeMigration, + + rowsExistFor, }; From 887b5f4bbaf10ed3d6e68a9263fb975b07161ae3 Mon Sep 17 00:00:00 2001 From: alxndrsn Date: Tue, 14 Jan 2025 06:42:43 +0000 Subject: [PATCH 07/10] clarify test name --- .../20250113-01-disable-nullable-blob-content-types.spec.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/db-migrations/20250113-01-disable-nullable-blob-content-types.spec.js b/test/db-migrations/20250113-01-disable-nullable-blob-content-types.spec.js index 3fbee0f35..e3fb41c03 100644 --- a/test/db-migrations/20250113-01-disable-nullable-blob-content-types.spec.js +++ b/test/db-migrations/20250113-01-disable-nullable-blob-content-types.spec.js @@ -25,7 +25,7 @@ describeMigration('20250113-01-disable-nullable-blob-content-types', ({ runMigra await runMigrationBeingTested(); }); - it('should change NULL contentType values to application/octet-stream, and preserve non-NULL values', async () => { + it('should change existing NULL contentType values to application/octet-stream, and preserve non-NULL values', async () => { await assertTableContents('blobs', { ...blob1, contentType: 'application/octet-stream' }, { ...blob2, contentType: 'text/plain' }, From 37fc98c7a2c0c44aa5d6df1b5d8a746f7dc0fe06 Mon Sep 17 00:00:00 2001 From: alxndrsn Date: Tue, 14 Jan 2025 06:48:55 +0000 Subject: [PATCH 08/10] revert eslintrc changes --- .eslintrc.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.eslintrc.json b/.eslintrc.json index c64a114ea..d1a9a5c6c 100644 --- a/.eslintrc.json +++ b/.eslintrc.json @@ -28,7 +28,7 @@ "no-else-return": "off", "no-multiple-empty-lines": "off", "no-nested-ternary": "off", - "no-only-tests/no-only-tests": [ "error", { "block": [ "describe", "it", "describeMigration" ] } ], + "no-only-tests/no-only-tests": "error", "no-restricted-syntax": "off", "no-underscore-dangle": "off", "nonblock-statement-body-position": "off", From 84cf7275bdb7d2762e43a3d9c59985a5d7c92bd9 Mon Sep 17 00:00:00 2001 From: alxndrsn Date: Tue, 14 Jan 2025 06:55:45 +0000 Subject: [PATCH 09/10] revert assert change --- test/db-migrations/utils.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/db-migrations/utils.js b/test/db-migrations/utils.js index 90d9b7af2..d98007697 100644 --- a/test/db-migrations/utils.js +++ b/test/db-migrations/utils.js @@ -1,4 +1,4 @@ -const assert = require('node:assert/strict'); +const assert = require('node:assert'); const _ = require('lodash'); const migrator = require('./migrator'); From 9b76566f72e005a0eb8be8c9febfaf9c46f309d6 Mon Sep 17 00:00:00 2001 From: alxndrsn Date: Tue, 14 Jan 2025 07:01:37 +0000 Subject: [PATCH 10/10] revert renaming --- test/db-migrations/utils.js | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/test/db-migrations/utils.js b/test/db-migrations/utils.js index d98007697..43e811d2d 100644 --- a/test/db-migrations/utils.js +++ b/test/db-migrations/utils.js @@ -77,22 +77,22 @@ function assertRowsMatch(actualRows, expectedRows) { const remainingRows = [...actualRows]; for (let i=0; i _.pick(r, Object.keys(expectedRow))); + const filteredRemainingRows = remainingRows.map(r => _.pick(r, Object.keys(x))); assert.fail( `Expected row ${i} not found:\njson=` + - JSON.stringify({ remainingRows, filteredRemainingRows, expectedRow }), + JSON.stringify({ remainingRows, filteredRemainingRows, expectedRow: x }), ); } }