diff --git a/docs/api.yaml b/docs/api.yaml index c25a55925..68b9e4460 100644 --- a/docs/api.yaml +++ b/docs/api.yaml @@ -3885,7 +3885,7 @@ paths: - Individual Form summary: Downloading a Form Attachment description: |- - To download a single file, use this endpoint. The appropriate `Content-Disposition` (attachment with a filename) and `Content-Type` (based on the type supplied at upload time) will be given. + To download a single file, use this endpoint. The appropriate `Content-Disposition` (attachment with a filename) and `Content-Type` (based on the type supplied at upload time, or `application/octet-stream` if none was supplied) will be given. This endpoint supports `ETag`, which can be used to avoid downloading the same content more than once. When an API consumer calls this endpoint, it returns a value in `ETag` header, you can pass this value in the header `If-None-Match` of subsequent requests. If the file has not been changed since the previous request, you will receive `304 Not Modified` response otherwise you'll get the latest file. operationId: Downloading a Form Attachment @@ -4415,7 +4415,7 @@ paths: summary: Downloading a Draft Form Attachment description: To download a single file, use this endpoint. The appropriate `Content-Disposition` (attachment with a filename or Dataset name) and `Content-Type` (based on - the type supplied at upload time or `text/csv` in the case of a linked Dataset) + the type supplied at upload time, or `text/csv` in the case of a linked Dataset, or `application/octet-stream` otherwise) will be given. This endpoint supports `ETag`, which can be used to avoid downloading the same content more than once. When an API consumer calls this endpoint, it returns a value in `ETag` header, you can pass this value in the header `If-None-Match` of subsequent requests. If the file has not been changed since the previous request, you will receive `304 Not Modified` response otherwise you'll get the latest file. @@ -5116,7 +5116,7 @@ paths: - Published Form Versions summary: Downloading a Form Version Attachment description: |- - To download a single file, use this endpoint. The appropriate `Content-Disposition` (attachment with a filename) and `Content-Type` (based on the type supplied at upload time) will be given. + To download a single file, use this endpoint. The appropriate `Content-Disposition` (attachment with a filename) and `Content-Type` (based on the type supplied at upload time, or `application/octet-stream` if none was supplied) will be given. This endpoint supports `ETag` header, which can be used to avoid downloading the same content more than once. When an API consumer calls this endpoint, the endpoint returns a value in `ETag` header. If you pass that value in the `If-None-Match` header of a subsequent request, then if the file has not been changed since the previous request, you will receive `304 Not Modified` response; otherwise you'll get the latest file. operationId: Downloading a Form Version Attachment diff --git a/lib/model/migrations/20250113-01-disable-nullable-blob-content-types.js b/lib/model/migrations/20250113-01-disable-nullable-blob-content-types.js new file mode 100644 index 000000000..b90d8b8e0 --- /dev/null +++ b/lib/model/migrations/20250113-01-disable-nullable-blob-content-types.js @@ -0,0 +1,23 @@ +// Copyright 2025 ODK Central Developers +// See the NOTICE file at the top-level directory of this distribution and at +// https://github.com/getodk/central-backend/blob/master/NOTICE. +// This file is part of ODK Central. It is subject to the license terms in +// the LICENSE file found in the top-level directory of this distribution and at +// https://www.apache.org/licenses/LICENSE-2.0. No part of ODK Central, +// including this file, may be copied, modified, propagated, or distributed +// except according to the terms contained in the LICENSE file. + +const up = (db) => db.raw(` + ALTER TABLE blobs + ALTER COLUMN "contentType" TYPE TEXT USING(COALESCE("contentType", 'application/octet-stream')), + ALTER COLUMN "contentType" SET DEFAULT 'application/octet-stream', + ALTER COLUMN "contentType" SET NOT NULL +`); + +const down = (db) => db.raw(` + ALTER TABLE blobs + ALTER COLUMN "contentType" DROP NOT NULL, + ALTER COLUMN "contentType" DROP DEFAULT +`); + +module.exports = { up, down }; diff --git a/lib/model/query/blobs.js b/lib/model/query/blobs.js index 6808cce58..81e476089 100644 --- a/lib/model/query/blobs.js +++ b/lib/model/query/blobs.js @@ -22,7 +22,7 @@ const { construct } = require('../../util/util'); const ensure = (blob) => ({ oneFirst }) => oneFirst(sql` with ensured as (insert into blobs (sha, md5, content, "contentType") values - (${blob.sha}, ${blob.md5}, ${sql.binary(blob.content)}, ${blob.contentType || null}) + (${blob.sha}, ${blob.md5}, ${sql.binary(blob.content)}, ${blob.contentType || sql`DEFAULT`}) on conflict (sha) do update set sha = ${blob.sha} returning id) select id from ensured`); diff --git a/test/db-migrations/20250113-01-disable-nullable-blob-content-types.spec.js b/test/db-migrations/20250113-01-disable-nullable-blob-content-types.spec.js new file mode 100644 index 000000000..e3fb41c03 --- /dev/null +++ b/test/db-migrations/20250113-01-disable-nullable-blob-content-types.spec.js @@ -0,0 +1,58 @@ +const assert = require('node:assert/strict'); +const { hash, randomBytes } = require('node:crypto'); + +const { // eslint-disable-line object-curly-newline + assertTableContents, + describeMigration, + rowsExistFor, +} = require('./utils'); // eslint-disable-line object-curly-newline + +describeMigration('20250113-01-disable-nullable-blob-content-types', ({ runMigrationBeingTested }) => { + const aBlobWith = props => { + const randomContent = randomBytes(100); + const md5 = hash('md5', randomContent); // eslint-disable-line no-multi-spaces + const sha = hash('sha1', randomContent); + return { md5, sha, ...props }; + }; + const aBlob = () => aBlobWith({}); + + const blob1 = aBlobWith({ contentType: null }); + const blob2 = aBlobWith({ contentType: 'text/plain' }); + + before(async () => { + await rowsExistFor('blobs', blob1, blob2); + + await runMigrationBeingTested(); + }); + + it('should change existing NULL contentType values to application/octet-stream, and preserve non-NULL values', async () => { + await assertTableContents('blobs', + { ...blob1, contentType: 'application/octet-stream' }, + { ...blob2, contentType: 'text/plain' }, + ); + }); + + it(`should create new blobs with contentType 'application/octet-stream' (contentType not supplied)`, async () => { + const { md5, sha } = aBlob(); + + const created = await db.oneFirst(sql` + INSERT INTO blobs (md5, sha, "contentType") + VALUES(${md5}, ${sha}, DEFAULT) + RETURNING "contentType" + `); + + assert.equal(created, 'application/octet-stream'); + }); + + it(`should create new blobs with contentType 'application/octet-stream' (supplied DEFAULT contentType)`, async () => { + const { md5, sha } = aBlob(); + + const created = await db.oneFirst(sql` + INSERT INTO blobs (md5, sha, "contentType") + VALUES(${md5}, ${sha}, DEFAULT) + RETURNING "contentType" + `); + + assert.equal(created, 'application/octet-stream'); + }); +}); diff --git a/test/db-migrations/utils.js b/test/db-migrations/utils.js index 85788ea01..43e811d2d 100644 --- a/test/db-migrations/utils.js +++ b/test/db-migrations/utils.js @@ -115,9 +115,72 @@ function assertIncludes(actual, expected) { } } +async function rowsExistFor(tableName, ...rows) { + if (!rows.length) throw new Error(`Attempted to insert 0 rows into table ${tableName}`); + + assertAllHaveSameProps(rows); // eslint-disable-line no-use-before-define + const colNames = Object.keys(rows[0]); + if (!colNames.length) throw new Error(`Attempted to insert data with 0 defined columns`); + + const table = sql.identifier([tableName]); + const cols = sql.join(colNames.map(k => sql.identifier([k])), sql`,`); + + return db.query( + sql` + INSERT INTO ${table} (${cols}) + SELECT ${cols} + FROM JSON_POPULATE_RECORDSET(NULL::${table}, ${JSON.stringify(rows)}) + `, + ); +} + +async function assertTableContents(tableName, ...expected) { + const { rows: actual } = await db.query(sql`SELECT * FROM ${sql.identifier([tableName])}`); + + assert.equal( + actual.length, + expected.length, + `Unexpected number of rows in table '${tableName}'. ` + + `Expected ${expected.length} but got ${actual.length}. ` + + `DB returned: ${JSON.stringify(actual, null, 2)}`, + ); + + const remainingRows = [ ...actual ]; + for (let i=0; i _.pick(r, Object.keys(x))); + assert.fail(`Expected row ${i} not found in table '${tableName}':\n json=${JSON.stringify({ remainingRows, filteredRemainingRows, expectedRow: x })}`); + } + } +} + +function assertAllHaveSameProps(list) { + if (list.length < 2) return; + const [ first, ...rest ] = list.map(Object.keys); + + rest.forEach((v, i) => { + assert.deepEqual(v, first, `Row #${i+1} has different props to row #0. All supplied rows must have the same props.`); + }); +} + module.exports = { assertIndexExists, + assertTableContents, assertTableDoesNotExist, assertTableSchema, + describeMigration, + + rowsExistFor, }; diff --git a/test/e2e/s3/test.js b/test/e2e/s3/test.js index b94c2e75c..c599a44bb 100644 --- a/test/e2e/s3/test.js +++ b/test/e2e/s3/test.js @@ -379,9 +379,7 @@ describe('s3 support', () => { const filepath = `${attDir}/${name}`; - // "null" is a questionable content-type, but matches current central behaviour - // See: https://github.com/getodk/central-backend/pull/1352 - const expectedContentType = mimetypeFor(name) ?? 'null'; + const expectedContentType = mimetypeFor(name) ?? 'application/octet-stream'; const actualContentType = res.headers.get('content-type'); should.equal(actualContentType, expectedContentType); diff --git a/test/integration/api/submissions.js b/test/integration/api/submissions.js index 6a9580c0b..0c21d0076 100644 --- a/test/integration/api/submissions.js +++ b/test/integration/api/submissions.js @@ -4377,8 +4377,7 @@ one,h,/data/h,2000-01-01T00:06,2000-01-01T00:07,-5,-6,,ee,ff body.toString().should.equal('testvideo'); }))))))); - // Ref https://github.com/getodk/central-backend/issues/1351 - it('should attach a given file with empty Content-Type', testService((service) => + it('should attach a given file with empty Content-Type and serve it with default mime type', testService((service) => service.login('alice', (asAlice) => asAlice.post('/v1/projects/1/forms?publish=true') .set('Content-Type', 'application/xml') @@ -4394,13 +4393,12 @@ one,h,/data/h,2000-01-01T00:06,2000-01-01T00:07,-5,-6,,ee,ff .expect(200) .then(() => asAlice.get('/v1/projects/1/forms/binaryType/submissions/both/attachments/my_file1.mp4') .expect(200) - .then(({ headers, text }) => { - headers['content-type'].should.equal('null'); - text.toString().should.equal('testvideo'); // use 'text' instead of 'body' to avoid supertest response parsing + .then(({ headers, body }) => { + headers['content-type'].should.equal('application/octet-stream'); + body.toString().should.equal('testvideo'); }))))))); - // Ref https://github.com/getodk/central-backend/issues/1351 - it('should attach a given file with missing Content-Type', testService((service) => + it('should attach a given file with missing Content-Type and serve it with default mime type', testService((service) => service.login('alice', (asAlice) => asAlice.post('/v1/projects/1/forms?publish=true') .set('Content-Type', 'application/xml') @@ -4416,9 +4414,9 @@ one,h,/data/h,2000-01-01T00:06,2000-01-01T00:07,-5,-6,,ee,ff .expect(200) .then(() => asAlice.get('/v1/projects/1/forms/binaryType/submissions/both/attachments/my_file1.mp4') .expect(200) - .then(({ headers, text }) => { - headers['content-type'].should.equal('null'); - text.toString().should.equal('testvideo'); // use 'text' instead of 'body' to avoid supertest response parsing + .then(({ headers, body }) => { + headers['content-type'].should.equal('application/octet-stream'); + body.toString().should.equal('testvideo'); }))))))); it('should log an audit entry about initial attachment', testService((service, { Audits, Forms, Submissions, SubmissionAttachments }) => diff --git a/test/integration/task/s3.js b/test/integration/task/s3.js index 0c3b07285..f9d58d874 100644 --- a/test/integration/task/s3.js +++ b/test/integration/task/s3.js @@ -11,7 +11,7 @@ const aBlobExistsWith = async (container, { status }) => { const blob = await Blob.fromBuffer(crypto.randomBytes(100)); await container.run(sql` INSERT INTO BLOBS (sha, md5, content, "contentType", s3_status) - VALUES (${blob.sha}, ${blob.md5}, ${sql.binary(blob.content)}, ${blob.contentType || null}, ${status}) + VALUES (${blob.sha}, ${blob.md5}, ${sql.binary(blob.content)}, ${blob.contentType || sql`DEFAULT`}, ${status}) `); };