From 37d13202e8f8df31e289d43b28eed728c40508eb Mon Sep 17 00:00:00 2001 From: alxndrsn Date: Mon, 13 Jan 2025 05:58:51 +0000 Subject: [PATCH 1/3] blobs: derive missing contentType from file extension Ref #1351 --- lib/model/query/submission-attachments.js | 6 +- lib/resources/forms.js | 4 +- lib/resources/submissions.js | 4 +- lib/util/blob.js | 12 +++- test/data/xml.js | 1 + test/integration/api/submissions.js | 72 +++++++++++++++++++++++ 6 files changed, 91 insertions(+), 8 deletions(-) diff --git a/lib/model/query/submission-attachments.js b/lib/model/query/submission-attachments.js index 8a2901b8f..359ef7e02 100644 --- a/lib/model/query/submission-attachments.js +++ b/lib/model/query/submission-attachments.js @@ -20,7 +20,7 @@ const { insertMany, QueryOptions } = require('../../util/db'); const { resolve } = require('../../util/promise'); const { isBlank, construct } = require('../../util/util'); const { traverseXml, findAll, root, node, text } = require('../../util/xml'); -const { streamBlobs } = require('../../util/blob'); +const { defaultMimetypeFor, streamBlobs } = require('../../util/blob'); //////////////////////////////////////////////////////////////////////////////// @@ -29,7 +29,7 @@ const { streamBlobs } = require('../../util/blob'); const _makeAttachment = (ensure, submissionDefId, name, file = null, deprecated = null, index = null, isClientAudit = null) => { const data = { submissionDefId, name, index, isClientAudit, blobId: deprecated }; return (file == null) ? resolve(new Submission.Attachment(data)) - : ensure(Blob.fromBuffer(file.buffer, file.mimetype)).then((blobId) => { + : ensure(Blob.fromBuffer(file.buffer, file.mimetype || defaultMimetypeFor(name))).then((blobId) => { data.blobId = blobId; return new Submission.Attachment(data); }); @@ -106,7 +106,7 @@ const upsert = (def, files) => ({ Blobs, SubmissionAttachments }) => const lookup = new Set(expecteds.map((att) => att.name)); const present = files.filter((file) => lookup.has(file.fieldname)); return Promise.all(present - .map((file) => Blobs.ensure(Blob.fromBuffer(file.buffer, file.mimetype)) + .map((file) => Blobs.ensure(Blob.fromBuffer(file.buffer, file.mimetype || defaultMimetypeFor(file.fieldname))) .then((blobId) => SubmissionAttachments.attach(def, file.fieldname, blobId)))); }); diff --git a/lib/resources/forms.js b/lib/resources/forms.js index 15cf8f585..a37b9d16f 100644 --- a/lib/resources/forms.js +++ b/lib/resources/forms.js @@ -13,7 +13,7 @@ const { Blob, Form } = require('../model/frames'); const { ensureDef } = require('../model/frame'); const { QueryOptions } = require('../util/db'); const { isTrue, xml, contentDisposition, withEtag } = require('../util/http'); -const { blobResponse } = require('../util/blob'); +const { blobResponse, defaultMimetypeFor } = require('../util/blob'); const Problem = require('../util/problem'); const { sanitizeFieldsForOdata, setVersion } = require('../data/schema'); const { getOrNotFound, reject, resolve, rejectIf } = require('../util/promise'); @@ -380,7 +380,7 @@ module.exports = (service, endpoint) => { .then(getOrNotFound) .then((form) => auth.canOrReject('form.update', form)) .then((form) => Promise.all([ - Blob.fromStream(request, headers['content-type']).then((blob) => Blobs.ensure(blob)), + Blob.fromStream(request, headers['content-type'] || defaultMimetypeFor(params.name)).then((blob) => Blobs.ensure(blob)), FormAttachments.getByFormDefIdAndName(form.draftDefId, params.name).then(getOrNotFound) ]) .then(([ blobId, attachment ]) => FormAttachments.update(form, attachment, blobId, null)) diff --git a/lib/resources/submissions.js b/lib/resources/submissions.js index 987a0c2c9..518a4e984 100644 --- a/lib/resources/submissions.js +++ b/lib/resources/submissions.js @@ -16,7 +16,7 @@ const { createdMessage } = require('../formats/openrosa'); const { getOrNotFound, getOrReject, rejectIf, reject } = require('../util/promise'); const { QueryOptions } = require('../util/db'); const { success, xml, isFalse, contentDisposition, redirect, url } = require('../util/http'); -const { blobResponse } = require('../util/blob'); +const { blobResponse, defaultMimetypeFor } = require('../util/blob'); const Problem = require('../util/problem'); const { streamBriefcaseCsvs } = require('../data/briefcase'); const { streamAttachments } = require('../data/attachments'); @@ -446,7 +446,7 @@ module.exports = (service, endpoint) => { .then((def) => SubmissionAttachments.getBySubmissionDefIdAndName(def.id, params.name) // just for audit logging .then(getOrNotFound) .then((oldAttachment) => [ form, def, oldAttachment ]))), - Blob.fromStream(request, headers['content-type']).then(Blobs.ensure) + Blob.fromStream(request, headers['content-type'] || defaultMimetypeFor(params.name)).then(Blobs.ensure) ]) .then(([ [ form, def, oldAttachment ], blobId ]) => Promise.all([ SubmissionAttachments.attach(def, params.name, blobId), diff --git a/lib/util/blob.js b/lib/util/blob.js index 099f5aa14..6dd38bfdc 100644 --- a/lib/util/blob.js +++ b/lib/util/blob.js @@ -7,6 +7,7 @@ // including this file, may be copied, modified, propagated, or distributed // except according to the terms contained in the LICENSE file. +const { extname } = require('node:path'); const { Transform } = require('stream'); const { PartialPipe } = require('./stream'); const { contentDisposition, redirect, withEtag } = require('./http'); @@ -67,4 +68,13 @@ async function blobResponse(s3, filename, blob) { } } -module.exports = { blobContent, blobResponse, streamBlobs, streamEncBlobs }; +function defaultMimetypeFor(filename) { + if (!filename) return null; + switch (extname(filename)) { + case '.csv': return 'text/csv'; // eslint-disable-line no-multi-spaces + case '.geojson': return 'application/geo+json'; + default: return null; // eslint-disable-line no-multi-spaces + } +} + +module.exports = { blobContent, blobResponse, defaultMimetypeFor, streamBlobs, streamEncBlobs }; diff --git a/test/data/xml.js b/test/data/xml.js index ba2c2b082..8328edce3 100644 --- a/test/data/xml.js +++ b/test/data/xml.js @@ -637,6 +637,7 @@ module.exports = { two: instance('binaryType', 'btwo', 'here_is_file2.jpg'), both: instance('binaryType', 'both', 'my_file1.mp4here_is_file2.jpg'), unicode: instance('binaryType', 'both', 'fîlé2f😂le3صادق'), + withFile: (filename) => instance('binaryType', 'with-file', `${filename}`), }, encrypted: { // TODO: the jpg binary associated with this sample blob is >3MB. will replace diff --git a/test/integration/api/submissions.js b/test/integration/api/submissions.js index 6a9580c0b..a77512cc1 100644 --- a/test/integration/api/submissions.js +++ b/test/integration/api/submissions.js @@ -4421,6 +4421,78 @@ one,h,/data/h,2000-01-01T00:06,2000-01-01T00:07,-5,-6,,ee,ff text.toString().should.equal('testvideo'); // use 'text' instead of 'body' to avoid supertest response parsing }))))))); + [ + // express ALWAYS adds "charset=..." suffix to text-based Content-Type response headers + // See: https://github.com/expressjs/express/issues/2654 + [ 'CSV', 'myfile.csv', 'text/csv; charset=utf-8', 'a,b,c' ], // eslint-disable-line no-multi-spaces + [ 'GeoJSON', 'myfile.geojson', 'application/geo+json', '{}' ], // eslint-disable-line no-multi-spaces + ].forEach(([ humanType, filename, officialContentType, fileContents ]) => { + describe(`special handling for ${humanType}`, () => { + it('should attach the given file and serve it with supplied mime type', testService((service) => + service.login('alice', (asAlice) => + asAlice.post('/v1/projects/1/forms?publish=true') + .set('Content-Type', 'application/xml') + .send(testData.forms.binaryType) + .expect(200) + .then(() => asAlice.post('/v1/projects/1/forms/binaryType/submissions') + .send(testData.instances.binaryType.withFile(filename)) + .set('Content-Type', 'text/xml') + .expect(200) + .then(() => asAlice.post(`/v1/projects/1/forms/binaryType/submissions/with-file/attachments/${filename}`) + .set('Content-Type', 'application/x-abiword') + .send(fileContents) + .expect(200) + .then(() => asAlice.get(`/v1/projects/1/forms/binaryType/submissions/with-file/attachments/${filename}`) + .expect(200) + .then(({ headers, text }) => { + headers['content-type'].should.equal('application/x-abiword'); + text.toString().should.equal(fileContents); // use 'text' instead of 'body' to avoid supertest response parsing + }))))))); + + it(`should attach a given ${humanType} file with empty Content-Type and serve it with correct mime type`, testService((service) => + service.login('alice', (asAlice) => + asAlice.post('/v1/projects/1/forms?publish=true') + .set('Content-Type', 'application/xml') + .send(testData.forms.binaryType) + .expect(200) + .then(() => asAlice.post('/v1/projects/1/forms/binaryType/submissions') + .send(testData.instances.binaryType.withFile(filename)) + .set('Content-Type', 'text/xml') + .expect(200) + .then(() => asAlice.post(`/v1/projects/1/forms/binaryType/submissions/with-file/attachments/${filename}`) + .send(fileContents) + .set('Content-Type', '') // N.B. must be called _after_ send() + .expect(200) + .then(() => asAlice.get(`/v1/projects/1/forms/binaryType/submissions/with-file/attachments/${filename}`) + .expect(200) + .then(({ headers, text }) => { + headers['content-type'].should.equal(officialContentType); + text.toString().should.equal(fileContents); // use 'text' instead of 'body' to avoid supertest response parsing + }))))))); + + it(`should attach a given ${humanType} file with missing Content-Type and serve it with correct mime type`, testService((service) => + service.login('alice', (asAlice) => + asAlice.post('/v1/projects/1/forms?publish=true') + .set('Content-Type', 'application/xml') + .send(testData.forms.binaryType) + .expect(200) + .then(() => asAlice.post('/v1/projects/1/forms/binaryType/submissions') + .send(testData.instances.binaryType.withFile(filename)) + .set('Content-Type', 'text/xml') + .expect(200) + .then(() => asAlice.post(`/v1/projects/1/forms/binaryType/submissions/with-file/attachments/${filename}`) + .send(fileContents) + .unset('Content-Type') // N.B. must be called _after_ send() + .expect(200) + .then(() => asAlice.get(`/v1/projects/1/forms/binaryType/submissions/with-file/attachments/${filename}`) + .expect(200) + .then(({ headers, text }) => { + headers['content-type'].should.equal(officialContentType); + text.toString().should.equal(fileContents); // use 'text' instead of 'body' to avoid supertest response parsing + }))))))); + }); + }); + it('should log an audit entry about initial attachment', testService((service, { Audits, Forms, Submissions, SubmissionAttachments }) => service.login('alice', (asAlice) => asAlice.post('/v1/projects/1/forms?publish=true') From a18655c7e77a3367755e52b8fac72dc4171097c7 Mon Sep 17 00:00:00 2001 From: alxndrsn Date: Mon, 13 Jan 2025 07:47:51 +0000 Subject: [PATCH 2/3] docs --- docs/api.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/api.yaml b/docs/api.yaml index c25a55925..7ecc6cbf1 100644 --- a/docs/api.yaml +++ b/docs/api.yaml @@ -3885,7 +3885,7 @@ paths: - Individual Form summary: Downloading a Form Attachment description: |- - To download a single file, use this endpoint. The appropriate `Content-Disposition` (attachment with a filename) and `Content-Type` (based on the type supplied at upload time) will be given. + To download a single file, use this endpoint. The appropriate `Content-Disposition` (attachment with a filename) and `Content-Type` (based on the type supplied at upload time, or a type derived from the file extension if no `Content-Type` was supplied at upload time) will be given. This endpoint supports `ETag`, which can be used to avoid downloading the same content more than once. When an API consumer calls this endpoint, it returns a value in `ETag` header, you can pass this value in the header `If-None-Match` of subsequent requests. If the file has not been changed since the previous request, you will receive `304 Not Modified` response otherwise you'll get the latest file. operationId: Downloading a Form Attachment @@ -5116,7 +5116,7 @@ paths: - Published Form Versions summary: Downloading a Form Version Attachment description: |- - To download a single file, use this endpoint. The appropriate `Content-Disposition` (attachment with a filename) and `Content-Type` (based on the type supplied at upload time) will be given. + To download a single file, use this endpoint. The appropriate `Content-Disposition` (attachment with a filename) and `Content-Type` (based on the type supplied at upload time, or a type derived from the file extension if no `Content-Type` was supplied at upload time) will be given. This endpoint supports `ETag` header, which can be used to avoid downloading the same content more than once. When an API consumer calls this endpoint, the endpoint returns a value in `ETag` header. If you pass that value in the `If-None-Match` header of a subsequent request, then if the file has not been changed since the previous request, you will receive `304 Not Modified` response; otherwise you'll get the latest file. operationId: Downloading a Form Version Attachment From 31c21f9f523a9154869253640186174b8717a524 Mon Sep 17 00:00:00 2001 From: alxndrsn Date: Mon, 13 Jan 2025 08:05:03 +0000 Subject: [PATCH 3/3] lowercase ext --- lib/util/blob.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/util/blob.js b/lib/util/blob.js index 6dd38bfdc..5362dc331 100644 --- a/lib/util/blob.js +++ b/lib/util/blob.js @@ -70,7 +70,7 @@ async function blobResponse(s3, filename, blob) { function defaultMimetypeFor(filename) { if (!filename) return null; - switch (extname(filename)) { + switch (extname(filename).toLowerCase()) { case '.csv': return 'text/csv'; // eslint-disable-line no-multi-spaces case '.geojson': return 'application/geo+json'; default: return null; // eslint-disable-line no-multi-spaces