From 18c8bc0efd56c0f4ec20d954dc421ae7ca522972 Mon Sep 17 00:00:00 2001 From: Phillip Barta Date: Mon, 15 Dec 2025 18:08:18 +0100 Subject: [PATCH] refactor: explicit charset validation functions for JSON and URL-encoded payloads --- HISTORY.md | 1 + README.md | 6 ++--- lib/types/json.js | 5 ++-- lib/types/urlencoded.js | 5 ++-- lib/utils.js | 32 +++++++++++++++++++++- test/json.js | 21 +++------------ test/utils.js | 59 +++++++++++++++++++++++++++++++++++++++-- 7 files changed, 100 insertions(+), 29 deletions(-) diff --git a/HISTORY.md b/HISTORY.md index 5bf9f59b..609caa3c 100644 --- a/HISTORY.md +++ b/HISTORY.md @@ -3,6 +3,7 @@ unreleased * fix: use static exports instead of lazy getters to improve ESM compatibility * feat: add subpath exports for individual parsers +* feat: JSON parser now only accepts UTF-8 encoding per RFC 8259 section 8.1; requests declaring another charset such as UTF-16 or UTF-32 now receive a 415 response 2.2.2 / 2026-01-07 ========================= diff --git a/README.md b/README.md index 579be488..2dd95eff 100644 --- a/README.md +++ b/README.md @@ -71,9 +71,9 @@ The various errors returned by this module are described in the ### bodyParser.json([options]) Returns middleware that only parses `json` and only looks at requests where -the `Content-Type` header matches the `type` option. This parser accepts any -Unicode encoding of the body and supports automatic inflation of `gzip`, -`br` (brotli) and `deflate` encodings. +the `Content-Type` header matches the `type` option. This parser only accepts +UTF-8 encoding of the body per [RFC 8259 section 8.1](https://datatracker.ietf.org/doc/html/rfc8259#section-8.1) +and supports automatic inflation of `gzip`, `br` (brotli) and `deflate` encodings. A new `body` object containing the parsed data is populated on the `request` object after the middleware (i.e. `req.body`).
diff --git a/lib/types/json.js b/lib/types/json.js index 23c73577..e73f3dc6 100644 --- a/lib/types/json.js +++ b/lib/types/json.js @@ -14,7 +14,7 @@ var debug = require('debug')('body-parser:json') var read = require('../read') -var { normalizeOptions } = require('../utils') +var { normalizeOptions, isValidJsonCharset } = require('../utils') /** * Module exports. @@ -80,8 +80,7 @@ function json (options) { const readOptions = { ...normalizedOptions, - // assert charset per RFC 7159 sec 8.1 - isValidCharset: (charset) => charset.slice(0, 4) === 'utf-' + isValidCharset: isValidJsonCharset } return function jsonParser (req, res, next) { diff --git a/lib/types/urlencoded.js b/lib/types/urlencoded.js index 4b08631c..31fdce3e 100644 --- a/lib/types/urlencoded.js +++ b/lib/types/urlencoded.js @@ -16,7 +16,7 @@ var createError = require('http-errors') var debug = require('debug')('body-parser:urlencoded') var read = require('../read') var qs = require('qs') -var { normalizeOptions } = require('../utils') +var { normalizeOptions, isValidUrlencodedCharset } = require('../utils') /** * Module exports. @@ -43,8 +43,7 @@ function urlencoded (options) { const readOptions = { ...normalizedOptions, - // assert charset - isValidCharset: (charset) => charset === 'utf-8' || charset === 'iso-8859-1' + isValidCharset: isValidUrlencodedCharset } return function urlencodedParser (req, res, next) { diff --git a/lib/utils.js b/lib/utils.js index e0bf9741..33e21d2c 100644 --- a/lib/utils.js +++ b/lib/utils.js @@ -14,7 +14,9 @@ var typeis = require('type-is') module.exports = { getCharset, normalizeOptions, - passthrough + passthrough, + isValidJsonCharset, + isValidUrlencodedCharset } /** @@ -96,3 +98,31 @@ function normalizeOptions (options, defaultType) { function passthrough (value) { return value } + +/** + * Determines whether a charset is permitted for application/json payloads. 
+ * + * Per RFC 8259 section 8.1, JSON text exchanged between systems that are not part of a closed + * ecosystem MUST be encoded using UTF-8 [RFC 3629]. + * Src: https://datatracker.ietf.org/doc/html/rfc8259#section-8.1 + * + * @param {string | null | undefined} charset lowercase charset label + * @returns {boolean} + * @private + */ +function isValidJsonCharset (charset) { + return charset === 'utf-8' +} + +/** + * Determines whether a charset is permitted for application/x-www-form-urlencoded payloads. + * + * This parser accepts only UTF-8 and ISO-8859-1 (NOTE(review): the HTML Living Standard citation looks unsupported; confirm the actual source of this limit). + * + * @param {string | null | undefined} charset lowercase charset label + * @returns {boolean} + * @private + */ +function isValidUrlencodedCharset (charset) { + return charset === 'utf-8' || charset === 'iso-8859-1' +} diff --git a/test/json.js b/test/json.js index 9582e7d9..f2319206 100644 --- a/test/json.js +++ b/test/json.js @@ -496,19 +496,6 @@ describe('bodyParser.json()', function () { .expect(200, '{"user":"tobi"}', done) }) - it('should work with different charsets', function (done) { - var server = createServer({ - verify: function (req, res, buf) { - if (buf[0] === 0x5b) throw new Error('no arrays') - } - }) - - var test = request(server).post('/') - test.set('Content-Type', 'application/json; charset=utf-16') - test.write(Buffer.from('feff007b0022006e0061006d00650022003a00228bba0022007d', 'hex')) - test.expect(200, '{"name":"论"}', done) - }) - it('should 415 on unknown charset prior to verify', function (done) { var server = createServer({ verify: function (req, res, buf) { @@ -621,18 +608,18 @@ describe('bodyParser.json()', function () { test.expect(200, '{"name":"论"}', done) }) - it('should parse utf-16', function (done) { + it('should fail on utf-16', function (done) { var test = request(this.server).post('/') test.set('Content-Type', 'application/json; charset=utf-16') test.write(Buffer.from('feff007b0022006e0061006d00650022003a00228bba0022007d', 'hex')) -
test.expect(200, '{"name":"论"}', done) + test.expect(415, '[charset.unsupported] unsupported charset "UTF-16"', done) }) - it('should parse utf-32', function (done) { + it('should fail on utf-32', function (done) { var test = request(this.server).post('/') test.set('Content-Type', 'application/json; charset=utf-32') test.write(Buffer.from('fffe00007b000000220000006e000000610000006d00000065000000220000003a00000022000000ba8b0000220000007d000000', 'hex')) - test.expect(200, '{"name":"论"}', done) + test.expect(415, '[charset.unsupported] unsupported charset "UTF-32"', done) }) it('should parse when content-length != char length', function (done) { diff --git a/test/utils.js b/test/utils.js index 364d3838..a6130cf4 100644 --- a/test/utils.js +++ b/test/utils.js @@ -1,7 +1,7 @@ 'use strict' -const assert = require('node:assert') -const { normalizeOptions } = require('../lib/utils.js') +const assert = require('node:assert/strict') +const { normalizeOptions, isValidJsonCharset, isValidUrlencodedCharset } = require('../lib/utils.js') describe('normalizeOptions(options, defaultType)', () => { it('should return default options when no options are provided', () => { @@ -161,3 +161,58 @@ describe('normalizeOptions(options, defaultType)', () => { }) }) }) + +describe('isValidJsonCharset(charset)', () => { + it('should return false for missing/empty values', () => { + assert.equal(isValidJsonCharset(), false) + assert.equal(isValidJsonCharset(undefined), false) + assert.equal(isValidJsonCharset(null), false) + assert.equal(isValidJsonCharset(''), false) + }) + + it('should return true for "utf-8"', () => { + assert.equal(isValidJsonCharset('utf-8'), true) + }) + + it('should return false for other utf-* labels (not allowed by RFC 8259)', () => { + assert.equal(isValidJsonCharset('utf-7'), false) + assert.equal(isValidJsonCharset('utf-16'), false) + assert.equal(isValidJsonCharset('utf-32'), false) + assert.equal(isValidJsonCharset('utf-16le'), false) + 
assert.equal(isValidJsonCharset('utf-16be'), false) + assert.equal(isValidJsonCharset('utf-32le'), false) + assert.equal(isValidJsonCharset('utf-32be'), false) + assert.equal(isValidJsonCharset('utf-1'), false) + }) + + it('should return false for non-JSON charsets', () => { + assert.equal(isValidJsonCharset('us-ascii'), false) + assert.equal(isValidJsonCharset('iso-8859-1'), false) + assert.equal(isValidJsonCharset('windows-1252'), false) + }) +}) + +describe('isValidUrlencodedCharset(charset)', () => { + it('should return false for missing/empty values', () => { + assert.equal(isValidUrlencodedCharset(), false) + assert.equal(isValidUrlencodedCharset(undefined), false) + assert.equal(isValidUrlencodedCharset(null), false) + assert.equal(isValidUrlencodedCharset(''), false) + }) + + it('should return true for "utf-8" and "iso-8859-1"', () => { + assert.equal(isValidUrlencodedCharset('utf-8'), true) + assert.equal(isValidUrlencodedCharset('iso-8859-1'), true) + }) + + it('should return false for other UTF encodings', () => { + assert.equal(isValidUrlencodedCharset('utf-16'), false) + assert.equal(isValidUrlencodedCharset('utf-32'), false) + }) + + it('should return false for non-form encodings', () => { + assert.equal(isValidUrlencodedCharset('us-ascii'), false) + assert.equal(isValidUrlencodedCharset('windows-1252'), false) + assert.equal(isValidUrlencodedCharset('shift_jis'), false) + }) +})