From 18c8bc0efd56c0f4ec20d954dc421ae7ca522972 Mon Sep 17 00:00:00 2001
From: Phillip Barta <barta.phillip@gmail.com>
Date: Mon, 15 Dec 2025 18:08:18 +0100
Subject: [PATCH] refactor: explicit charset validation functions for JSON and
 URL-encoded payloads

---
 HISTORY.md              |  1 +
 README.md               |  6 ++---
 lib/types/json.js       |  5 ++--
 lib/types/urlencoded.js |  5 ++--
 lib/utils.js            | 32 +++++++++++++++++++++-
 test/json.js            | 21 +++------------
 test/utils.js           | 59 +++++++++++++++++++++++++++++++++++++++--
 7 files changed, 100 insertions(+), 29 deletions(-)

diff --git a/HISTORY.md b/HISTORY.md
index 5bf9f59b..609caa3c 100644
--- a/HISTORY.md
+++ b/HISTORY.md
@@ -3,6 +3,7 @@ unreleased
 
 * fix: use static exports instead of lazy getters to improve ESM compatibility
 * feat: add subpath exports for individual parsers
+* feat: JSON parser now only accepts UTF-8 encoding per RFC 8259 section 8.1; other `utf-*` charsets such as UTF-16 and UTF-32 are now rejected with a 415
 
 2.2.2 / 2026-01-07
 =========================
diff --git a/README.md b/README.md
index 579be488..2dd95eff 100644
--- a/README.md
+++ b/README.md
@@ -71,9 +71,9 @@ The various errors returned by this module are described in the
 ### bodyParser.json([options])
 
 Returns middleware that only parses `json` and only looks at requests where
-the `Content-Type` header matches the `type` option. This parser accepts any
-Unicode encoding of the body and supports automatic inflation of `gzip`,
-`br` (brotli) and `deflate` encodings.
+the `Content-Type` header matches the `type` option. This parser accepts only
+UTF-8 encoded bodies, per [RFC 8259 section 8.1](https://datatracker.ietf.org/doc/html/rfc8259#section-8.1),
+and supports automatic inflation of `gzip`, `br` (brotli) and `deflate` encodings.
 
 A new `body` object containing the parsed data is populated on the `request`
 object after the middleware (i.e. `req.body`).
diff --git a/lib/types/json.js b/lib/types/json.js
index 23c73577..e73f3dc6 100644
--- a/lib/types/json.js
+++ b/lib/types/json.js
@@ -14,7 +14,7 @@
 
 var debug = require('debug')('body-parser:json')
 var read = require('../read')
-var { normalizeOptions } = require('../utils')
+var { normalizeOptions, isValidJsonCharset } = require('../utils')
 
 /**
  * Module exports.
@@ -80,8 +80,7 @@ function json (options) {
 
   const readOptions = {
     ...normalizedOptions,
-    // assert charset per RFC 7159 sec 8.1
-    isValidCharset: (charset) => charset.slice(0, 4) === 'utf-'
+    isValidCharset: isValidJsonCharset
   }
 
   return function jsonParser (req, res, next) {
diff --git a/lib/types/urlencoded.js b/lib/types/urlencoded.js
index 4b08631c..31fdce3e 100644
--- a/lib/types/urlencoded.js
+++ b/lib/types/urlencoded.js
@@ -16,7 +16,7 @@ var createError = require('http-errors')
 var debug = require('debug')('body-parser:urlencoded')
 var read = require('../read')
 var qs = require('qs')
-var { normalizeOptions } = require('../utils')
+var { normalizeOptions, isValidUrlencodedCharset } = require('../utils')
 
 /**
  * Module exports.
@@ -43,8 +43,7 @@ function urlencoded (options) {
 
   const readOptions = {
     ...normalizedOptions,
-    // assert charset
-    isValidCharset: (charset) => charset === 'utf-8' || charset === 'iso-8859-1'
+    isValidCharset: isValidUrlencodedCharset
   }
 
   return function urlencodedParser (req, res, next) {
diff --git a/lib/utils.js b/lib/utils.js
index e0bf9741..33e21d2c 100644
--- a/lib/utils.js
+++ b/lib/utils.js
@@ -14,7 +14,9 @@ var typeis = require('type-is')
 module.exports = {
   getCharset,
   normalizeOptions,
-  passthrough
+  passthrough,
+  isValidJsonCharset,
+  isValidUrlencodedCharset
 }
 
 /**
@@ -96,3 +98,31 @@ function normalizeOptions (options, defaultType) {
 function passthrough (value) {
   return value
 }
+
+/**
+ * Determines whether a charset is permitted for application/json payloads.
+ *
+ * Per RFC 8259 section 8.1, JSON text exchanged between systems that are not part of a closed
+ * ecosystem MUST be encoded using UTF-8 [RFC 3629].
+ * Src: https://datatracker.ietf.org/doc/html/rfc8259#section-8.1
+ *
+ * @param {string | null | undefined} charset - charset label, already lowercased (the comparison is case-sensitive)
+ * @returns {boolean}
+ * @private
+ */
+function isValidJsonCharset (charset) {
+  return charset === 'utf-8'
+}
+
+/**
+ * Determines whether a charset is permitted for application/x-www-form-urlencoded payloads.
+ *
+ * Per the HTML Living Standard, the only supported encodings are UTF-8 and ISO-8859-1.
+ *
+ * @param {string | null | undefined} charset - charset label, already lowercased (the comparison is case-sensitive)
+ * @returns {boolean}
+ * @private
+ */
+function isValidUrlencodedCharset (charset) {
+  return charset === 'utf-8' || charset === 'iso-8859-1'
+}
diff --git a/test/json.js b/test/json.js
index 9582e7d9..f2319206 100644
--- a/test/json.js
+++ b/test/json.js
@@ -496,19 +496,6 @@ describe('bodyParser.json()', function () {
         .expect(200, '{"user":"tobi"}', done)
     })
 
-    it('should work with different charsets', function (done) {
-      var server = createServer({
-        verify: function (req, res, buf) {
-          if (buf[0] === 0x5b) throw new Error('no arrays')
-        }
-      })
-
-      var test = request(server).post('/')
-      test.set('Content-Type', 'application/json; charset=utf-16')
-      test.write(Buffer.from('feff007b0022006e0061006d00650022003a00228bba0022007d', 'hex'))
-      test.expect(200, '{"name":"论"}', done)
-    })
-
     it('should 415 on unknown charset prior to verify', function (done) {
       var server = createServer({
         verify: function (req, res, buf) {
@@ -621,18 +608,18 @@ describe('bodyParser.json()', function () {
       test.expect(200, '{"name":"论"}', done)
     })
 
-    it('should parse utf-16', function (done) {
+    it('should fail on utf-16', function (done) {
       var test = request(this.server).post('/')
       test.set('Content-Type', 'application/json; charset=utf-16')
       test.write(Buffer.from('feff007b0022006e0061006d00650022003a00228bba0022007d', 'hex'))
-      test.expect(200, '{"name":"论"}', done)
+      test.expect(415, '[charset.unsupported] unsupported charset "UTF-16"', done)
     })
 
-    it('should parse utf-32', function (done) {
+    it('should fail on utf-32', function (done) {
       var test = request(this.server).post('/')
       test.set('Content-Type', 'application/json; charset=utf-32')
       test.write(Buffer.from('fffe00007b000000220000006e000000610000006d00000065000000220000003a00000022000000ba8b0000220000007d000000', 'hex'))
-      test.expect(200, '{"name":"论"}', done)
+      test.expect(415, '[charset.unsupported] unsupported charset "UTF-32"', done)
     })
 
     it('should parse when content-length != char length', function (done) {
diff --git a/test/utils.js b/test/utils.js
index 364d3838..a6130cf4 100644
--- a/test/utils.js
+++ b/test/utils.js
@@ -1,7 +1,7 @@
 'use strict'
 
-const assert = require('node:assert')
-const { normalizeOptions } = require('../lib/utils.js')
+const assert = require('node:assert/strict')
+const { normalizeOptions, isValidJsonCharset, isValidUrlencodedCharset } = require('../lib/utils.js')
 
 describe('normalizeOptions(options, defaultType)', () => {
   it('should return default options when no options are provided', () => {
@@ -161,3 +161,58 @@ describe('normalizeOptions(options, defaultType)', () => {
     })
   })
 })
+
+describe('isValidJsonCharset(charset)', () => {
+  it('should return false for missing/empty values', () => {
+    assert.equal(isValidJsonCharset(), false)
+    assert.equal(isValidJsonCharset(undefined), false)
+    assert.equal(isValidJsonCharset(null), false)
+    assert.equal(isValidJsonCharset(''), false)
+  })
+
+  it('should return true for "utf-8"', () => {
+    assert.equal(isValidJsonCharset('utf-8'), true)
+  })
+
+  it('should return false for other utf-* labels (not allowed by RFC 8259)', () => {
+    assert.equal(isValidJsonCharset('utf-7'), false)
+    assert.equal(isValidJsonCharset('utf-16'), false)
+    assert.equal(isValidJsonCharset('utf-32'), false)
+    assert.equal(isValidJsonCharset('utf-16le'), false)
+    assert.equal(isValidJsonCharset('utf-16be'), false)
+    assert.equal(isValidJsonCharset('utf-32le'), false)
+    assert.equal(isValidJsonCharset('utf-32be'), false)
+    assert.equal(isValidJsonCharset('utf-1'), false)
+  })
+
+  it('should return false for non-JSON charsets', () => {
+    assert.equal(isValidJsonCharset('us-ascii'), false)
+    assert.equal(isValidJsonCharset('iso-8859-1'), false)
+    assert.equal(isValidJsonCharset('windows-1252'), false)
+  })
+})
+
+describe('isValidUrlencodedCharset(charset)', () => {
+  it('should return false for missing/empty values', () => {
+    assert.equal(isValidUrlencodedCharset(), false)
+    assert.equal(isValidUrlencodedCharset(undefined), false)
+    assert.equal(isValidUrlencodedCharset(null), false)
+    assert.equal(isValidUrlencodedCharset(''), false)
+  })
+
+  it('should return true for "utf-8" and "iso-8859-1"', () => {
+    assert.equal(isValidUrlencodedCharset('utf-8'), true)
+    assert.equal(isValidUrlencodedCharset('iso-8859-1'), true)
+  })
+
+  it('should return false for other UTF encodings', () => {
+    assert.equal(isValidUrlencodedCharset('utf-16'), false)
+    assert.equal(isValidUrlencodedCharset('utf-32'), false)
+  })
+
+  it('should return false for non-form encodings', () => {
+    assert.equal(isValidUrlencodedCharset('us-ascii'), false)
+    assert.equal(isValidUrlencodedCharset('windows-1252'), false)
+    assert.equal(isValidUrlencodedCharset('shift_jis'), false)
+  })
+})