From 10369bdf8a2f9e6df6fc3f63a19808079597ba3d Mon Sep 17 00:00:00 2001 From: He Zhenxing Date: Fri, 6 Mar 2026 20:53:26 +0800 Subject: [PATCH] fix: escape backslash in param encoding to fix newline in ViewId/state (#187) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Previously, sanitizeParamText and desanitizeParamText did not escape the backslash character itself. This caused a collision: JSON's escape sequence for a newline character (the two chars '\' and 'n', written as \n in JSON) was indistinguishable from the param encoding's escape for a real newline. When a ViewId contained a list of Text with newline characters (e.g. Message ["\n"]), the list was JSON-encoded to ["\n"], and then desanitizeParamText would convert \n back into a real newline inside the JSON string, producing invalid JSON. The subsequent parse failure meant no handler matched the viewId, giving the error: "No Handler for Event viewId when view id or state data has new line character" Fix: escape backslash first in sanitizeParamText (\ → \\), and unescape it last in desanitizeParamText via a single-pass char-by-char decoder. This ensures that: - A real newline in a plain Text param is still encoded as \n (wire) and decoded back to a real newline. - A backslash in any param value (e.g. from JSON encoding) is encoded as \\ on the wire and decoded back to a single backslash, preserving JSON escape sequences intact. Added regression tests in EncodedSpec and ViewIdSpec covering the exact scenario from the issue report. --- src/Web/Hyperbole/Data/Encoded.hs | 36 ++++++++++++++++++++++++++----- test/Test/EncodedSpec.hs | 11 ++++++++++ test/Test/ViewIdSpec.hs | 17 +++++++++++++++ 3 files changed, 59 insertions(+), 5 deletions(-) diff --git a/src/Web/Hyperbole/Data/Encoded.hs b/src/Web/Hyperbole/Data/Encoded.hs index b04f199d..d8c3d710 100644 --- a/src/Web/Hyperbole/Data/Encoded.hs +++ b/src/Web/Hyperbole/Data/Encoded.hs @@ -164,11 +164,33 @@ decodeParam = \case t -> ParamValue $ desanitizeParamText t --- replace all underscores that are NOT "\\_" with spaces +-- Param encoding scheme: +-- Wire format must not contain bare spaces (field separator) or real newlines. +-- We use backslash as the escape character: +-- '\' → "\\" (escape backslash itself, so it cannot be confused with an escape prefix) +-- '\n' → "\n" (literal backslash + 'n', for real newline characters) +-- '_' → "\_" (escape underscore, since bare underscore encodes space) +-- ' ' → "_" (encode space as underscore) +-- Decoding is the single-pass reverse of the above. +-- +-- The critical invariant: backslash is escaped FIRST on encode and unescaped +-- LAST on decode. This ensures that JSON escape sequences (e.g. the two chars +-- '\' 'n' inside "[\"\n\"]") are treated as an escaped backslash followed by +-- a plain 'n', not as the param newline escape. +-- See: https://github.com/seanhess/hyperbole/issues/187 desanitizeParamText :: Text -> Text -desanitizeParamText = - T.replace "\\ " "_" . T.replace "_" " " . T.replace "\\n" "\n" +desanitizeParamText = go + where + go t = case T.uncons t of + Nothing -> "" + Just ('\\', rest) -> case T.uncons rest of + Just ('\\', rest') -> T.cons '\\' (go rest') -- \\ → \ + Just ('n', rest') -> T.cons '\n' (go rest') -- \n → newline + Just ('_', rest') -> T.cons '_' (go rest') -- \_ → _ + _ -> T.cons '\\' (go rest) -- bare backslash (shouldn't occur) + Just ('_', rest) -> T.cons ' ' (go rest) -- _ → space + Just (c, rest) -> T.cons c (go rest) -- other chars verbatim -- | T.isSuffixOf "\\" seg = T.dropEnd 1 seg <> "_" <> txt @@ -190,10 +212,14 @@ encodeParam (ParamValue t) = "" -> "|" _ -> sanitizeParamText t where - -- Q: Should we also sanitize \r\n? + -- Encode a param value for the wire format. + -- Backslash MUST be escaped first (before newline), otherwise a literal + -- backslash followed by 'n' in the input (e.g. from JSON encoding) would + -- be indistinguishable from the newline escape on the wire. + -- See: https://github.com/seanhess/hyperbole/issues/187 sanitizeParamText :: Text -> Text sanitizeParamText = - T.replace " " "_" . T.replace "_" "\\_" . T.replace "\n" "\\n" + T.replace " " "_" . T.replace "_" "\\_" . T.replace "\n" "\\n" . T.replace "\\" "\\\\" -- decodeParamValue :: (FromParam a) => Text -> Either String a diff --git a/test/Test/EncodedSpec.hs b/test/Test/EncodedSpec.hs index ccec5ceb..ab102aae 100644 --- a/test/Test/EncodedSpec.hs +++ b/test/Test/EncodedSpec.hs @@ -232,6 +232,17 @@ spec = withMarkers ["encoded"] $ do print $ encode l decode @Sum (encode l) `shouldBe` Just l + -- Regression tests for https://github.com/seanhess/hyperbole/issues/187 + -- A ViewId (or state) containing a list with newline characters must + -- encode/decode correctly. Previously, desanitizeParamText blindly + -- replaced the JSON escape sequence "\\n" with a real newline, corrupting + -- the JSON and causing "No Handler for Event viewId". + it "list with newline character round-trips correctly (issue #187)" $ do + decode @Sum (encode (List ["\n"])) `shouldBe` Just (List ["\n"]) + + it "list with newline in multiple elements" $ do + decode @Sum (encode (List ["\n", "hello\nworld", "plain"])) `shouldBe` Just (List ["\n", "hello\nworld", "plain"]) + it "strings" $ do decode @Sum (encode (Str "")) `shouldBe` pure (Str "") decode @Sum (encode (Str " ")) `shouldBe` pure (Str " ") diff --git a/test/Test/ViewIdSpec.hs b/test/Test/ViewIdSpec.hs index e4650095..226c8a3d 100644 --- a/test/Test/ViewIdSpec.hs +++ b/test/Test/ViewIdSpec.hs @@ -34,6 +34,11 @@ data Product4 = Product4 Text Text Text Text deriving (Generic, Show, Eq, Read, ViewId) +-- Regression test for https://github.com/seanhess/hyperbole/issues/187 +data MessageView = MessageView [Text] + deriving (Generic, Show, Eq, ViewId) + + newtype Id a = Id {fromId :: Text} deriving newtype (Eq, ToJSON, FromJSON, Ord, Show, ToParam, FromParam) deriving (Generic) @@ -80,6 +85,18 @@ spec = withMarkers ["encoded"] $ do let vid = encodeViewId p decodeViewId vid `shouldBe` pure p + -- Regression tests for https://github.com/seanhess/hyperbole/issues/187 + -- When a ViewId contains a list of Text with newline characters, the + -- encoded/decoded form must round-trip correctly. + describe "list with newline (issue #187)" $ do + it "roundtrips MessageView with single newline" $ do + let v = MessageView ["\n"] + decodeViewId (encodeViewId v) `shouldBe` pure v + + it "roundtrips MessageView with newlines in multiple elements" $ do + let v = MessageView ["\n", "hello\nworld", "plain"] + decodeViewId (encodeViewId v) `shouldBe` pure v + -- describe "Param Attributes" $ do -- it "should serialize basic id" $ do