Skip to content

Commit 2ccdec0

Browse files
Merge pull request #358 from eccenca/feature/provideHtmlEntitiesDecoding-CMEM-7032
Provide functionality to decode HTML entities (CMEM-7032)
2 parents 4125ac2 + be337c3 commit 2ccdec0

File tree

8 files changed

+104
-9
lines changed

8 files changed

+104
-9
lines changed

CHANGELOG.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -54,6 +54,7 @@ This is a major release, and it might be not compatible with your current usage
5454
- `colorCalculateDistance()`: calculates the difference between 2 colors using the simple CIE76 formula
5555
- `textToColorHash()`: calculates a color from a text string
5656
- `reduceToText`: shrinks HTML content and React elements to plain text, used for `<TextReducer />`
57+
- `decodeHtmlEntities`: decodes a string of HTML text, mapping HTML entities back to their UTF-8 characters
5758
- SCSS color functions
5859
- `eccgui-color-var`: returns a var of a custom property used for palette color
5960
- `eccgui-color-mix`: mix 2 colors in `srgb`, works with all types of color values and CSS custom properties

package.json

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -87,6 +87,7 @@
8787
"codemirror": "^6.0.1",
8888
"color": "^4.2.3",
8989
"compute-scroll-into-view": "^3.1.1",
90+
"he": "^1.2.0",
9091
"jshint": "^2.13.6",
9192
"lodash": "^4.17.21",
9293
"n3": "^1.25.1",
@@ -134,6 +135,7 @@
134135
"@testing-library/react": "^12.1.5",
135136
"@types/codemirror": "^5.60.15",
136137
"@types/color": "^3.0.6",
138+
"@types/he": "^1.2.3",
137139
"@types/jest": "^29.5.14",
138140
"@types/jshint": "^2.12.4",
139141
"@types/lodash": "^4.17.16",

src/common/index.ts

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,5 @@
1+
import { decode } from "he";
2+
13
import { invisibleZeroWidthCharacters } from "./utils/characters";
24
import { colorCalculateDistance } from "./utils/colorCalculateDistance";
35
import decideContrastColorValue from "./utils/colorDecideContrastvalue";
@@ -6,7 +8,8 @@ import getColorConfiguration from "./utils/getColorConfiguration";
68
import { getScrollParent } from "./utils/getScrollParent";
79
import { getGlobalVar, setGlobalVar } from "./utils/globalVars";
810
import { openInNewTab } from "./utils/openInNewTab";
9-
import { reduceToText } from "./utils/reduceToText"
11+
import { reduceToText } from "./utils/reduceToText";
12+
export type { DecodeOptions as DecodeHtmlEntitiesOptions } from "he";
1013
export type { IntentTypes as IntentBaseTypes } from "./Intent";
1114

1215
export const utils = {
@@ -20,5 +23,6 @@ export const utils = {
2023
getScrollParent,
2124
getEnabledColorsFromPalette,
2225
textToColorHash,
23-
reduceToText
26+
reduceToText,
27+
decodeHtmlEntities: decode,
2428
};

src/common/utils/reduceToText.tsx

Lines changed: 30 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3,19 +3,20 @@ import { renderToString } from "react-dom/server";
33
import * as ReactIs from "react-is";
44

55
import { TextReducerProps } from "./../../components/TextReducer/TextReducer";
6+
import { DecodeHtmlEntitiesOptions, utils } from "./../";
67

78
export interface ReduceToTextFuncType {
89
(
910
/**
1011
* Component or text to reduce HTML markup content to plain text.
1112
*/
1213
input: React.ReactNode | React.ReactNode[] | string,
13-
options?: Pick<TextReducerProps, "maxNodes" | "maxLength">
14+
options?: Pick<TextReducerProps, "maxNodes" | "maxLength" | "decodeHtmlEntities" | "decodeHtmlEntitiesOptions">
1415
): string;
1516
}
1617

1718
export const reduceToText: ReduceToTextFuncType = (input, options) => {
18-
const { maxNodes, maxLength } = options || {};
19+
const { maxNodes, maxLength, decodeHtmlEntities } = options || {};
1920
const content: React.ReactNode | React.ReactNode[] = input;
2021
let nodeCount = 0;
2122

@@ -46,6 +47,33 @@ export const reduceToText: ReduceToTextFuncType = (input, options) => {
4647
// Basic HTML cleanup
4748
text = text.replace(/<[^\s][^>]*>/g, "").replace(/\n/g, " ");
4849

50+
if (decodeHtmlEntities) {
51+
const decodeDefaultOptions = {
52+
isAttributeValue: true,
53+
strict: true,
54+
} as DecodeHtmlEntitiesOptions;
55+
let decodeErrors = 0;
56+
// we decode in pieces to apply some error tolerance even in strict mode
57+
text = text
58+
.split(" ")
59+
.map((value) => {
60+
try {
61+
return utils.decodeHtmlEntities(value, {
62+
...decodeDefaultOptions,
63+
...options?.decodeHtmlEntitiesOptions,
64+
});
65+
} catch {
66+
decodeErrors++;
67+
return value;
68+
}
69+
})
70+
.join(" ");
71+
if (decodeErrors > 0) {
72+
// eslint-disable-next-line no-console
73+
console.warn(`${decodeErrors} parse error(s) for decodeHtmlEntities, return un-decoded text`, text);
74+
}
75+
}
76+
4977
if (typeof maxLength === "number") {
5078
text = text.slice(0, maxLength);
5179
}

src/components/TextReducer/TextReducer.stories.tsx

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,8 +18,9 @@ Default.args = {
1818
<LoremIpsum p={1} avgSentencesPerParagraph={1} random={false} />,
1919
"Simple text with URL http://example.com/ that should not get parsed.",
2020
"a < b to test equations in text like b > a.",
21+
`Something with a "quote" in it.`,
2122
<>
22-
<Markdown>{`* This\n* is\n* a\n* list\n\nwritten in Markdown.`}</Markdown>
23+
<Markdown>{`* This\n* is\n* a\n* list\n\nwritten in Markdown\n* containing a few HTML 'entities' & "quotes".`}</Markdown>
2324
<HtmlContentBlock>
2425
<h1>Block with sub elements</h1>
2526
<LoremIpsum p={3} avgSentencesPerParagraph={3} random={false} />
Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,44 @@
1+
import React from "react";
2+
import {render, RenderResult} from "@testing-library/react";
3+
4+
import "@testing-library/jest-dom";
5+
6+
import { Markdown, TextReducer } from "./../../";
7+
import { Default as TextReducerStory } from "./TextReducer.stories";
8+
9+
describe("TextReducer", () => {
10+
const textMustExist = (queryByText: RenderResult["queryByText"], text: string) => {
11+
expect(queryByText(text, { exact: false })).not.toBeNull();
12+
}
13+
const textMustNotExist = (queryByText: RenderResult["queryByText"], text: string) => {
14+
expect(queryByText(text, { exact: false })).toBeNull();
15+
}
16+
it("should display encoded HTML entities by default if they are used in the transformed markup", () => {
17+
const { queryByText } = render(<TextReducer {...TextReducerStory.args} />);
18+
textMustExist(queryByText, "&#x27;entities&#x27; &amp; &quot;quotes&quot;");
19+
textMustNotExist(queryByText, `'entities' & "quotes"`);
20+
});
21+
it("should not display encoded HTML entities if `decodeHtmlEntities` is enabled", () => {
22+
const { queryByText } = render(<TextReducer {...TextReducerStory.args} decodeHtmlEntities />);
23+
textMustNotExist(queryByText, "&#x27;entities&#x27; &amp; &quot;quotes&quot;");
24+
textMustExist(queryByText, `'entities' & "quotes"`);
25+
});
26+
it("should only decode if correct encoded HTML entities are found (strict mode)", () => {
27+
const { queryByText } = render(
28+
<TextReducer decodeHtmlEntities>
29+
<Markdown>&</Markdown>&amp foo&ampbar
30+
</TextReducer>
31+
);
32+
textMustExist(queryByText, "& &amp foo&ampbar");
33+
textMustNotExist(queryByText, "& & foo&ampbar");
34+
});
35+
it("should allow decoding non-strict encoded HTML entities", () => {
36+
const { queryByText } = render(
37+
<TextReducer decodeHtmlEntities decodeHtmlEntitiesOptions={{ strict: false }}>
38+
<Markdown>&</Markdown>&amp foo&ampbar
39+
</TextReducer>
40+
);
41+
textMustNotExist(queryByText, "& &amp foo&ampbar");
42+
textMustExist(queryByText, "& & foo&ampbar");
43+
});
44+
});

src/components/TextReducer/TextReducer.tsx

Lines changed: 14 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
import React from "react";
22

3-
import { reduceToText } from "../../common/utils/reduceToText";
3+
import { DecodeHtmlEntitiesOptions, utils } from "../../common";
44
import { CLASSPREFIX as eccgui } from "../../configuration/constants";
55

66
import { OverflowText, OverflowTextProps } from "./../Typography";
@@ -24,6 +24,17 @@ export interface TextReducerProps extends Pick<React.HTMLAttributes<HTMLElement>
2424
* Specify more `OverflowText` properties used when `useOverflowTextWrapper` is set to `true`.
2525
*/
2626
overflowTextProps?: Omit<OverflowTextProps, "passDown">;
27+
/**
28+
* If you transform HTML markup to text then the result could contain HTML entity encoded strings.
29+
* By enabling this option they are decoded back to their original characters.
30+
*/
31+
decodeHtmlEntities?: boolean;
32+
/**
33+
* Set the options used to decode the HTML entities, if `decodeHtmlEntities` is enabled.
34+
* Internally we use the `he` library, see their [documentation on decode options](https://www.npmjs.com/package/he#hedecodehtml-options).
35+
* If not set, we use `{ isAttributeValue: true, strict: true }` as the default value.
36+
*/
37+
decodeHtmlEntitiesOptions?: DecodeHtmlEntitiesOptions;
2738
}
2839

2940
/**
@@ -32,16 +43,15 @@ export interface TextReducerProps extends Pick<React.HTMLAttributes<HTMLElement>
3243
*/
3344
export const TextReducer = ({
3445
children,
35-
maxNodes,
36-
maxLength,
3746
useOverflowTextWrapper,
3847
overflowTextProps,
48+
...reduceToTextOptions
3949
}: TextReducerProps) => {
4050
if (typeof children === "undefined") {
4151
return <></>;
4252
}
4353

44-
const shrinkedContent = reduceToText(children, { maxLength, maxNodes });
54+
const shrinkedContent = utils.reduceToText(children, reduceToTextOptions);
4555

4656
return useOverflowTextWrapper ? (
4757
<OverflowText

yarn.lock

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3079,6 +3079,11 @@
30793079
dependencies:
30803080
"@types/unist" "*"
30813081

3082+
"@types/he@^1.2.3":
3083+
version "1.2.3"
3084+
resolved "https://registry.yarnpkg.com/@types/he/-/he-1.2.3.tgz#c33ca3096f30cbd5d68d78211572de3f9adff75a"
3085+
integrity sha512-q67/qwlxblDzEDvzHhVkwc1gzVWxaNxeyHUBF4xElrvjL11O+Ytze+1fGpBHlr/H9myiBUaUXNnNPmBHxxfAcA==
3086+
30823087
"@types/hoist-non-react-statics@^3.3.0":
30833088
version "3.3.6"
30843089
resolved "https://registry.yarnpkg.com/@types/hoist-non-react-statics/-/hoist-non-react-statics-3.3.6.tgz#6bba74383cdab98e8db4e20ce5b4a6b98caed010"

0 commit comments

Comments
 (0)