@@ -98,7 +98,12 @@ function isEndOfAtomicTag(word: string, tag: string){
9898 return word . substring ( word . length - tag . length - 2 ) === ( '</' + tag ) ;
9999}
100100
/**
 * Matches a complete opening style tag at the very start of a word.
 *
 * Capture groups:
 *   1 - the bare tag name, e.g. 'span'
 *   2 - the optional attribute text including its leading whitespace,
 *       e.g. ' style="font-weight:400"' (undefined when the tag has no attributes)
 *   3 - the closing '>'
 *
 * The closing bracket is required. An earlier form of this pattern also listed
 * `^(?!\w)` as an alternative to '>', but without the `m` flag `^` only matches at
 * index 0, so that alternative could never succeed after the tag name. It is dropped
 * here; group 3 is kept so group numbering is unchanged.
 */
const styleTagsRegExp = /^<(strong|em|b|i|q|cite|mark|dfn|sup|sub|u|s|span|nobr)(\s+[^>]*)?(>)/;
102+
/** A style tag, kept both as its bare name and together with its attributes. */
type Style = {
  /** The raw tag name, e.g. 'span'. */
  tag: string;
  /** The tag name followed by its attribute text, e.g. 'span style="font-weight:400"'. */
  tagWithAttributes: string;
};
102107
103108/**
104109 * Checks if the current word is the beginning of a style tag. A style tag is one whose
@@ -110,24 +115,29 @@ const styleTagsRegExp = /^<(strong|em|b|i|q|cite|mark|dfn|sup|sub|u|s|nobr)(^(?!
 * @return {Style|null} The matched tag (its bare name and its name with attributes) if
 * the word is the beginning of a style tag, null otherwise
112117 */
function isStartOfStyleTag(word: string): Style | null {
  const match = styleTagsRegExp.exec(word);
  // Group 1 is the bare tag name; no match (or an empty name) means this is not a style tag.
  if (!match || !match[1]) {
    return null;
  }
  const tag = match[1];
  // Group 2 is the optional attribute text (leading whitespace included); it is
  // undefined for tags without attributes, in which case nothing is appended.
  const attributes = match[2] ?? '';
  return { tag, tagWithAttributes: tag + attributes };
}
118128
/**
 * Checks if the current word is the end of a style tag (i.e. it has all the characters,
 * except for the end bracket of the closing tag, such as '<strong></strong').
 *
 * @param {string} word The characters of the current token read so far.
 * @param {Style} tag The style whose closing tag to look for. Only its bare tag name
 *                    is compared; the attributes are ignored.
 *
 * @return {boolean} True if the word is now a complete token (including the end tag),
 *                   false otherwise.
 */
function isEndOfStyleTag(word: string, tag: Style): boolean {
  // A word shorter than the closing tag simply fails the suffix check.
  return word.endsWith('</' + tag.tag);
}
132142
/**
 * Matches an opening table, blockquote, list or heading tag at the very start of a
 * word. Capture group 1 is the tag name; capture group 2 is the closing '>'.
 *
 * Only attribute-less tags match: the name must be followed directly by '>'. The
 * pattern used to offer `^(?!\w)` as an alternative to '>', but without the `m` flag
 * `^` only matches at index 0, so that alternative could never succeed. It is dropped
 * here with no change in what the pattern accepts or in group numbering.
 */
const tableTagsRegExp = /^<(table|tbody|thead|tr|th|td|blockquote|ul|ol|li|h[1-6])(>)/;
@@ -186,7 +196,7 @@ function isWrappable(token: string): boolean {
/** A single token of the tokenized HTML, with the tag context recorded for it. */
type Token = {
  /** The token's text. */
  str: string;
  /** Key derived from the token's text. */
  key: string;
  /** Style tags recorded for this token (presumably those open around it; confirm against the tokenizer). */
  styles: Style[];
  /** Table tags recorded for this token (presumably those open around it; confirm against the tokenizer). */
  tableTags: string[];
};
192202
@@ -198,7 +208,7 @@ type Token = {
198208 *
199209 * @return {Object } A token object with a string and key property.
200210 */
201- export function createToken ( currentWord : string , currentStyleTags : string [ ] , currentTableTags : string [ ] ) : Token {
211+ export function createToken ( currentWord : string , currentStyleTags : Style [ ] , currentTableTags : string [ ] ) : Token {
202212 return {
203213 str : currentWord ,
204214 key : getKeyForToken ( currentWord ) ,
@@ -272,7 +282,7 @@ function splitStringLocaleAware(str: string): string[] {
272282}
273283
274284
275- function splitStringLocaleAwareAndCreateTokens ( currentWord : string , currentStyleTags : string [ ] , currentTableTags : string [ ] ) : Token [ ] {
285+ function splitStringLocaleAwareAndCreateTokens ( currentWord : string , currentStyleTags : Style [ ] , currentTableTags : string [ ] ) : Token [ ] {
276286 const parts = splitStringLocaleAware ( currentWord ) ;
277287 const tokens : Token [ ] = [ ] ;
278288 for ( const token of parts ) {
@@ -295,7 +305,7 @@ export function htmlToTokens(html: string): Token[] {
295305 let mode : ParseMode = 'char' ;
296306 let currentWord = '' ;
297307 let currentAtomicTag = '' ;
298- const currentStyleTags : string [ ] = [ ] ;
308+ const currentStyleTags : Style [ ] = [ ] ;
299309 const currentTableTags : string [ ] = [ ] ;
300310 const words : Token [ ] = [ ] ;
301311
@@ -1090,9 +1100,9 @@ function combineTokenNotes(
10901100 return segments . map ( mapFn ) . join ( '' ) ;
10911101}
10921102
1093- function arrayDiff ( a1 : string [ ] , a2 : string [ ] ) {
1094- let beforeArray : string [ ] = [ ] ;
1095- let afterArray : string [ ] = [ ] ;
1103+ function arrayDiff ( a1 : Style [ ] , a2 : Style [ ] ) {
1104+ let beforeArray : Style [ ] = [ ] ;
1105+ let afterArray : Style [ ] = [ ] ;
10961106 let isDiff = false ;
10971107 while ( a1 . length && a2 . length ) {
10981108 const curr1 = a1 . shift ( ) ;
@@ -1111,24 +1121,28 @@ function arrayDiff(a1: string[], a2: string[]) {
11111121 } ) ;
11121122}
11131123
/**
 * Appends a closing tag for every style in `p.styles`, last entry first, so the most
 * recently opened style is closed first.
 *
 * @param {{content: string, styles: Style[]}} p The content built so far and the styles
 *        still open in it. Neither the object nor its array is modified.
 *
 * @return {string} The content followed by the closing tags. Closing tags use the bare
 *         tag name only, never the attributes.
 */
function closeStyles(p: { content: string, styles: Style[] }): string {
  // Work on a copy: reverse() sorts in place and the caller's array must stay intact.
  const closingTags = [...p.styles].reverse().map((style) => `</${style.tag}>`);
  return p.content + closingTags.join('');
}
11201132
11211133function reduceTokens ( tokens : Token [ ] ) {
1122- return closeStyles ( tokens . reduce ( ( acc : { content : string , styles : string [ ] } , curr : Token ) => {
1134+ return closeStyles ( tokens . reduce ( ( acc : { content : string , styles : Style [ ] } , curr : Token ) => {
11231135 let currContent = acc . content ;
11241136 const { before, after } = arrayDiff ( [ ...acc . styles ] , [ ...curr . styles ] ) ;
11251137 before . forEach ( ( ) => {
11261138 const tag = acc . styles . pop ( ) ;
1127- if ( tag ) currContent += `</${ tag } >` ;
1139+ if ( tag ) {
1140+ currContent += `</${ tag . tag } >` ;
1141+ }
11281142 } ) ;
1129- after . forEach ( ( tag : string ) => {
1143+ after . forEach ( ( tag : Style ) => {
11301144 acc . styles . push ( tag ) ;
1131- currContent += `<${ tag } >` ;
1145+ currContent += `<${ tag . tagWithAttributes } >` ;
11321146 } ) ;
11331147 currContent += curr . str ;
11341148 return ( { content : currContent , styles : acc . styles } ) ;
0 commit comments