@@ -216,6 +216,37 @@ describe('ANTLR VBA Main Parser', () => {
216216
217217 console . log ( ' ✅ Successfully parsed VBA code with external type references (Dictionary, Excel.Application, etc.)' ) ;
218218 } ) ;
219+
// Test case: a VBA fixture whose identifiers contain non-ASCII letters must
// produce zero syntax errors, zero implicit tokens, and at least one of the
// expected names as an IDENTIFIER token.
it('should parse VBA code with Unicode identifiers (accented characters) without errors', () => {
  const fixturePath = path.join(__dirname, '../../../test/fixtures/UnicodeIdentifiers.bas');
  const source = fs.readFileSync(fixturePath, 'utf8');

  const parsed = parseAndGetErrors(source);

  logParsingResults(source, parsed);
  const implicit = checkImplicitTokens(parsed);

  // Parsing must succeed outright; any reported errors are echoed in the failure message.
  assert.strictEqual(
    parsed.syntaxErrors,
    0,
    `Expected no syntax errors, but found: ${parsed.errors.join(', ')} `
  );

  // No token may come back as an implicit token; offending type names are listed on failure.
  assert.strictEqual(
    implicit.length,
    0,
    `Found implicit tokens: ${implicit.map((tok) => tok.typeName).join(', ')} `
  );

  // Names the test looks for in the token stream (Latin accents, Polish, Greek, Cyrillic).
  const expectedNames = new Set([
    'café', 'naïve', 'résumé', 'piñata', 'señor', 'mañana', 'jalapeño', 'façade',
    'björk', 'José', 'François', 'Müller', 'Łukasz', 'Αθήνα', 'москва',
    'messäge', 'calculér', 'numbér',
  ]);

  // Keep only IDENTIFIER tokens whose text is exactly one of the expected names.
  const matches = parsed.tokenInfo.filter(
    (tok) => tok.typeName === 'IDENTIFIER' && expectedNames.has(tok.text)
  );

  // Only requires that at least one expected name appears, not all of them.
  assert.ok(matches.length > 0, 'Expected to find Unicode identifiers in the token stream');

  console.log(' ✅ Successfully parsed VBA code with Unicode identifiers (accented characters)');
  console.log(` 📝 Found ${matches.length} Unicode identifiers: ${matches.map((tok) => tok.text).join(', ')} `);
});
219250
220251
221252} ) ;
0 commit comments