Skip to content

Commit 17a11c1

Browse files
committed
Add test for Unicode characters
1 parent 972e104 commit 17a11c1

File tree

2 files changed

+39
-0
lines changed

2 files changed

+39
-0
lines changed

server/src/test/antlr-parser.test.ts

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -216,6 +216,37 @@ describe('ANTLR VBA Main Parser', () => {
216216

217217
console.log(' ✅ Successfully parsed VBA code with external type references (Dictionary, Excel.Application, etc.)');
218218
});
219+
220+
// Regression test: a VBA module whose identifiers contain non-ASCII letters must
// lex and parse cleanly, and those names must surface as IDENTIFIER tokens.
it('should parse VBA code with Unicode identifiers (accented characters) without errors', () => {
  // Read the fixture as UTF-8 so the accented characters reach the parser intact.
  const fixturePath = path.join(__dirname, '../../../test/fixtures/UnicodeIdentifiers.bas');
  const source = fs.readFileSync(fixturePath, 'utf8');

  const parsed = parseAndGetErrors(source);

  logParsingResults(source, parsed);
  const implicit = checkImplicitTokens(parsed);

  // No syntax errors are tolerated for identifiers with accented characters.
  assert.strictEqual(parsed.syntaxErrors, 0, `Expected no syntax errors, but found: ${parsed.errors.join(', ')}`);

  // Every token must come from an explicit lexer rule; an implicit token would mean
  // a Unicode name was not recognised as an IDENTIFIER.
  assert.strictEqual(implicit.length, 0, `Found implicit tokens: ${implicit.map(t => t.typeName).join(', ')}`);

  // Names the fixture may declare; at least one of them has to show up as an IDENTIFIER.
  const expectedNames = [
    'café', 'naïve', 'résumé', 'piñata', 'señor', 'mañana', 'jalapeño', 'façade',
    'björk', 'José', 'François', 'Müller', 'Łukasz', 'Αθήνα', 'москва',
    'messäge', 'calculér', 'numbér',
  ];

  // Single pass: keep IDENTIFIER tokens whose text is one of the expected names.
  const matchedTokens = parsed.tokenInfo.filter(
    token => token.typeName === 'IDENTIFIER' && expectedNames.some(name => token.text === name)
  );

  assert.ok(matchedTokens.length > 0, 'Expected to find Unicode identifiers in the token stream');

  const matchedText = matchedTokens.map(t => t.text).join(', ');
  console.log(' ✅ Successfully parsed VBA code with Unicode identifiers (accented characters)');
  console.log(` 📝 Found ${matchedTokens.length} Unicode identifiers: ${matchedText}`);
});
219250

220251

221252
});
Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
Attribute VB_Name = "UnicodeIdentifiers"
2+
Option Explicit
3+
4+
' Test module for Unicode identifiers with Latin accented characters
5+
Public Sub TestUnicodeIdentifiers()
6+
' Variables with Latin accented characters.
' The declaration below is the only statement in this procedure; its
' identifier contains the non-ASCII letter "é".
7+
Dim café As String
8+
End Sub

0 commit comments

Comments
 (0)