@@ -414,7 +414,8 @@ void BindingData::CreatePerIsolateProperties(IsolateData* isolate_data,
414414 SetMethodNoSideEffect (isolate, target, " decodeUTF8" , DecodeUTF8);
415415 SetMethodNoSideEffect (isolate, target, " toASCII" , ToASCII);
416416 SetMethodNoSideEffect (isolate, target, " toUnicode" , ToUnicode);
417- SetMethodNoSideEffect (isolate, target, " decodeLatin1" , DecodeLatin1);
417+ SetMethodNoSideEffect (
418+ isolate, target, " decodeWindows1252" , DecodeWindows1252);
418419}
419420
420421void BindingData::CreatePerContextProperties (Local<Object> target,
@@ -432,10 +433,10 @@ void BindingData::RegisterTimerExternalReferences(
432433 registry->Register (DecodeUTF8);
433434 registry->Register (ToASCII);
434435 registry->Register (ToUnicode);
435- registry->Register (DecodeLatin1 );
436+ registry->Register (DecodeWindows1252 );
436437}
437438
438- void BindingData::DecodeLatin1 (const FunctionCallbackInfo<Value>& args) {
439+ void BindingData::DecodeWindows1252 (const FunctionCallbackInfo<Value>& args) {
439440 Environment* env = Environment::GetCurrent (args);
440441
441442 CHECK_GE (args.Length (), 1 );
@@ -448,7 +449,6 @@ void BindingData::DecodeLatin1(const FunctionCallbackInfo<Value>& args) {
448449 }
449450
450451 bool ignore_bom = args[1 ]->IsTrue ();
451- bool has_fatal = args[2 ]->IsTrue ();
452452
453453 ArrayBufferViewContents<uint8_t > buffer (args[0 ]);
454454 const uint8_t * data = buffer.data ();
@@ -463,20 +463,45 @@ void BindingData::DecodeLatin1(const FunctionCallbackInfo<Value>& args) {
463463 return args.GetReturnValue ().SetEmptyString ();
464464 }
465465
466- std::string result (length * 2 , ' \0 ' );
467-
468- size_t written = simdutf::convert_latin1_to_utf8 (
469- reinterpret_cast <const char *>(data), length, result.data ());
466+ // Windows-1252 mapping for bytes 0x80-0x9F (128-159), indexed by byte - 0x80.
467+ // Most differ from Latin-1; 0x81, 0x8D, 0x8F, 0x90, 0x9D map to themselves.
468+ static const uint16_t windows1252_mapping[32 ] = {
469+ 0x20AC , 0x0081 , 0x201A , 0x0192 , 0x201E , 0x2026 , 0x2020 , 0x2021 , // 80-87
470+ 0x02C6 , 0x2030 , 0x0160 , 0x2039 , 0x0152 , 0x008D , 0x017D , 0x008F , // 88-8F
471+ 0x0090 , 0x2018 , 0x2019 , 0x201C , 0x201D , 0x2022 , 0x2013 , 0x2014 , // 90-97
472+ 0x02DC , 0x2122 , 0x0161 , 0x203A , 0x0153 , 0x009D , 0x017E , 0x0178 // 98-9F
473+ };
474+
475+ std::string result;
476+ result.reserve (length * 3 ); // Reserve space for UTF-8 output
477+
478+ for (size_t i = 0 ; i < length; i++) {
479+ uint8_t byte = data[i];
480+ uint32_t codepoint;
481+
482+ // Check if byte is in the special Windows-1252 range (128-159)
483+ if (byte >= 0x80 && byte <= 0x9F ) {
484+ codepoint = windows1252_mapping[byte - 0x80 ];
485+ } else {
486+ // For all other bytes, Windows-1252 is identical to Latin-1
487+ codepoint = byte;
488+ }
470489
471- if (has_fatal && written == 0 ) {
472- return node::THROW_ERR_ENCODING_INVALID_ENCODED_DATA (
473- env->isolate (), " The encoded data was not valid for encoding latin1" );
490+ // Convert codepoint to UTF-8
491+ if (codepoint < 0x80 ) {
492+ result.push_back (static_cast <char >(codepoint));
493+ } else if (codepoint < 0x800 ) {
494+ result.push_back (static_cast <char >(0xC0 | (codepoint >> 6 )));
495+ result.push_back (static_cast <char >(0x80 | (codepoint & 0x3F )));
496+ } else {
497+ result.push_back (static_cast <char >(0xE0 | (codepoint >> 12 )));
498+ result.push_back (static_cast <char >(0x80 | ((codepoint >> 6 ) & 0x3F )));
499+ result.push_back (static_cast <char >(0x80 | (codepoint & 0x3F )));
500+ }
474501 }
475502
476- std::string_view view (result.c_str (), written);
477-
478503 Local<Value> ret;
479- if (ToV8Value (env->context (), view , env->isolate ()).ToLocal (&ret)) {
504+ if (ToV8Value (env->context (), result , env->isolate ()).ToLocal (&ret)) {
480505 args.GetReturnValue ().Set (ret);
481506 }
482507}
0 commit comments