|
17 | 17 | const char* kOdbcEncoding = "utf-16-le"; // ODBC uses UTF-16LE for SQLWCHAR |
18 | 18 | const size_t kUcsLength = 2; // SQLWCHAR is 2 bytes on all platforms |
19 | 19 |
|
20 | | -// Function to convert SQLWCHAR strings to std::wstring on macOS |
| 20 | +// Function to convert SQLWCHAR strings to std::wstring on macOS/Linux |
| 21 | +// Optimized version: decodes UTF-16 directly into the result, without codecvt or an intermediate byte buffer
21 | 22 | std::wstring SQLWCHARToWString(const SQLWCHAR* sqlwStr, size_t length = SQL_NTS) { |
22 | 23 | if (!sqlwStr) { |
23 | 24 | return std::wstring(); |
24 | 25 | } |
25 | 26 |
|
| 27 | + // Lambda to calculate string length using pointer arithmetic |
| 28 | + auto calculateLength = [](const SQLWCHAR* str) -> size_t { |
| 29 | + const SQLWCHAR* p = str; |
| 30 | + while (*p) ++p; |
| 31 | + return p - str; |
| 32 | + }; |
| 33 | + |
26 | 34 | if (length == SQL_NTS) { |
27 | | - // Determine length if not provided |
28 | | - size_t i = 0; |
29 | | - while (sqlwStr[i] != 0) |
30 | | - ++i; |
31 | | - length = i; |
| 35 | + length = calculateLength(sqlwStr); |
32 | 36 | } |
33 | 37 |
|
34 | | - // Create a UTF-16LE byte array from the SQLWCHAR array |
35 | | - std::vector<char> utf16Bytes(length * kUcsLength); |
36 | | - for (size_t i = 0; i < length; ++i) { |
37 | | - // Copy each SQLWCHAR (2 bytes) to the byte array |
38 | | - memcpy(&utf16Bytes[i * kUcsLength], &sqlwStr[i], kUcsLength); |
| 38 | + if (length == 0) { |
| 39 | + return std::wstring(); |
39 | 40 | } |
40 | 41 |
|
41 | | - // Convert UTF-16LE to std::wstring (UTF-32 on macOS) |
42 | | - try { |
43 | | - // Use C++11 codecvt to convert between UTF-16LE and wstring |
44 | | - std::wstring_convert<std::codecvt_utf8_utf16<wchar_t, 0x10ffff, std::little_endian>> |
45 | | - converter; |
46 | | - std::wstring result = converter.from_bytes( |
47 | | - reinterpret_cast<const char*>(utf16Bytes.data()), |
48 | | - reinterpret_cast<const char*>(utf16Bytes.data() + utf16Bytes.size())); |
49 | | - return result; |
50 | | - } catch (const std::exception& e) { |
51 | | - // Fallback to character-by-character conversion if codecvt fails |
52 | | - std::wstring result; |
53 | | - result.reserve(length); |
54 | | - for (size_t i = 0; i < length; ++i) { |
55 | | - result.push_back(static_cast<wchar_t>(sqlwStr[i])); |
| 42 | + // Lambda to check that a UTF-16 code unit is not a surrogate (0xD800-0xDFFF), i.e. it is a complete BMP character on its own
| 43 | + auto isBMP = [](uint16_t ch) { return ch < 0xD800 || ch > 0xDFFF; }; |
| 44 | + |
| 45 | + // Lambda to decode surrogate pair into code point |
| 46 | + auto decodeSurrogatePair = [](uint16_t high, uint16_t low) -> uint32_t { |
| 47 | + return 0x10000 + |
| 48 | + (static_cast<uint32_t>(high & 0x3FF) << 10) + |
| 49 | + (low & 0x3FF); |
| 50 | + }; |
| 51 | + |
| 52 | + // Convert UTF-16 to UTF-32 directly without intermediate buffer |
| 53 | + std::wstring result; |
| 54 | + result.reserve(length); // Reserve assuming most chars are BMP |
| 55 | + |
| 56 | + size_t i = 0; |
| 57 | + while (i < length) { |
| 58 | + uint16_t utf16Char = static_cast<uint16_t>(sqlwStr[i]); |
| 59 | + |
| 60 | + // Fast path: non-surrogate code unit, which is a complete BMP character (the common case)
| 61 | + if (isBMP(utf16Char)) { |
| 62 | + result.push_back(static_cast<wchar_t>(utf16Char)); |
| 63 | + ++i; |
| 64 | + } |
| 65 | + // Handle surrogate pairs for characters outside BMP |
| 66 | + else if (utf16Char <= 0xDBFF) { // High surrogate |
| 67 | + if (i + 1 < length) { |
| 68 | + uint16_t lowSurrogate = static_cast<uint16_t>(sqlwStr[i + 1]); |
| 69 | + if (lowSurrogate >= 0xDC00 && lowSurrogate <= 0xDFFF) { |
| 70 | + uint32_t codePoint = decodeSurrogatePair(utf16Char, lowSurrogate); |
| 71 | + result.push_back(static_cast<wchar_t>(codePoint)); |
| 72 | + i += 2; |
| 73 | + continue; |
| 74 | + } |
| 75 | + } |
| 76 | + // Unpaired high surrogate (end of input, or next unit is not a low surrogate) - push as-is
| 77 | + result.push_back(static_cast<wchar_t>(utf16Char)); |
| 78 | + ++i; |
| 79 | + } |
| 80 | + else { // Low surrogate without high - invalid but push as-is |
| 81 | + result.push_back(static_cast<wchar_t>(utf16Char)); |
| 82 | + ++i; |
56 | 83 | } |
57 | | - return result; |
58 | 84 | } |
| 85 | + return result; |
59 | 86 | } |
60 | 87 |
|
61 | | -// Function to convert std::wstring to SQLWCHAR array on macOS |
| 88 | +// Function to convert std::wstring to SQLWCHAR array on macOS/Linux |
| 89 | +// Optimized version: encodes UTF-32 code points directly as UTF-16, without codecvt or an intermediate byte string
62 | 90 | std::vector<SQLWCHAR> WStringToSQLWCHAR(const std::wstring& str) { |
63 | | - try { |
64 | | - // Convert wstring (UTF-32 on macOS) to UTF-16LE bytes |
65 | | - std::wstring_convert<std::codecvt_utf8_utf16<wchar_t, 0x10ffff, std::little_endian>> |
66 | | - converter; |
67 | | - std::string utf16Bytes = converter.to_bytes(str); |
| 91 | + if (str.empty()) { |
| 92 | + return std::vector<SQLWCHAR>(1, 0); // Just null terminator |
| 93 | + } |
68 | 94 |
|
69 | | - // Convert the bytes to SQLWCHAR array |
70 | | - std::vector<SQLWCHAR> result(utf16Bytes.size() / kUcsLength + 1, |
71 | | - 0); // +1 for null terminator |
72 | | - for (size_t i = 0; i < utf16Bytes.size() / kUcsLength; ++i) { |
73 | | - memcpy(&result[i], &utf16Bytes[i * kUcsLength], kUcsLength); |
74 | | - } |
75 | | - return result; |
76 | | - } catch (const std::exception& e) { |
77 | | - // Fallback to simple casting if codecvt fails |
78 | | - std::vector<SQLWCHAR> result(str.size() + 1, |
79 | | - 0); // +1 for null terminator |
80 | | - for (size_t i = 0; i < str.size(); ++i) { |
81 | | - result[i] = static_cast<SQLWCHAR>(str[i]); |
| 95 | + // Lambda to encode code point as surrogate pair and append to result |
| 96 | + auto encodeSurrogatePair = [](std::vector<SQLWCHAR>& vec, uint32_t cp) { |
| 97 | + cp -= 0x10000; |
| 98 | + vec.push_back(static_cast<SQLWCHAR>(0xD800 | ((cp >> 10) & 0x3FF))); |
| 99 | + vec.push_back(static_cast<SQLWCHAR>(0xDC00 | (cp & 0x3FF))); |
| 100 | + }; |
| 101 | + |
| 102 | + // Convert wstring (UTF-32) to UTF-16 |
| 103 | + std::vector<SQLWCHAR> result; |
| 104 | + result.reserve(str.size() + 1); // Exact when every char is BMP; each non-BMP char needs one extra unit, so the vector may still grow
| 105 | + |
| 106 | + for (wchar_t wc : str) { |
| 107 | + uint32_t codePoint = static_cast<uint32_t>(wc); |
| 108 | + |
| 109 | + // Fast path: code point fits in one UTF-16 unit (BMP - the common case)
| 110 | + if (codePoint <= 0xFFFF) { |
| 111 | + result.push_back(static_cast<SQLWCHAR>(codePoint)); |
| 112 | + } |
| 113 | + // Encode as surrogate pair for characters outside BMP |
| 114 | + else if (codePoint <= 0x10FFFF) { |
| 115 | + encodeSurrogatePair(result, codePoint); |
82 | 116 | } |
83 | | - return result; |
| 117 | + // Code points above 0x10FFFF are not valid Unicode and are silently dropped from the output
84 | 118 | } |
| 119 | + |
| 120 | + result.push_back(0); // Null terminator |
| 121 | + return result; |
85 | 122 | } |
86 | 123 |
|
87 | 124 | #endif |
0 commit comments