// Compiler implementation of the D programming language
// Copyright (c) 1999-2015 by Digital Mars
// All Rights Reserved
// written by Walter Bright
// http://www.digitalmars.com
// Distributed under the Boost Software License, Version 1.0.
// http://www.boost.org/LICENSE_1_0.txt

module ddmd.utf;

// There is no alias for a UTF-8 code unit: the UTF-8 routines in this module
// take `char` directly.
/// A UTF-16 code unit
alias utf16_t = ushort;
/// A UTF-32 code unit
alias utf32_t = uint;
/// A code point; the same type as utf32_t.
alias dchar_t = utf32_t;

/// Number of [first, last] pairs in ALPHA_TABLE.
enum ALPHA_TABLE_LENGTH = 245;
/// Inclusive [first, last] code point ranges, listed in ascending order.
/// isUniAlpha() binary-searches this table and describes it as the table
/// from C99 Appendix D.
extern (C++) __gshared const(utf16_t)** ALPHA_TABLE =
[
    [0x00AA, 0x00AA], [0x00B5, 0x00B5], [0x00B7, 0x00B7], [0x00BA, 0x00BA],
    [0x00C0, 0x00D6], [0x00D8, 0x00F6], [0x00F8, 0x01F5], [0x01FA, 0x0217],
    [0x0250, 0x02A8], [0x02B0, 0x02B8], [0x02BB, 0x02BB], [0x02BD, 0x02C1],
    [0x02D0, 0x02D1], [0x02E0, 0x02E4], [0x037A, 0x037A], [0x0386, 0x0386],
    [0x0388, 0x038A], [0x038C, 0x038C], [0x038E, 0x03A1], [0x03A3, 0x03CE],
    [0x03D0, 0x03D6], [0x03DA, 0x03DA], [0x03DC, 0x03DC], [0x03DE, 0x03DE],
    [0x03E0, 0x03E0], [0x03E2, 0x03F3], [0x0401, 0x040C], [0x040E, 0x044F],
    [0x0451, 0x045C], [0x045E, 0x0481], [0x0490, 0x04C4], [0x04C7, 0x04C8],
    [0x04CB, 0x04CC], [0x04D0, 0x04EB], [0x04EE, 0x04F5], [0x04F8, 0x04F9],
    [0x0531, 0x0556], [0x0559, 0x0559], [0x0561, 0x0587], [0x05B0, 0x05B9],
    [0x05BB, 0x05BD], [0x05BF, 0x05BF], [0x05C1, 0x05C2], [0x05D0, 0x05EA],
    [0x05F0, 0x05F2], [0x0621, 0x063A], [0x0640, 0x0652], [0x0660, 0x0669],
    [0x0670, 0x06B7], [0x06BA, 0x06BE], [0x06C0, 0x06CE], [0x06D0, 0x06DC],
    [0x06E5, 0x06E8], [0x06EA, 0x06ED], [0x06F0, 0x06F9], [0x0901, 0x0903],
    [0x0905, 0x0939], [0x093D, 0x094D], [0x0950, 0x0952], [0x0958, 0x0963],
    [0x0966, 0x096F], [0x0981, 0x0983], [0x0985, 0x098C], [0x098F, 0x0990],
    [0x0993, 0x09A8], [0x09AA, 0x09B0], [0x09B2, 0x09B2], [0x09B6, 0x09B9],
    [0x09BE, 0x09C4], [0x09C7, 0x09C8], [0x09CB, 0x09CD], [0x09DC, 0x09DD],
    [0x09DF, 0x09E3], [0x09E6, 0x09F1], [0x0A02, 0x0A02], [0x0A05, 0x0A0A],
    [0x0A0F, 0x0A10], [0x0A13, 0x0A28], [0x0A2A, 0x0A30], [0x0A32, 0x0A33],
    [0x0A35, 0x0A36], [0x0A38, 0x0A39], [0x0A3E, 0x0A42], [0x0A47, 0x0A48],
    [0x0A4B, 0x0A4D], [0x0A59, 0x0A5C], [0x0A5E, 0x0A5E], [0x0A66, 0x0A6F],
    [0x0A74, 0x0A74], [0x0A81, 0x0A83], [0x0A85, 0x0A8B], [0x0A8D, 0x0A8D],
    [0x0A8F, 0x0A91], [0x0A93, 0x0AA8], [0x0AAA, 0x0AB0], [0x0AB2, 0x0AB3],
    [0x0AB5, 0x0AB9], [0x0ABD, 0x0AC5], [0x0AC7, 0x0AC9], [0x0ACB, 0x0ACD],
    [0x0AD0, 0x0AD0], [0x0AE0, 0x0AE0], [0x0AE6, 0x0AEF], [0x0B01, 0x0B03],
    [0x0B05, 0x0B0C], [0x0B0F, 0x0B10], [0x0B13, 0x0B28], [0x0B2A, 0x0B30],
    [0x0B32, 0x0B33], [0x0B36, 0x0B39], [0x0B3D, 0x0B43], [0x0B47, 0x0B48],
    [0x0B4B, 0x0B4D], [0x0B5C, 0x0B5D], [0x0B5F, 0x0B61], [0x0B66, 0x0B6F],
    [0x0B82, 0x0B83], [0x0B85, 0x0B8A], [0x0B8E, 0x0B90], [0x0B92, 0x0B95],
    [0x0B99, 0x0B9A], [0x0B9C, 0x0B9C], [0x0B9E, 0x0B9F], [0x0BA3, 0x0BA4],
    [0x0BA8, 0x0BAA], [0x0BAE, 0x0BB5], [0x0BB7, 0x0BB9], [0x0BBE, 0x0BC2],
    [0x0BC6, 0x0BC8], [0x0BCA, 0x0BCD], [0x0BE7, 0x0BEF], [0x0C01, 0x0C03],
    [0x0C05, 0x0C0C], [0x0C0E, 0x0C10], [0x0C12, 0x0C28], [0x0C2A, 0x0C33],
    [0x0C35, 0x0C39], [0x0C3E, 0x0C44], [0x0C46, 0x0C48], [0x0C4A, 0x0C4D],
    [0x0C60, 0x0C61], [0x0C66, 0x0C6F], [0x0C82, 0x0C83], [0x0C85, 0x0C8C],
    [0x0C8E, 0x0C90], [0x0C92, 0x0CA8], [0x0CAA, 0x0CB3], [0x0CB5, 0x0CB9],
    [0x0CBE, 0x0CC4], [0x0CC6, 0x0CC8], [0x0CCA, 0x0CCD], [0x0CDE, 0x0CDE],
    [0x0CE0, 0x0CE1], [0x0CE6, 0x0CEF], [0x0D02, 0x0D03], [0x0D05, 0x0D0C],
    [0x0D0E, 0x0D10], [0x0D12, 0x0D28], [0x0D2A, 0x0D39], [0x0D3E, 0x0D43],
    [0x0D46, 0x0D48], [0x0D4A, 0x0D4D], [0x0D60, 0x0D61], [0x0D66, 0x0D6F],
    [0x0E01, 0x0E3A], [0x0E40, 0x0E5B], [0x0E81, 0x0E82], [0x0E84, 0x0E84],
    [0x0E87, 0x0E88], [0x0E8A, 0x0E8A], [0x0E8D, 0x0E8D], [0x0E94, 0x0E97],
    [0x0E99, 0x0E9F], [0x0EA1, 0x0EA3], [0x0EA5, 0x0EA5], [0x0EA7, 0x0EA7],
    [0x0EAA, 0x0EAB], [0x0EAD, 0x0EAE], [0x0EB0, 0x0EB9], [0x0EBB, 0x0EBD],
    [0x0EC0, 0x0EC4], [0x0EC6, 0x0EC6], [0x0EC8, 0x0ECD], [0x0ED0, 0x0ED9],
    [0x0EDC, 0x0EDD], [0x0F00, 0x0F00], [0x0F18, 0x0F19], [0x0F20, 0x0F33],
    [0x0F35, 0x0F35], [0x0F37, 0x0F37], [0x0F39, 0x0F39], [0x0F3E, 0x0F47],
    [0x0F49, 0x0F69], [0x0F71, 0x0F84], [0x0F86, 0x0F8B], [0x0F90, 0x0F95],
    [0x0F97, 0x0F97], [0x0F99, 0x0FAD], [0x0FB1, 0x0FB7], [0x0FB9, 0x0FB9],
    [0x10A0, 0x10C5], [0x10D0, 0x10F6], [0x1E00, 0x1E9B], [0x1EA0, 0x1EF9],
    [0x1F00, 0x1F15], [0x1F18, 0x1F1D], [0x1F20, 0x1F45], [0x1F48, 0x1F4D],
    [0x1F50, 0x1F57], [0x1F59, 0x1F59], [0x1F5B, 0x1F5B], [0x1F5D, 0x1F5D],
    [0x1F5F, 0x1F7D], [0x1F80, 0x1FB4], [0x1FB6, 0x1FBC], [0x1FBE, 0x1FBE],
    [0x1FC2, 0x1FC4], [0x1FC6, 0x1FCC], [0x1FD0, 0x1FD3], [0x1FD6, 0x1FDB],
    [0x1FE0, 0x1FEC], [0x1FF2, 0x1FF4], [0x1FF6, 0x1FFC], [0x203F, 0x2040],
    [0x207F, 0x207F], [0x2102, 0x2102], [0x2107, 0x2107], [0x210A, 0x2113],
    [0x2115, 0x2115], [0x2118, 0x211D], [0x2124, 0x2124], [0x2126, 0x2126],
    [0x2128, 0x2128], [0x212A, 0x2131], [0x2133, 0x2138], [0x2160, 0x2182],
    [0x3005, 0x3007], [0x3021, 0x3029], [0x3041, 0x3093], [0x309B, 0x309C],
    [0x30A1, 0x30F6], [0x30FB, 0x30FC], [0x3105, 0x312C], [0x4E00, 0x9FA5],
    [0xAC00, 0xD7A3]
];
// Both "OK" results are null: the decoders below report success by returning
// null and failure by returning one of the message strings further down.
extern (C++) __gshared const(const(char)*) UTF8_DECODE_OK = null;
extern (C++) __gshared const(const(char)*) UTF16_DECODE_OK = null;
/* The following encodings are valid, except for the 5 and 6 byte
 * combinations:
 *      0xxxxxxx
 *      110xxxxx 10xxxxxx
 *      1110xxxx 10xxxxxx 10xxxxxx
 *      11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
 *      111110xx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx
 *      1111110x 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx
 */
// Indexed by the first code unit of a sequence (256 entries); the value is
// the length of the sequence that unit introduces. 0xFF marks a unit that
// cannot start a sequence. utf_decodeChar() rejects every value other than
// 1, 2, 3 and 4.
extern (C++) __gshared const(uint)* UTF8_STRIDE =
[
    // 0x00 - 0x7F: single code unit
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    // 0x80 - 0xBF: 10xxxxxx trailing units, never a first unit
    0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
    0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
    0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
    0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
    // 0xC0 - 0xDF: 110xxxxx, two units
    2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
    2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
    // 0xE0 - 0xEF: 1110xxxx, three units
    3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
    // 0xF0 - 0xF7: four units; 0xF8 - 0xFB: five; 0xFC - 0xFD: six; 0xFE - 0xFF: invalid
    4, 4, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 6, 6, 0xFF, 0xFF
];
// UTF-8 decoding errors
extern (C++) __gshared const(char)* UTF8_DECODE_OUTSIDE_CODE_SPACE = "Outside Unicode code space";
extern (C++) __gshared const(char)* UTF8_DECODE_TRUNCATED_SEQUENCE = "Truncated UTF-8 sequence";
extern (C++) __gshared const(char)* UTF8_DECODE_OVERLONG = "Overlong UTF-8 sequence";
extern (C++) __gshared const(char)* UTF8_DECODE_INVALID_TRAILER = "Invalid trailing code unit";
extern (C++) __gshared const(char)* UTF8_DECODE_INVALID_CODE_POINT = "Invalid code point decoded";
// UTF-16 decoding errors
extern (C++) __gshared const(char)* UTF16_DECODE_TRUNCATED_SEQUENCE = "Truncated UTF-16 sequence";
extern (C++) __gshared const(char)* UTF16_DECODE_INVALID_SURROGATE = "Invalid low surrogate";
extern (C++) __gshared const(char)* UTF16_DECODE_UNPAIRED_SURROGATE = "Unpaired surrogate";
extern (C++) __gshared const(char)* UTF16_DECODE_INVALID_CODE_POINT = "Invalid code point decoded";

/// The Unicode code space is the range of code points [0x000000,0x10FFFF]
/// except the UTF-16 surrogate pairs in the range [0xD800,0xDFFF]
/// and non-characters (which end in 0xFFFE or 0xFFFF).
/// Note: of the non-characters, the check below rejects only 0xFFFE and
/// 0xFFFF themselves; values such as 0x1FFFE are accepted.
extern (C++) bool utf_isValidDchar(dchar_t c)
{
    // TODO: Whether non-char code points should be rejected is pending review
    // Above the largest character code point?
    const bool beyondCodeSpace = c > 0x10FFFF;
    // Inside the UTF-16 surrogate block?
    const bool surrogate = c >= 0xD800 && c <= 0xDFFF;
    // One of the non-characters 0xFFFE / 0xFFFF?
    const bool nonCharacter = (c & 0xFFFFFE) == 0x00FFFE;
    return !(beyondCodeSpace || surrogate || nonCharacter);
}

/*******************************
 * Tests whether c falls inside one of the ranges of ALPHA_TABLE
 * (the Unicode alpha table taken from C99 Appendix D).
 * Returns:
 *      true if c is covered by a table entry, false otherwise
 */
extern (C++) bool isUniAlpha(dchar_t c)
{
    // Anything below the first range or above the last one cannot match.
    if (c < ALPHA_TABLE[0][0] || c > ALPHA_TABLE[ALPHA_TABLE_LENGTH - 1][1])
        return false;
    // Binary search over the half-open index window [first, onePastLast).
    size_t first = 0;
    size_t onePastLast = ALPHA_TABLE_LENGTH;
    while (first < onePastLast)
    {
        const size_t probe = first + (onePastLast - first) / 2;
        if (c < ALPHA_TABLE[probe][0])
            onePastLast = probe;
        else if (c > ALPHA_TABLE[probe][1])
            first = probe + 1;
        else
        {
            assert(ALPHA_TABLE[probe][0] <= c && c <= ALPHA_TABLE[probe][1]);
            return true;
        }
    }
    return false;
}

/**
 * Returns the number of UTF-8 code units needed for c (1 to 4).
 * Halts via assert(false) when c is above 0x10FFFF.
 */
extern (C++) int utf_codeLengthChar(dchar_t c)
{
    if (c <= 0x7F)
        return 1;
    if (c <= 0x7FF)
        return 2;
    if (c <= 0xFFFF)
        return 3;
    if (c <= 0x10FFFF)
        return 4;
    assert(false);
}

/**
 * Returns the number of UTF-16 code units needed for c:
 * 1 up to 0xFFFF, otherwise 2 (a surrogate pair).
 */
extern (C++) int utf_codeLengthWchar(dchar_t c)
{
    if (c > 0xFFFF)
        return 2;
    return 1;
}

/**
 * Returns the code length of c in code units for the encoding.
 * sz is the encoding: 1 = utf8, 2 = utf16, 4 = utf32.
*/
extern (C++) int utf_codeLength(int sz, dchar_t c)
{
    switch (sz)
    {
    case 1:
        return utf_codeLengthChar(c);
    case 2:
        return utf_codeLengthWchar(c);
    default:
        // Only UTF-32 remains, where every code point is one unit.
        assert(sz == 4);
        return 1;
    }
}

/**
 * Writes c to s as UTF-8 (one to four code units, no terminator).
 * s must be non-null and c must satisfy utf_isValidDchar (both asserted).
 */
extern (C++) void utf_encodeChar(char* s, dchar_t c)
{
    assert(s !is null);
    assert(utf_isValidDchar(c));
    if (c <= 0x7F)
    {
        // 0xxxxxxx
        s[0] = cast(char)c;
        return;
    }
    if (c <= 0x07FF)
    {
        // 110xxxxx 10xxxxxx
        s[0] = cast(char)(0xC0 | (c >> 6));
        s[1] = cast(char)(0x80 | (c & 0x3F));
        return;
    }
    if (c <= 0xFFFF)
    {
        // 1110xxxx 10xxxxxx 10xxxxxx
        s[0] = cast(char)(0xE0 | (c >> 12));
        s[1] = cast(char)(0x80 | ((c >> 6) & 0x3F));
        s[2] = cast(char)(0x80 | (c & 0x3F));
        return;
    }
    if (c > 0x10FFFF)
        assert(0);
    // 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
    s[0] = cast(char)(0xF0 | (c >> 18));
    s[1] = cast(char)(0x80 | ((c >> 12) & 0x3F));
    s[2] = cast(char)(0x80 | ((c >> 6) & 0x3F));
    s[3] = cast(char)(0x80 | (c & 0x3F));
}

/**
 * Writes c to s as UTF-16: one code unit up to 0xFFFF, otherwise a
 * high/low surrogate pair.
 * s must be non-null and c must satisfy utf_isValidDchar (both asserted).
 */
extern (C++) void utf_encodeWchar(utf16_t* s, dchar_t c)
{
    assert(s !is null);
    assert(utf_isValidDchar(c));
    if (c <= 0xFFFF)
    {
        s[0] = cast(utf16_t)c;
        return;
    }
    // Split the 20-bit offset above 0x10000 into two 10-bit halves.
    const dchar_t offset = c - 0x010000;
    s[0] = cast(utf16_t)(((offset >> 10) & 0x03FF) + 0xD800);
    s[1] = cast(utf16_t)((offset & 0x03FF) + 0xDC00);
}

/**
 * Writes c to s in the encoding selected by sz
 * (1 = utf8, 2 = utf16, 4 = utf32; anything else trips an assert).
 */
extern (C++) void utf_encode(int sz, void* s, dchar_t c)
{
    switch (sz)
    {
    case 1:
        utf_encodeChar(cast(char*)s, c);
        break;
    case 2:
        utf_encodeWchar(cast(utf16_t*)s, c);
        break;
    default:
        assert(sz == 4);
        *(cast(utf32_t*)s) = c;
        break;
    }
}

/********************************************
 * Decode a UTF-8 sequence as a single UTF-32 code point.
*
 * Params:
 *      s       = UTF-8 code units
 *      len     = number of code units available in s
 *      pidx    = in: index of the first unit of the sequence;
 *                out: index just past the sequence on success,
 *                or the input index + 1 on error
 *      presult = receives the decoded code point on success,
 *                or the value of the first code unit on error
 * Returns:
 *      NULL    success
 *      !=NULL  error message string
 */
extern (C++) const(char)* utf_decodeChar(const(char)* s, size_t len, size_t* pidx, dchar_t* presult)
{
    assert(s !is null);
    assert(pidx !is null);
    assert(presult !is null);

    // Step past the first unit right away; every error path leaves *pidx here.
    const size_t start = *pidx;
    *pidx = start + 1;
    assert(start < len);

    // Pre-stage the result for the ASCII and error cases.
    const char lead = s[start];
    *presult = lead;

    // Sequence length announced by the first unit.
    const size_t seqLen = UTF8_STRIDE[lead];
    if (seqLen == 1)
        return UTF8_DECODE_OK; // ASCII
    if (seqLen < 2 || seqLen > 4)
        return UTF8_DECODE_OUTSIDE_CODE_SPACE; // 5/6-byte form or 0xFF table entry
    if (len < start + seqLen)
        return UTF8_DECODE_TRUNCATED_SEQUENCE; // source too short

    /* The following combinations are overlong, and illegal:
     *      1100000x (10xxxxxx)
     *      11100000 100xxxxx (10xxxxxx)
     *      11110000 1000xxxx (10xxxxxx 10xxxxxx)
     *      11111000 10000xxx (10xxxxxx 10xxxxxx 10xxxxxx)
     *      11111100 100000xx (10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx)
     */
    const char second = s[start + 1];
    const bool overlong2 = (lead & 0xFE) == 0xC0;
    const bool overlong3 = lead == 0xE0 && (second & 0xE0) == 0x80;
    const bool overlong4 = lead == 0xF0 && (second & 0xF0) == 0x80;
    const bool overlong5 = lead == 0xF8 && (second & 0xF8) == 0x80;
    const bool overlong6 = lead == 0xFC && (second & 0xFC) == 0x80;
    if (overlong2 || overlong3 || overlong4 || overlong5 || overlong6)
        return UTF8_DECODE_OVERLONG;

    // The first unit contributes its 7 - seqLen low bits ...
    utf32_t code = lead & ((1 << (7 - seqLen)) - 1);
    // ... and each trailing 10xxxxxx unit contributes six more.
    const size_t stop = start + seqLen;
    for (size_t k = start + 1; k < stop; ++k)
    {
        const char trail = s[k];
        if ((trail & 0xC0) != 0x80)
            return UTF8_DECODE_INVALID_TRAILER;
        code = (code << 6) | (trail & 0x3F);
    }
    if (!utf_isValidDchar(code))
        return UTF8_DECODE_INVALID_CODE_POINT;

    *pidx = stop;
    *presult = code;
    return UTF8_DECODE_OK;
}

/********************************************
 * Decode a UTF-16 sequence as a single UTF-32 code point.
*
 * Params:
 *      s       = UTF-16 code units
 *      len     = number of code units available in s
 *      pidx    = in: index of the first unit of the sequence;
 *                out: advanced by 2 after a decoded surrogate pair,
 *                by 1 in every other case (including errors)
 *      presult = receives the decoded code point on success,
 *                or the value of the first code unit on error
 * Returns:
 *      NULL    success
 *      !=NULL  error message string
 */
extern (C++) const(char)* utf_decodeWchar(const(utf16_t)* s, size_t len, size_t* pidx, dchar_t* presult)
{
    assert(s !is null);
    assert(pidx !is null);
    assert(presult !is null);
    size_t i = (*pidx)++;
    assert(i < len);
    // Pre-stage results for ASCII and error cases
    utf32_t u = *presult = s[i];
    if (u < 0x80) // ASCII
        return UTF16_DECODE_OK;
    if (0xD800 <= u && u <= 0xDBFF) // High surrogate: a low surrogate must follow
    {
        if (len <= i + 1)
            return UTF16_DECODE_TRUNCATED_SEQUENCE;
        utf16_t u2 = s[i + 1];
        // The second unit must lie in [0xDC00, 0xDFFF]. The upper bound has to
        // be tested against u2: u is already known to be at most 0xDBFF, so
        // comparing u would let units above 0xDFFF through.
        if (u2 < 0xDC00 || 0xDFFF < u2)
            return UTF16_DECODE_INVALID_SURROGATE;
        // 0xD7C0 == 0xD800 - (0x10000 >> 10), so this also adds the 0x10000 bias.
        u = ((u - 0xD7C0) << 10) + (u2 - 0xDC00);
        ++*pidx;
    }
    else if (0xDC00 <= u && u <= 0xDFFF) // Low surrogate without a preceding high one
        return UTF16_DECODE_UNPAIRED_SURROGATE;
    if (!utf_isValidDchar(u))
        return UTF16_DECODE_INVALID_CODE_POINT;
    *presult = u;
    return UTF16_DECODE_OK;
}