1 // Compiler implementation of the D programming language
2 // Copyright (c) 1999-2015 by Digital Mars
3 // All Rights Reserved
4 // written by Walter Bright
5 // http://www.digitalmars.com
6 // Distributed under the Boost Software License, Version 1.0.
7 // http://www.boost.org/LICENSE_1_0.txt
8 
9 module ddmd.lexer;
10 
import core.stdc.ctype, core.stdc.errno, core.stdc.stdarg, core.stdc.stdio, core.stdc.string, core.stdc.time;
import ddmd.entity, ddmd.errors, ddmd.globals, ddmd.id, ddmd.identifier, ddmd.root.longdouble, ddmd.root.outbuffer, ddmd.root.port, ddmd.root.rmem, ddmd.root.stringtable, ddmd.tokens, ddmd.utf;
13 
enum LS = 0x2028; // Unicode line separator (U+2028)
enum PS = 0x2029; // Unicode paragraph separator (U+2029)
/********************************************
 * Do our own char maps.
 *
 * cmtable holds one flag byte per 8-bit character value. cmtable_init()
 * ORs the CM* bits below into each entry, and isoctal(), ishex() and
 * isidchar() test them.
 */
extern (C++) __gshared ubyte[256] cmtable;
extern (C++) __gshared const(int) CMoctal = 0x1; // set for '0' .. '7'
extern (C++) __gshared const(int) CMhex = 0x2; // set when isxdigit() accepts the character
extern (C++) __gshared const(int) CMidchar = 0x4; // set for isalnum() characters and '_'
24 
extern (C++) bool isoctal(char c)
{
    // True when the character-map entry for c carries the CMoctal flag.
    const ubyte flags = cmtable[c];
    return (flags & CMoctal) != 0;
}
29 
extern (C++) bool ishex(char c)
{
    // True when the character-map entry for c carries the CMhex flag.
    const ubyte flags = cmtable[c];
    return (flags & CMhex) != 0;
}
34 
extern (C++) bool isidchar(char c)
{
    // True when the character-map entry for c carries the CMidchar flag.
    const ubyte flags = cmtable[c];
    return (flags & CMidchar) != 0;
}
39 
extern (C++) static void cmtable_init()
{
    // Classify every 8-bit character value and OR the resulting flag bits
    // into its cmtable entry.
    foreach (int ch; 0 .. 256)
    {
        int flags = 0;
        if (ch >= '0' && ch <= '7')
            flags |= CMoctal;
        if (isxdigit(ch))
            flags |= CMhex;
        if (ch == '_' || isalnum(ch))
            flags |= CMidchar;
        cmtable[ch] |= flags;
    }
}
52 
version (unittest)
{
    /**
     * Smoke test for the lexer: scans the text "int" and checks that it
     * yields TOKint32 followed by TOKeof, and that asking for another token
     * after the end of input keeps returning TOKeof.
     */
    extern (C++) void unittest_lexer()
    {
        //printf("unittest_lexer()\n");
        /* Not much here, just trying things out.
         */
        const(char)* text = "int";
        // The end offset has to be the length of the text. text.sizeof would
        // be the size of the pointer (4 or 8 bytes), which places the lexer's
        // end pointer beyond the string literal.
        scope Lexer lex1 = new Lexer(null, cast(char*)text, 0, strlen(text), 0, 0);
        TOK tok;
        tok = lex1.nextToken();
        //printf("tok == %s, %d, %d\n", Token::toChars(tok), tok, TOKint32);
        assert(tok == TOKint32);
        tok = lex1.nextToken();
        assert(tok == TOKeof);
        // Once at end of file, the lexer must stay there.
        tok = lex1.nextToken();
        assert(tok == TOKeof);
    }
}
72 
73 extern (C++) class Lexer
74 {
75 public:
76     /*************************** Lexer ********************************************/
    // Buffer shared by all Lexer instances (static __gshared); the code that
    // uses it is outside this part of the file.
    extern (C++) static __gshared OutBuffer stringbuffer;
    Loc scanloc; // for error messages; the constructor sets it to Loc(filename, 1, 1)
    const(char)* base; // pointer to start of buffer
    const(char)* end; // past end of buffer (base + endoffset)
    const(char)* p; // current character; starts at base + begoffset
    const(char)* line; // start of current line
    Token token; // current token; nextToken() refills it
    int doDocComment; // collect doc comment information
    int anyToken; // !=0 means seen at least one token
    int commentToken; // !=0 means comments are TOKcomment's
    bool errors; // errors occurred during lexing or parsing
88 
89     final extern (D) this(const(char)* filename, const(char)* base, size_t begoffset, size_t endoffset, int doDocComment, int commentToken)
90     {
91         scanloc = Loc(filename, 1, 1);
92         //printf("Lexer::Lexer(%p,%d)\n",base,length);
93         //printf("lexer.filename = %s\n", filename);
94         memset(&token, 0, token.sizeof);
95         this.base = base;
96         this.end = base + endoffset;
97         p = base + begoffset;
98         line = p;
99         this.doDocComment = doDocComment;
100         this.anyToken = 0;
101         this.commentToken = commentToken;
102         this.errors = false;
103         //initKeywords();
104         /* If first line starts with '#!', ignore the line
105          */
106         if (p[0] == '#' && p[1] == '!')
107         {
108             p += 2;
109             while (1)
110             {
111                 char c = *p;
112                 switch (c)
113                 {
114                 case '\n':
115                     p++;
116                     break;
117                 case '\r':
118                     p++;
119                     if (*p == '\n')
120                         p++;
121                     break;
122                 case 0:
123                 case 0x1A:
124                     break;
125                 default:
126                     if (c & 0x80)
127                     {
128                         uint u = decodeUTF();
129                         if (u == PS || u == LS)
130                             break;
131                     }
132                     p++;
133                     continue;
134                 }
135                 break;
136             }
137             endOfLine();
138         }
139     }
140 
141     final static void initLexer()
142     {
143         cmtable_init();
144         Identifier.initTable();
145         Token.initTokens();
146         version (unittest)
147         {
148             unittest_lexer();
149         }
150     }
151 
152     final TOK nextToken()
153     {
154         if (token.next)
155         {
156             Token* t = token.next;
157             memcpy(&token, t, Token.sizeof);
158             t.free();
159         }
160         else
161         {
162             scan(&token);
163         }
164         //token.print();
165         return token.value;
166     }
167 
168     /***********************
169      * Look ahead at next token's value.
170      */
171     final TOK peekNext()
172     {
173         return peek(&token).value;
174     }
175 
176     /***********************
177      * Look 2 tokens ahead at value.
178      */
179     final TOK peekNext2()
180     {
181         Token* t = peek(&token);
182         return peek(t).value;
183     }
184 
185     /****************************
186      * Turn next token in buffer into a token.
187      */
188     final void scan(Token* t)
189     {
190         uint lastLine = scanloc.linnum;
191         Loc startLoc;
192         t.blockComment = null;
193         t.lineComment = null;
194         while (1)
195         {
196             t.ptr = p;
197             //printf("p = %p, *p = '%c'\n",p,*p);
198             t.loc = loc();
199             switch (*p)
200             {
201             case 0:
202             case 0x1A:
203                 t.value = TOKeof; // end of file
204                 return;
205             case ' ':
206             case '\t':
207             case '\v':
208             case '\f':
209                 p++;
210                 continue;
211                 // skip white space
212             case '\r':
213                 p++;
214                 if (*p != '\n') // if CR stands by itself
215                     endOfLine();
216                 continue;
217                 // skip white space
218             case '\n':
219                 p++;
220                 endOfLine();
221                 continue;
222                 // skip white space
223             case '0':
224             case '1':
225             case '2':
226             case '3':
227             case '4':
228             case '5':
229             case '6':
230             case '7':
231             case '8':
232             case '9':
233                 t.value = number(t);
234                 return;
235             case '\'':
236                 t.value = charConstant(t, 0);
237                 return;
238             case 'r':
239                 if (p[1] != '"')
240                     goto case_ident;
241                 p++;
242             case '`':
243                 t.value = wysiwygStringConstant(t, *p);
244                 return;
245             case 'x':
246                 if (p[1] != '"')
247                     goto case_ident;
248                 p++;
249                 t.value = hexStringConstant(t);
250                 return;
251             case 'q':
252                 if (p[1] == '"')
253                 {
254                     p++;
255                     t.value = delimitedStringConstant(t);
256                     return;
257                 }
258                 else if (p[1] == '{')
259                 {
260                     p++;
261                     t.value = tokenStringConstant(t);
262                     return;
263                 }
264                 else
265                     goto case_ident;
266             case '"':
267                 t.value = escapeStringConstant(t, 0);
268                 return;
269             case 'a':
270             case 'b':
271             case 'c':
272             case 'd':
273             case 'e':
274             case 'f':
275             case 'g':
276             case 'h':
277             case 'i':
278             case 'j':
279             case 'k':
280             case 'l':
281             case 'm':
282             case 'n':
283             case 'o':
284             case 'p':
285                 /*case 'q': case 'r':*/
286             case 's':
287             case 't':
288             case 'u':
289             case 'v':
290             case 'w':
291                 /*case 'x':*/
292             case 'y':
293             case 'z':
294             case 'A':
295             case 'B':
296             case 'C':
297             case 'D':
298             case 'E':
299             case 'F':
300             case 'G':
301             case 'H':
302             case 'I':
303             case 'J':
304             case 'K':
305             case 'L':
306             case 'M':
307             case 'N':
308             case 'O':
309             case 'P':
310             case 'Q':
311             case 'R':
312             case 'S':
313             case 'T':
314             case 'U':
315             case 'V':
316             case 'W':
317             case 'X':
318             case 'Y':
319             case 'Z':
320             case '_':
321             case_ident:
322                 {
323                     char c;
324                     while (1)
325                     {
326                         c = *++p;
327                         if (isidchar(c))
328                             continue;
329                         else if (c & 0x80)
330                         {
331                             const(char)* s = p;
332                             uint u = decodeUTF();
333                             if (isUniAlpha(u))
334                                 continue;
335                             error("char 0x%04x not allowed in identifier", u);
336                             p = s;
337                         }
338                         break;
339                     }
340                     Identifier id = Identifier.idPool(cast(char*)t.ptr, p - t.ptr);
341                     t.ident = id;
342                     t.value = cast(TOK)id.value;
343                     anyToken = 1;
344                     if (*t.ptr == '_') // if special identifier token
345                     {
346                         static __gshared bool initdone = false;
347                         static __gshared char[11 + 1] date;
348                         static __gshared char[8 + 1] time;
349                         static __gshared char[24 + 1] timestamp;
350                         if (!initdone) // lazy evaluation
351                         {
352                             initdone = true;
353                             time_t ct;
354                             .time(&ct);
355                             char* p = ctime(&ct);
356                             assert(p);
357                             sprintf(&date[0], "%.6s %.4s", p + 4, p + 20);
358                             sprintf(&time[0], "%.8s", p + 11);
359                             sprintf(&timestamp[0], "%.24s", p);
360                         }
361                         if (id == Id.DATE)
362                         {
363                             t.ustring = cast(char*)date;
364                             goto Lstr;
365                         }
366                         else if (id == Id.TIME)
367                         {
368                             t.ustring = cast(char*)time;
369                             goto Lstr;
370                         }
371                         else if (id == Id.VENDOR)
372                         {
373                             t.ustring = cast(char*)global.compiler.vendor;
374                             goto Lstr;
375                         }
376                         else if (id == Id.TIMESTAMP)
377                         {
378                             t.ustring = cast(char*)timestamp;
379                         Lstr:
380                             t.value = TOKstring;
381                             t.postfix = 0;
382                             t.len = cast(uint)strlen(cast(char*)t.ustring);
383                         }
384                         else if (id == Id.VERSIONX)
385                         {
386                             uint major = 0;
387                             uint minor = 0;
388                             bool point = false;
389                             for (const(char)* p = global._version + 1; 1; p++)
390                             {
391                                 c = *p;
392                                 if (isdigit(cast(char)c))
393                                     minor = minor * 10 + c - '0';
394                                 else if (c == '.')
395                                 {
396                                     if (point)
397                                         break;
398                                     // ignore everything after second '.'
399                                     point = true;
400                                     major = minor;
401                                     minor = 0;
402                                 }
403                                 else
404                                     break;
405                             }
406                             t.value = TOKint64v;
407                             t.uns64value = major * 1000 + minor;
408                         }
409                         else if (id == Id.EOFX)
410                         {
411                             t.value = TOKeof;
412                             // Advance scanner to end of file
413                             while (!(*p == 0 || *p == 0x1A))
414                                 p++;
415                         }
416                     }
417                     //printf("t->value = %d\n",t->value);
418                     return;
419                 }
420             case '/':
421                 p++;
422                 switch (*p)
423                 {
424                 case '=':
425                     p++;
426                     t.value = TOKdivass;
427                     return;
428                 case '*':
429                     p++;
430                     startLoc = loc();
431                     while (1)
432                     {
433                         while (1)
434                         {
435                             char c = *p;
436                             switch (c)
437                             {
438                             case '/':
439                                 break;
440                             case '\n':
441                                 endOfLine();
442                                 p++;
443                                 continue;
444                             case '\r':
445                                 p++;
446                                 if (*p != '\n')
447                                     endOfLine();
448                                 continue;
449                             case 0:
450                             case 0x1A:
451                                 error("unterminated /* */ comment");
452                                 p = end;
453                                 t.loc = loc();
454                                 t.value = TOKeof;
455                                 return;
456                             default:
457                                 if (c & 0x80)
458                                 {
459                                     uint u = decodeUTF();
460                                     if (u == PS || u == LS)
461                                         endOfLine();
462                                 }
463                                 p++;
464                                 continue;
465                             }
466                             break;
467                         }
468                         p++;
469                         if (p[-2] == '*' && p - 3 != t.ptr)
470                             break;
471                     }
472                     if (commentToken)
473                     {
474                         t.loc = startLoc;
475                         t.value = TOKcomment;
476                         return;
477                     }
478                     else if (doDocComment && t.ptr[2] == '*' && p - 4 != t.ptr)
479                     {
480                         // if /** but not /**/
481                         getDocComment(t, lastLine == startLoc.linnum);
482                     }
483                     continue;
484                 case '/':
485                     // do // style comments
486                     startLoc = loc();
487                     while (1)
488                     {
489                         char c = *++p;
490                         switch (c)
491                         {
492                         case '\n':
493                             break;
494                         case '\r':
495                             if (p[1] == '\n')
496                                 p++;
497                             break;
498                         case 0:
499                         case 0x1A:
500                             if (commentToken)
501                             {
502                                 p = end;
503                                 t.loc = startLoc;
504                                 t.value = TOKcomment;
505                                 return;
506                             }
507                             if (doDocComment && t.ptr[2] == '/')
508                                 getDocComment(t, lastLine == startLoc.linnum);
509                             p = end;
510                             t.loc = loc();
511                             t.value = TOKeof;
512                             return;
513                         default:
514                             if (c & 0x80)
515                             {
516                                 uint u = decodeUTF();
517                                 if (u == PS || u == LS)
518                                     break;
519                             }
520                             continue;
521                         }
522                         break;
523                     }
524                     if (commentToken)
525                     {
526                         p++;
527                         endOfLine();
528                         t.loc = startLoc;
529                         t.value = TOKcomment;
530                         return;
531                     }
532                     if (doDocComment && t.ptr[2] == '/')
533                         getDocComment(t, lastLine == startLoc.linnum);
534                     p++;
535                     endOfLine();
536                     continue;
537                 case '+':
538                     {
539                         int nest;
540                         startLoc = loc();
541                         p++;
542                         nest = 1;
543                         while (1)
544                         {
545                             char c = *p;
546                             switch (c)
547                             {
548                             case '/':
549                                 p++;
550                                 if (*p == '+')
551                                 {
552                                     p++;
553                                     nest++;
554                                 }
555                                 continue;
556                             case '+':
557                                 p++;
558                                 if (*p == '/')
559                                 {
560                                     p++;
561                                     if (--nest == 0)
562                                         break;
563                                 }
564                                 continue;
565                             case '\r':
566                                 p++;
567                                 if (*p != '\n')
568                                     endOfLine();
569                                 continue;
570                             case '\n':
571                                 endOfLine();
572                                 p++;
573                                 continue;
574                             case 0:
575                             case 0x1A:
576                                 error("unterminated /+ +/ comment");
577                                 p = end;
578                                 t.loc = loc();
579                                 t.value = TOKeof;
580                                 return;
581                             default:
582                                 if (c & 0x80)
583                                 {
584                                     uint u = decodeUTF();
585                                     if (u == PS || u == LS)
586                                         endOfLine();
587                                 }
588                                 p++;
589                                 continue;
590                             }
591                             break;
592                         }
593                         if (commentToken)
594                         {
595                             t.loc = startLoc;
596                             t.value = TOKcomment;
597                             return;
598                         }
599                         if (doDocComment && t.ptr[2] == '+' && p - 4 != t.ptr)
600                         {
601                             // if /++ but not /++/
602                             getDocComment(t, lastLine == startLoc.linnum);
603                         }
604                         continue;
605                     }
606                 default:
607                     break;
608                 }
609                 t.value = TOKdiv;
610                 return;
611             case '.':
612                 p++;
613                 if (isdigit(*p))
614                 {
615                     /* Note that we don't allow ._1 and ._ as being
616                      * valid floating point numbers.
617                      */
618                     p--;
619                     t.value = inreal(t);
620                 }
621                 else if (p[0] == '.')
622                 {
623                     if (p[1] == '.')
624                     {
625                         p += 2;
626                         t.value = TOKdotdotdot;
627                     }
628                     else
629                     {
630                         p++;
631                         t.value = TOKslice;
632                     }
633                 }
634                 else
635                     t.value = TOKdot;
636                 return;
637             case '&':
638                 p++;
639                 if (*p == '=')
640                 {
641                     p++;
642                     t.value = TOKandass;
643                 }
644                 else if (*p == '&')
645                 {
646                     p++;
647                     t.value = TOKandand;
648                 }
649                 else
650                     t.value = TOKand;
651                 return;
652             case '|':
653                 p++;
654                 if (*p == '=')
655                 {
656                     p++;
657                     t.value = TOKorass;
658                 }
659                 else if (*p == '|')
660                 {
661                     p++;
662                     t.value = TOKoror;
663                 }
664                 else
665                     t.value = TOKor;
666                 return;
667             case '-':
668                 p++;
669                 if (*p == '=')
670                 {
671                     p++;
672                     t.value = TOKminass;
673                 }
674                 else if (*p == '-')
675                 {
676                     p++;
677                     t.value = TOKminusminus;
678                 }
679                 else
680                     t.value = TOKmin;
681                 return;
682             case '+':
683                 p++;
684                 if (*p == '=')
685                 {
686                     p++;
687                     t.value = TOKaddass;
688                 }
689                 else if (*p == '+')
690                 {
691                     p++;
692                     t.value = TOKplusplus;
693                 }
694                 else
695                     t.value = TOKadd;
696                 return;
697             case '<':
698                 p++;
699                 if (*p == '=')
700                 {
701                     p++;
702                     t.value = TOKle; // <=
703                 }
704                 else if (*p == '<')
705                 {
706                     p++;
707                     if (*p == '=')
708                     {
709                         p++;
710                         t.value = TOKshlass; // <<=
711                     }
712                     else
713                         t.value = TOKshl; // <<
714                 }
715                 else if (*p == '>')
716                 {
717                     p++;
718                     if (*p == '=')
719                     {
720                         p++;
721                         t.value = TOKleg; // <>=
722                     }
723                     else
724                         t.value = TOKlg; // <>
725                 }
726                 else
727                     t.value = TOKlt; // <
728                 return;
729             case '>':
730                 p++;
731                 if (*p == '=')
732                 {
733                     p++;
734                     t.value = TOKge; // >=
735                 }
736                 else if (*p == '>')
737                 {
738                     p++;
739                     if (*p == '=')
740                     {
741                         p++;
742                         t.value = TOKshrass; // >>=
743                     }
744                     else if (*p == '>')
745                     {
746                         p++;
747                         if (*p == '=')
748                         {
749                             p++;
750                             t.value = TOKushrass; // >>>=
751                         }
752                         else
753                             t.value = TOKushr; // >>>
754                     }
755                     else
756                         t.value = TOKshr; // >>
757                 }
758                 else
759                     t.value = TOKgt; // >
760                 return;
761             case '!':
762                 p++;
763                 if (*p == '=')
764                 {
765                     p++;
766                     t.value = TOKnotequal; // !=
767                 }
768                 else if (*p == '<')
769                 {
770                     p++;
771                     if (*p == '>')
772                     {
773                         p++;
774                         if (*p == '=')
775                         {
776                             p++;
777                             t.value = TOKunord; // !<>=
778                         }
779                         else
780                             t.value = TOKue; // !<>
781                     }
782                     else if (*p == '=')
783                     {
784                         p++;
785                         t.value = TOKug; // !<=
786                     }
787                     else
788                         t.value = TOKuge; // !<
789                 }
790                 else if (*p == '>')
791                 {
792                     p++;
793                     if (*p == '=')
794                     {
795                         p++;
796                         t.value = TOKul; // !>=
797                     }
798                     else
799                         t.value = TOKule; // !>
800                 }
801                 else
802                     t.value = TOKnot; // !
803                 return;
804             case '=':
805                 p++;
806                 if (*p == '=')
807                 {
808                     p++;
809                     t.value = TOKequal; // ==
810                 }
811                 else if (*p == '>')
812                 {
813                     p++;
814                     t.value = TOKgoesto; // =>
815                 }
816                 else
817                     t.value = TOKassign; // =
818                 return;
819             case '~':
820                 p++;
821                 if (*p == '=')
822                 {
823                     p++;
824                     t.value = TOKcatass; // ~=
825                 }
826                 else
827                     t.value = TOKtilde; // ~
828                 return;
829             case '^':
830                 p++;
831                 if (*p == '^')
832                 {
833                     p++;
834                     if (*p == '=')
835                     {
836                         p++;
837                         t.value = TOKpowass; // ^^=
838                     }
839                     else
840                         t.value = TOKpow; // ^^
841                 }
842                 else if (*p == '=')
843                 {
844                     p++;
845                     t.value = TOKxorass; // ^=
846                 }
847                 else
848                     t.value = TOKxor; // ^
849                 return;
850             case '(':
851                 p++;
852                 t.value = TOKlparen;
853                 return;
854             case ')':
855                 p++;
856                 t.value = TOKrparen;
857                 return;
858             case '[':
859                 p++;
860                 t.value = TOKlbracket;
861                 return;
862             case ']':
863                 p++;
864                 t.value = TOKrbracket;
865                 return;
866             case '{':
867                 p++;
868                 t.value = TOKlcurly;
869                 return;
870             case '}':
871                 p++;
872                 t.value = TOKrcurly;
873                 return;
874             case '?':
875                 p++;
876                 t.value = TOKquestion;
877                 return;
878             case ',':
879                 p++;
880                 t.value = TOKcomma;
881                 return;
882             case ';':
883                 p++;
884                 t.value = TOKsemicolon;
885                 return;
886             case ':':
887                 p++;
888                 t.value = TOKcolon;
889                 return;
890             case '$':
891                 p++;
892                 t.value = TOKdollar;
893                 return;
894             case '@':
895                 p++;
896                 t.value = TOKat;
897                 return;
898             case '*':
899                 p++;
900                 if (*p == '=')
901                 {
902                     p++;
903                     t.value = TOKmulass;
904                 }
905                 else
906                     t.value = TOKmul;
907                 return;
908             case '%':
909                 p++;
910                 if (*p == '=')
911                 {
912                     p++;
913                     t.value = TOKmodass;
914                 }
915                 else
916                     t.value = TOKmod;
917                 return;
918             case '#':
919                 {
920                     p++;
921                     Token n;
922                     scan(&n);
923                     if (n.value == TOKidentifier && n.ident == Id.line)
924                     {
925                         poundLine();
926                         continue;
927                     }
928                     else
929                     {
930                         t.value = TOKpound;
931                         return;
932                     }
933                 }
934             default:
935                 {
936                     uint c = *p;
937                     if (c & 0x80)
938                     {
939                         c = decodeUTF();
940                         // Check for start of unicode identifier
941                         if (isUniAlpha(c))
942                             goto case_ident;
943                         if (c == PS || c == LS)
944                         {
945                             endOfLine();
946                             p++;
947                             continue;
948                         }
949                     }
950                     if (c < 0x80 && isprint(c))
951                         error("character '%c' is not a valid token", c);
952                     else
953                         error("character 0x%02x is not a valid token", c);
954                     p++;
955                     continue;
956                 }
957             }
958         }
959     }
960 
961     final Token* peek(Token* ct)
962     {
963         Token* t;
964         if (ct.next)
965             t = ct.next;
966         else
967         {
968             t = Token.alloc();
969             scan(t);
970             ct.next = t;
971         }
972         return t;
973     }
974 
975     /*********************************
976      * tk is on the opening (.
977      * Look ahead and return token that is past the closing ).
978      */
979     final Token* peekPastParen(Token* tk)
980     {
981         //printf("peekPastParen()\n");
982         int parens = 1;
983         int curlynest = 0;
984         while (1)
985         {
986             tk = peek(tk);
987             //tk->print();
988             switch (tk.value)
989             {
990             case TOKlparen:
991                 parens++;
992                 continue;
993             case TOKrparen:
994                 --parens;
995                 if (parens)
996                     continue;
997                 tk = peek(tk);
998                 break;
999             case TOKlcurly:
1000                 curlynest++;
1001                 continue;
1002             case TOKrcurly:
1003                 if (--curlynest >= 0)
1004                     continue;
1005                 break;
1006             case TOKsemicolon:
1007                 if (curlynest)
1008                     continue;
1009                 break;
1010             case TOKeof:
1011                 break;
1012             default:
1013                 continue;
1014             }
1015             return tk;
1016         }
1017     }
1018 
1019     /*******************************************
1020      * Parse escape sequence.
1021      */
1022     final uint escapeSequence()
1023     {
1024         uint c = *p;
1025         int n;
1026         int ndigits;
1027         switch (c)
1028         {
1029         case '\'':
1030         case '"':
1031         case '?':
1032         case '\\':
1033         Lconsume:
1034             p++;
1035             break;
1036         case 'a':
1037             c = 7;
1038             goto Lconsume;
1039         case 'b':
1040             c = 8;
1041             goto Lconsume;
1042         case 'f':
1043             c = 12;
1044             goto Lconsume;
1045         case 'n':
1046             c = 10;
1047             goto Lconsume;
1048         case 'r':
1049             c = 13;
1050             goto Lconsume;
1051         case 't':
1052             c = 9;
1053             goto Lconsume;
1054         case 'v':
1055             c = 11;
1056             goto Lconsume;
1057         case 'u':
1058             ndigits = 4;
1059             goto Lhex;
1060         case 'U':
1061             ndigits = 8;
1062             goto Lhex;
1063         case 'x':
1064             ndigits = 2;
1065         Lhex:
1066             p++;
1067             c = *p;
1068             if (ishex(cast(char)c))
1069             {
1070                 uint v;
1071                 n = 0;
1072                 v = 0;
1073                 while (1)
1074                 {
1075                     if (isdigit(cast(char)c))
1076                         c -= '0';
1077                     else if (islower(c))
1078                         c -= 'a' - 10;
1079                     else
1080                         c -= 'A' - 10;
1081                     v = v * 16 + c;
1082                     c = *++p;
1083                     if (++n == ndigits)
1084                         break;
1085                     if (!ishex(cast(char)c))
1086                     {
1087                         error("escape hex sequence has %d hex digits instead of %d", n, ndigits);
1088                         break;
1089                     }
1090                 }
1091                 if (ndigits != 2 && !utf_isValidDchar(v))
1092                 {
1093                     error("invalid UTF character \\U%08x", v);
1094                     v = '?'; // recover with valid UTF character
1095                 }
1096                 c = v;
1097             }
1098             else
1099                 error("undefined escape hex sequence \\%c", c);
1100             break;
1101         case '&':
1102             // named character entity
1103             for (const(char)* idstart = ++p; 1; p++)
1104             {
1105                 switch (*p)
1106                 {
1107                 case ';':
1108                     c = HtmlNamedEntity(idstart, p - idstart);
1109                     if (c == ~0)
1110                     {
1111                         error("unnamed character entity &%.*s;", cast(int)(p - idstart), idstart);
1112                         c = ' ';
1113                     }
1114                     p++;
1115                     break;
1116                 default:
1117                     if (isalpha(*p) || (p != idstart && isdigit(*p)))
1118                         continue;
1119                     error("unterminated named entity &%.*s;", cast(int)(p - idstart + 1), idstart);
1120                     break;
1121                 }
1122                 break;
1123             }
1124             break;
1125         case 0:
1126         case 0x1A:
1127             // end of file
1128             c = '\\';
1129             break;
1130         default:
1131             if (isoctal(cast(char)c))
1132             {
1133                 uint v;
1134                 n = 0;
1135                 v = 0;
1136                 do
1137                 {
1138                     v = v * 8 + (c - '0');
1139                     c = *++p;
1140                 }
1141                 while (++n < 3 && isoctal(cast(char)c));
1142                 c = v;
1143                 if (c > 0xFF)
1144                     error("escape octal sequence \\%03o is larger than \\377", c);
1145             }
1146             else
1147                 error("undefined escape sequence \\%c", c);
1148             break;
1149         }
1150         return c;
1151     }
1152 
1153     /**************************************
1154      */
1155     final TOK wysiwygStringConstant(Token* t, int tc)
1156     {
1157         uint c;
1158         Loc start = loc();
1159         p++;
1160         stringbuffer.reset();
1161         while (1)
1162         {
1163             c = *p++;
1164             switch (c)
1165             {
1166             case '\n':
1167                 endOfLine();
1168                 break;
1169             case '\r':
1170                 if (*p == '\n')
1171                     continue;
1172                 // ignore
1173                 c = '\n'; // treat EndOfLine as \n character
1174                 endOfLine();
1175                 break;
1176             case 0:
1177             case 0x1A:
1178                 error("unterminated string constant starting at %s", start.toChars());
1179                 t.ustring = cast(char*)"";
1180                 t.len = 0;
1181                 t.postfix = 0;
1182                 return TOKstring;
1183             case '"':
1184             case '`':
1185                 if (c == tc)
1186                 {
1187                     t.len = cast(uint)stringbuffer.offset;
1188                     stringbuffer.writeByte(0);
1189                     t.ustring = cast(char*)mem.xmalloc(stringbuffer.offset);
1190                     memcpy(t.ustring, stringbuffer.data, stringbuffer.offset);
1191                     stringPostfix(t);
1192                     return TOKstring;
1193                 }
1194                 break;
1195             default:
1196                 if (c & 0x80)
1197                 {
1198                     p--;
1199                     uint u = decodeUTF();
1200                     p++;
1201                     if (u == PS || u == LS)
1202                         endOfLine();
1203                     stringbuffer.writeUTF8(u);
1204                     continue;
1205                 }
1206                 break;
1207             }
1208             stringbuffer.writeByte(c);
1209         }
1210     }
1211 
1212     /**************************************
1213      * Lex hex strings:
1214      *      x"0A ae 34FE BD"
1215      */
1216     final TOK hexStringConstant(Token* t)
1217     {
1218         uint c;
1219         Loc start = loc();
1220         uint n = 0;
1221         uint v = ~0; // dead assignment, needed to suppress warning
1222         p++;
1223         stringbuffer.reset();
1224         while (1)
1225         {
1226             c = *p++;
1227             switch (c)
1228             {
1229             case ' ':
1230             case '\t':
1231             case '\v':
1232             case '\f':
1233                 continue;
1234                 // skip white space
1235             case '\r':
1236                 if (*p == '\n')
1237                     continue;
1238                 // ignore
1239                 // Treat isolated '\r' as if it were a '\n'
1240             case '\n':
1241                 endOfLine();
1242                 continue;
1243             case 0:
1244             case 0x1A:
1245                 error("unterminated string constant starting at %s", start.toChars());
1246                 t.ustring = cast(char*)"";
1247                 t.len = 0;
1248                 t.postfix = 0;
1249                 return TOKxstring;
1250             case '"':
1251                 if (n & 1)
1252                 {
1253                     error("odd number (%d) of hex characters in hex string", n);
1254                     stringbuffer.writeByte(v);
1255                 }
1256                 t.len = cast(uint)stringbuffer.offset;
1257                 stringbuffer.writeByte(0);
1258                 t.ustring = cast(char*)mem.xmalloc(stringbuffer.offset);
1259                 memcpy(t.ustring, stringbuffer.data, stringbuffer.offset);
1260                 stringPostfix(t);
1261                 return TOKxstring;
1262             default:
1263                 if (c >= '0' && c <= '9')
1264                     c -= '0';
1265                 else if (c >= 'a' && c <= 'f')
1266                     c -= 'a' - 10;
1267                 else if (c >= 'A' && c <= 'F')
1268                     c -= 'A' - 10;
1269                 else if (c & 0x80)
1270                 {
1271                     p--;
1272                     uint u = decodeUTF();
1273                     p++;
1274                     if (u == PS || u == LS)
1275                         endOfLine();
1276                     else
1277                         error("non-hex character \\u%04x in hex string", u);
1278                 }
1279                 else
1280                     error("non-hex character '%c' in hex string", c);
1281                 if (n & 1)
1282                 {
1283                     v = (v << 4) | c;
1284                     stringbuffer.writeByte(v);
1285                 }
1286                 else
1287                     v = c;
1288                 n++;
1289                 break;
1290             }
1291         }
1292     }
1293 
1294     /**************************************
1295      * Lex delimited strings:
1296      *      q"(foo(xxx))"   // "foo(xxx)"
1297      *      q"[foo(]"       // "foo("
1298      *      q"/foo]/"       // "foo]"
1299      *      q"HERE
1300      *      foo
1301      *      HERE"           // "foo\n"
1302      * Input:
1303      *      p is on the "
1304      */
1305     final TOK delimitedStringConstant(Token* t)
1306     {
1307         uint c;
1308         Loc start = loc();
1309         uint delimleft = 0;
1310         uint delimright = 0;
1311         uint nest = 1;
1312         uint nestcount = ~0; // dead assignment, needed to suppress warning
1313         Identifier hereid = null;
1314         uint blankrol = 0;
1315         uint startline = 0;
1316         p++;
1317         stringbuffer.reset();
1318         while (1)
1319         {
1320             c = *p++;
1321             //printf("c = '%c'\n", c);
1322             switch (c)
1323             {
1324             case '\n':
1325             Lnextline:
1326                 endOfLine();
1327                 startline = 1;
1328                 if (blankrol)
1329                 {
1330                     blankrol = 0;
1331                     continue;
1332                 }
1333                 if (hereid)
1334                 {
1335                     stringbuffer.writeUTF8(c);
1336                     continue;
1337                 }
1338                 break;
1339             case '\r':
1340                 if (*p == '\n')
1341                     continue;
1342                 // ignore
1343                 c = '\n'; // treat EndOfLine as \n character
1344                 goto Lnextline;
1345             case 0:
1346             case 0x1A:
1347                 error("unterminated delimited string constant starting at %s", start.toChars());
1348                 t.ustring = cast(char*)"";
1349                 t.len = 0;
1350                 t.postfix = 0;
1351                 return TOKstring;
1352             default:
1353                 if (c & 0x80)
1354                 {
1355                     p--;
1356                     c = decodeUTF();
1357                     p++;
1358                     if (c == PS || c == LS)
1359                         goto Lnextline;
1360                 }
1361                 break;
1362             }
1363             if (delimleft == 0)
1364             {
1365                 delimleft = c;
1366                 nest = 1;
1367                 nestcount = 1;
1368                 if (c == '(')
1369                     delimright = ')';
1370                 else if (c == '{')
1371                     delimright = '}';
1372                 else if (c == '[')
1373                     delimright = ']';
1374                 else if (c == '<')
1375                     delimright = '>';
1376                 else if (isalpha(c) || c == '_' || (c >= 0x80 && isUniAlpha(c)))
1377                 {
1378                     // Start of identifier; must be a heredoc
1379                     Token tok;
1380                     p--;
1381                     scan(&tok); // read in heredoc identifier
1382                     if (tok.value != TOKidentifier)
1383                     {
1384                         error("identifier expected for heredoc, not %s", tok.toChars());
1385                         delimright = c;
1386                     }
1387                     else
1388                     {
1389                         hereid = tok.ident;
1390                         //printf("hereid = '%s'\n", hereid->toChars());
1391                         blankrol = 1;
1392                     }
1393                     nest = 0;
1394                 }
1395                 else
1396                 {
1397                     delimright = c;
1398                     nest = 0;
1399                     if (isspace(c))
1400                         error("delimiter cannot be whitespace");
1401                 }
1402             }
1403             else
1404             {
1405                 if (blankrol)
1406                 {
1407                     error("heredoc rest of line should be blank");
1408                     blankrol = 0;
1409                     continue;
1410                 }
1411                 if (nest == 1)
1412                 {
1413                     if (c == delimleft)
1414                         nestcount++;
1415                     else if (c == delimright)
1416                     {
1417                         nestcount--;
1418                         if (nestcount == 0)
1419                             goto Ldone;
1420                     }
1421                 }
1422                 else if (c == delimright)
1423                     goto Ldone;
1424                 if (startline && isalpha(c) && hereid)
1425                 {
1426                     Token tok;
1427                     const(char)* psave = p;
1428                     p--;
1429                     scan(&tok); // read in possible heredoc identifier
1430                     //printf("endid = '%s'\n", tok.ident->toChars());
1431                     if (tok.value == TOKidentifier && tok.ident.equals(hereid))
1432                     {
1433                         /* should check that rest of line is blank
1434                          */
1435                         goto Ldone;
1436                     }
1437                     p = psave;
1438                 }
1439                 stringbuffer.writeUTF8(c);
1440                 startline = 0;
1441             }
1442         }
1443     Ldone:
1444         if (*p == '"')
1445             p++;
1446         else if (hereid)
1447             error("delimited string must end in %s\"", hereid.toChars());
1448         else
1449             error("delimited string must end in %c\"", delimright);
1450         t.len = cast(uint)stringbuffer.offset;
1451         stringbuffer.writeByte(0);
1452         t.ustring = cast(char*)mem.xmalloc(stringbuffer.offset);
1453         memcpy(t.ustring, stringbuffer.data, stringbuffer.offset);
1454         stringPostfix(t);
1455         return TOKstring;
1456     }
1457 
    /**************************************
     * Lex token strings:
     *      q{ foo(xxx) } // " foo(xxx) "
     *      q{foo(}       // "foo("
     *      q{{foo}"}"}   // "{foo}"}""
     * Input:
     *      p is on the opening {
     */
1466     final TOK tokenStringConstant(Token* t)
1467     {
1468         uint nest = 1;
1469         Loc start = loc();
1470         const(char)* pstart = ++p;
1471         while (1)
1472         {
1473             Token tok;
1474             scan(&tok);
1475             switch (tok.value)
1476             {
1477             case TOKlcurly:
1478                 nest++;
1479                 continue;
1480             case TOKrcurly:
1481                 if (--nest == 0)
1482                 {
1483                     t.len = cast(uint)(p - 1 - pstart);
1484                     t.ustring = cast(char*)mem.xmalloc(t.len + 1);
1485                     memcpy(t.ustring, pstart, t.len);
1486                     t.ustring[t.len] = 0;
1487                     stringPostfix(t);
1488                     return TOKstring;
1489                 }
1490                 continue;
1491             case TOKeof:
1492                 error("unterminated token string constant starting at %s", start.toChars());
1493                 t.ustring = cast(char*)"";
1494                 t.len = 0;
1495                 t.postfix = 0;
1496                 return TOKstring;
1497             default:
1498                 continue;
1499             }
1500         }
1501     }
1502 
1503     /**************************************
1504      */
1505     final TOK escapeStringConstant(Token* t, int wide)
1506     {
1507         uint c;
1508         Loc start = loc();
1509         p++;
1510         stringbuffer.reset();
1511         while (1)
1512         {
1513             c = *p++;
1514             switch (c)
1515             {
1516             case '\\':
1517                 switch (*p)
1518                 {
1519                 case 'u':
1520                 case 'U':
1521                 case '&':
1522                     c = escapeSequence();
1523                     stringbuffer.writeUTF8(c);
1524                     continue;
1525                 default:
1526                     c = escapeSequence();
1527                     break;
1528                 }
1529                 break;
1530             case '\n':
1531                 endOfLine();
1532                 break;
1533             case '\r':
1534                 if (*p == '\n')
1535                     continue;
1536                 // ignore
1537                 c = '\n'; // treat EndOfLine as \n character
1538                 endOfLine();
1539                 break;
1540             case '"':
1541                 t.len = cast(uint)stringbuffer.offset;
1542                 stringbuffer.writeByte(0);
1543                 t.ustring = cast(char*)mem.xmalloc(stringbuffer.offset);
1544                 memcpy(t.ustring, stringbuffer.data, stringbuffer.offset);
1545                 stringPostfix(t);
1546                 return TOKstring;
1547             case 0:
1548             case 0x1A:
1549                 p--;
1550                 error("unterminated string constant starting at %s", start.toChars());
1551                 t.ustring = cast(char*)"";
1552                 t.len = 0;
1553                 t.postfix = 0;
1554                 return TOKstring;
1555             default:
1556                 if (c & 0x80)
1557                 {
1558                     p--;
1559                     c = decodeUTF();
1560                     if (c == LS || c == PS)
1561                     {
1562                         c = '\n';
1563                         endOfLine();
1564                     }
1565                     p++;
1566                     stringbuffer.writeUTF8(c);
1567                     continue;
1568                 }
1569                 break;
1570             }
1571             stringbuffer.writeByte(c);
1572         }
1573     }
1574 
1575     /**************************************
1576      */
1577     final TOK charConstant(Token* t, int wide)
1578     {
1579         uint c;
1580         TOK tk = TOKcharv;
1581         //printf("Lexer::charConstant\n");
1582         p++;
1583         c = *p++;
1584         switch (c)
1585         {
1586         case '\\':
1587             switch (*p)
1588             {
1589             case 'u':
1590                 t.uns64value = escapeSequence();
1591                 tk = TOKwcharv;
1592                 break;
1593             case 'U':
1594             case '&':
1595                 t.uns64value = escapeSequence();
1596                 tk = TOKdcharv;
1597                 break;
1598             default:
1599                 t.uns64value = escapeSequence();
1600                 break;
1601             }
1602             break;
1603         case '\n':
1604         L1:
1605             endOfLine();
1606         case '\r':
1607         case 0:
1608         case 0x1A:
1609         case '\'':
1610             error("unterminated character constant");
1611             t.uns64value = '?';
1612             return tk;
1613         default:
1614             if (c & 0x80)
1615             {
1616                 p--;
1617                 c = decodeUTF();
1618                 p++;
1619                 if (c == LS || c == PS)
1620                     goto L1;
1621                 if (c < 0xD800 || (c >= 0xE000 && c < 0xFFFE))
1622                     tk = TOKwcharv;
1623                 else
1624                     tk = TOKdcharv;
1625             }
1626             t.uns64value = c;
1627             break;
1628         }
1629         if (*p != '\'')
1630         {
1631             error("unterminated character constant");
1632             t.uns64value = '?';
1633             return tk;
1634         }
1635         p++;
1636         return tk;
1637     }
1638 
1639     /***************************************
1640      * Get postfix of string literal.
1641      */
1642     final void stringPostfix(Token* t)
1643     {
1644         switch (*p)
1645         {
1646         case 'c':
1647         case 'w':
1648         case 'd':
1649             t.postfix = *p;
1650             p++;
1651             break;
1652         default:
1653             t.postfix = 0;
1654             break;
1655         }
1656     }
1657 
1658     /**************************************
1659      * Read in a number.
1660      * If it's an integer, store it in tok.TKutok.Vlong.
1661      *      integers can be decimal, octal or hex
1662      *      Handle the suffixes U, UL, LU, L, etc.
1663      * If it's double, store it in tok.TKutok.Vdouble.
1664      * Returns:
1665      *      TKnum
1666      *      TKdouble,...
1667      */
1668     final TOK number(Token* t)
1669     {
1670         int base = 10;
1671         const(char)* start = p;
1672         uint c;
1673         uinteger_t n = 0; // unsigned >=64 bit integer type
1674         int d;
1675         bool err = false;
1676         bool overflow = false;
1677         c = *p;
1678         if (c == '0')
1679         {
1680             ++p;
1681             c = *p;
1682             switch (c)
1683             {
1684             case '0':
1685             case '1':
1686             case '2':
1687             case '3':
1688             case '4':
1689             case '5':
1690             case '6':
1691             case '7':
1692                 n = c - '0';
1693                 ++p;
1694                 base = 8;
1695                 break;
1696             case 'x':
1697             case 'X':
1698                 ++p;
1699                 base = 16;
1700                 break;
1701             case 'b':
1702             case 'B':
1703                 ++p;
1704                 base = 2;
1705                 break;
1706             case '.':
1707                 if (p[1] == '.')
1708                     goto Ldone;
1709                 // if ".."
1710                 if (isalpha(p[1]) || p[1] == '_' || p[1] & 0x80)
1711                     goto Ldone;
1712                 // if ".identifier" or ".unicode"
1713                 goto Lreal;
1714                 // '.' is part of current token
1715             case 'i':
1716             case 'f':
1717             case 'F':
1718                 goto Lreal;
1719             case '_':
1720                 ++p;
1721                 base = 8;
1722                 break;
1723             case 'L':
1724                 if (p[1] == 'i')
1725                     goto Lreal;
1726                 break;
1727             default:
1728                 break;
1729             }
1730         }
1731         while (1)
1732         {
1733             c = *p;
1734             switch (c)
1735             {
1736             case '0':
1737             case '1':
1738                 ++p;
1739                 d = c - '0';
1740                 break;
1741             case '2':
1742             case '3':
1743             case '4':
1744             case '5':
1745             case '6':
1746             case '7':
1747                 if (base == 2 && !err)
1748                 {
1749                     error("binary digit expected");
1750                     err = true;
1751                 }
1752                 ++p;
1753                 d = c - '0';
1754                 break;
1755             case '8':
1756             case '9':
1757                 ++p;
1758                 if (base < 10 && !err)
1759                 {
1760                     error("radix %d digit expected, not '%c'", base, c);
1761                     err = true;
1762                 }
1763                 d = c - '0';
1764                 break;
1765             case 'a':
1766             case 'b':
1767             case 'c':
1768             case 'd':
1769             case 'e':
1770             case 'f':
1771             case 'A':
1772             case 'B':
1773             case 'C':
1774             case 'D':
1775             case 'E':
1776             case 'F':
1777                 ++p;
1778                 if (base != 16)
1779                 {
1780                     if (c == 'e' || c == 'E' || c == 'f' || c == 'F')
1781                         goto Lreal;
1782                     if (!err)
1783                     {
1784                         error("radix %d digit expected, not '%c'", base, c);
1785                         err = true;
1786                     }
1787                 }
1788                 if (c >= 'a')
1789                     d = c + 10 - 'a';
1790                 else
1791                     d = c + 10 - 'A';
1792                 break;
1793             case 'L':
1794                 if (p[1] == 'i')
1795                     goto Lreal;
1796                 goto Ldone;
1797             case '.':
1798                 if (p[1] == '.')
1799                     goto Ldone;
1800                 // if ".."
1801                 if (base == 10 && (isalpha(p[1]) || p[1] == '_' || p[1] & 0x80))
1802                     goto Ldone;
1803                 // if ".identifier" or ".unicode"
1804                 goto Lreal;
1805                 // otherwise as part of a floating point literal
1806             case 'p':
1807             case 'P':
1808             case 'i':
1809             Lreal:
1810                 p = start;
1811                 return inreal(t);
1812             case '_':
1813                 ++p;
1814                 continue;
1815             default:
1816                 goto Ldone;
1817             }
1818             uinteger_t n2 = n * base;
1819             if ((n2 / base != n || n2 + d < n))
1820             {
1821                 overflow = true;
1822             }
1823             n = n2 + d;
1824             // if n needs more than 64 bits
1825             if (n.sizeof > 8 && n > 0xFFFFFFFFFFFFFFFFUL)
1826             {
1827                 overflow = true;
1828             }
1829         }
1830     Ldone:
1831         if (overflow && !err)
1832         {
1833             error("integer overflow");
1834             err = true;
1835         }
1836         enum FLAGS : int
1837         {
1838             FLAGS_none = 0,
1839             FLAGS_decimal = 1, // decimal
1840             FLAGS_unsigned = 2, // u or U suffix
1841             FLAGS_long = 4, // L suffix
1842         }
1843 
1844         alias FLAGS_none = FLAGS.FLAGS_none;
1845         alias FLAGS_decimal = FLAGS.FLAGS_decimal;
1846         alias FLAGS_unsigned = FLAGS.FLAGS_unsigned;
1847         alias FLAGS_long = FLAGS.FLAGS_long;
1848         ;
1849         FLAGS flags = (base == 10) ? FLAGS_decimal : FLAGS_none;
1850         // Parse trailing 'u', 'U', 'l' or 'L' in any combination
1851         const(char)* psuffix = p;
1852         while (1)
1853         {
1854             char f;
1855             switch (*p)
1856             {
1857             case 'U':
1858             case 'u':
1859                 f = FLAGS_unsigned;
1860                 goto L1;
1861             case 'l':
1862                 f = FLAGS_long;
1863                 error("lower case integer suffix 'l' is not allowed. Please use 'L' instead");
1864                 goto L1;
1865             case 'L':
1866                 f = FLAGS_long;
1867             L1:
1868                 p++;
1869                 if ((flags & f) && !err)
1870                 {
1871                     error("unrecognized token");
1872                     err = true;
1873                 }
1874                 flags = cast(FLAGS)(flags | f);
1875                 continue;
1876             default:
1877                 break;
1878             }
1879             break;
1880         }
1881         if (base == 8 && n >= 8)
1882             error("octal literals 0%llo%.*s are no longer supported, use std.conv.octal!%llo%.*s instead", n, p - psuffix, psuffix, n, p - psuffix, psuffix);
1883         TOK result;
1884         switch (flags)
1885         {
1886         case FLAGS_none:
1887             /* Octal or Hexadecimal constant.
1888              * First that fits: int, uint, long, ulong
1889              */
1890             if (n & 0x8000000000000000L)
1891                 result = TOKuns64v;
1892             else if (n & 0xFFFFFFFF00000000L)
1893                 result = TOKint64v;
1894             else if (n & 0x80000000)
1895                 result = TOKuns32v;
1896             else
1897                 result = TOKint32v;
1898             break;
1899         case FLAGS_decimal:
1900             /* First that fits: int, long, long long
1901              */
1902             if (n & 0x8000000000000000L)
1903             {
1904                 if (!err)
1905                 {
1906                     error("signed integer overflow");
1907                     err = true;
1908                 }
1909                 result = TOKuns64v;
1910             }
1911             else if (n & 0xFFFFFFFF80000000L)
1912                 result = TOKint64v;
1913             else
1914                 result = TOKint32v;
1915             break;
1916         case FLAGS_unsigned:
1917         case FLAGS_decimal | FLAGS_unsigned:
1918             /* First that fits: uint, ulong
1919              */
1920             if (n & 0xFFFFFFFF00000000L)
1921                 result = TOKuns64v;
1922             else
1923                 result = TOKuns32v;
1924             break;
1925         case FLAGS_decimal | FLAGS_long:
1926             if (n & 0x8000000000000000L)
1927             {
1928                 if (!err)
1929                 {
1930                     error("signed integer overflow");
1931                     err = true;
1932                 }
1933                 result = TOKuns64v;
1934             }
1935             else
1936                 result = TOKint64v;
1937             break;
1938         case FLAGS_long:
1939             if (n & 0x8000000000000000L)
1940                 result = TOKuns64v;
1941             else
1942                 result = TOKint64v;
1943             break;
1944         case FLAGS_unsigned | FLAGS_long:
1945         case FLAGS_decimal | FLAGS_unsigned | FLAGS_long:
1946             result = TOKuns64v;
1947             break;
1948         default:
1949             debug
1950             {
1951                 printf("%x\n", flags);
1952             }
1953             assert(0);
1954         }
1955         t.uns64value = n;
1956         return result;
1957     }
1958 
1959     /**************************************
1960      * Read in characters, converting them to real.
1961      * Bugs:
1962      *      Exponent overflow not detected.
1963      *      Too much requested precision is not detected.
1964      */
1965     final TOK inreal(Token* t)
1966     {
1967         //printf("Lexer::inreal()\n");
1968         debug
1969         {
1970             assert(*p == '.' || isdigit(*p));
1971         }
1972         stringbuffer.reset();
1973         const(char)* pstart = p;
1974         char hex = 0;
1975         uint c = *p++;
1976         // Leading '0x'
1977         if (c == '0')
1978         {
1979             c = *p++;
1980             if (c == 'x' || c == 'X')
1981             {
1982                 hex = true;
1983                 c = *p++;
1984             }
1985         }
1986         // Digits to left of '.'
1987         while (1)
1988         {
1989             if (c == '.')
1990             {
1991                 c = *p++;
1992                 break;
1993             }
1994             if (isdigit(c) || (hex && isxdigit(c)) || c == '_')
1995             {
1996                 c = *p++;
1997                 continue;
1998             }
1999             break;
2000         }
2001         // Digits to right of '.'
2002         while (1)
2003         {
2004             if (isdigit(c) || (hex && isxdigit(c)) || c == '_')
2005             {
2006                 c = *p++;
2007                 continue;
2008             }
2009             break;
2010         }
2011         if (c == 'e' || c == 'E' || (hex && (c == 'p' || c == 'P')))
2012         {
2013             c = *p++;
2014             if (c == '-' || c == '+')
2015             {
2016                 c = *p++;
2017             }
2018             bool anyexp = false;
2019             while (1)
2020             {
2021                 if (isdigit(c))
2022                 {
2023                     anyexp = true;
2024                     c = *p++;
2025                     continue;
2026                 }
2027                 if (c == '_')
2028                 {
2029                     c = *p++;
2030                     continue;
2031                 }
2032                 if (!anyexp)
2033                     error("missing exponent");
2034                 break;
2035             }
2036         }
2037         else if (hex)
2038             error("exponent required for hex float");
2039         --p;
2040         while (pstart < p)
2041         {
2042             if (*pstart != '_')
2043                 stringbuffer.writeByte(*pstart);
2044             ++pstart;
2045         }
2046         stringbuffer.writeByte(0);
2047         TOK result;
2048         t.float80value = Port.strtold(cast(char*)stringbuffer.data, null);
2049         errno = 0;
2050         switch (*p)
2051         {
2052         case 'F':
2053         case 'f':
2054             // Only interested in errno return
2055             cast(void)Port.strtof(cast(char*)stringbuffer.data, null);
2056             result = TOKfloat32v;
2057             p++;
2058             break;
2059         default:
2060             /* Should do our own strtod(), since dmc and linux gcc
2061              * accept 2.22507e-308, while apple gcc will only take
2062              * 2.22508e-308. Not sure who is right.
2063              */
2064             // Only interested in errno return
2065             cast(void)Port.strtod(cast(char*)stringbuffer.data, null);
2066             result = TOKfloat64v;
2067             break;
2068         case 'l':
2069             error("use 'L' suffix instead of 'l'");
2070         case 'L':
2071             result = TOKfloat80v;
2072             p++;
2073             break;
2074         }
2075         if (*p == 'i' || *p == 'I')
2076         {
2077             if (*p == 'I')
2078                 error("use 'i' suffix instead of 'I'");
2079             p++;
2080             switch (result)
2081             {
2082             case TOKfloat32v:
2083                 result = TOKimaginary32v;
2084                 break;
2085             case TOKfloat64v:
2086                 result = TOKimaginary64v;
2087                 break;
2088             case TOKfloat80v:
2089                 result = TOKimaginary80v;
2090                 break;
2091             default:
2092                 break;
2093             }
2094         }
2095         if (errno == ERANGE)
2096         {
2097             const(char)* suffix = (result == TOKfloat32v || result == TOKimaginary32v) ? "f" : "";
2098             error(scanloc, "number '%s%s' is not representable", cast(char*)stringbuffer.data, suffix);
2099         }
2100         debug
2101         {
2102             switch (result)
2103             {
2104             case TOKfloat32v:
2105             case TOKfloat64v:
2106             case TOKfloat80v:
2107             case TOKimaginary32v:
2108             case TOKimaginary64v:
2109             case TOKimaginary80v:
2110                 break;
2111             default:
2112                 assert(0);
2113             }
2114         }
2115         return result;
2116     }
2117 
2118     final Loc loc()
2119     {
2120         scanloc.charnum = cast(uint)(1 + p - line);
2121         return scanloc;
2122     }
2123 
2124     final void error(const(char)* format, ...)
2125     {
2126         va_list ap;
2127         va_start(ap, format);
2128         .verror(token.loc, format, ap);
2129         va_end(ap);
2130         errors = true;
2131     }
2132 
2133     final void error(Loc loc, const(char)* format, ...)
2134     {
2135         va_list ap;
2136         va_start(ap, format);
2137         .verror(loc, format, ap);
2138         va_end(ap);
2139         errors = true;
2140     }
2141 
2142     final void deprecation(const(char)* format, ...)
2143     {
2144         va_list ap;
2145         va_start(ap, format);
2146         .vdeprecation(token.loc, format, ap);
2147         va_end(ap);
2148         if (global.params.useDeprecated == 0)
2149             errors = true;
2150     }
2151 
2152     /*********************************************
2153      * parse:
2154      *      #line linnum [filespec]
2155      * also allow __LINE__ for linnum, and __FILE__ for filespec
2156      */
2157     final void poundLine()
2158     {
2159         Token tok;
2160         int linnum = this.scanloc.linnum;
2161         char* filespec = null;
2162         Loc loc = this.loc();
2163         scan(&tok);
2164         if (tok.value == TOKint32v || tok.value == TOKint64v)
2165         {
2166             int lin = cast(int)(tok.uns64value - 1);
2167             if (lin != tok.uns64value - 1)
2168                 error("line number %lld out of range", cast(ulong)tok.uns64value);
2169             else
2170                 linnum = lin;
2171         }
2172         else if (tok.value == TOKline)
2173         {
2174         }
2175         else
2176             goto Lerr;
2177         while (1)
2178         {
2179             switch (*p)
2180             {
2181             case 0:
2182             case 0x1A:
2183             case '\n':
2184             Lnewline:
2185                 this.scanloc.linnum = linnum;
2186                 if (filespec)
2187                     this.scanloc.filename = filespec;
2188                 return;
2189             case '\r':
2190                 p++;
2191                 if (*p != '\n')
2192                 {
2193                     p--;
2194                     goto Lnewline;
2195                 }
2196                 continue;
2197             case ' ':
2198             case '\t':
2199             case '\v':
2200             case '\f':
2201                 p++;
2202                 continue;
2203                 // skip white space
2204             case '_':
2205                 if (memcmp(p, cast(char*)"__FILE__", 8) == 0)
2206                 {
2207                     p += 8;
2208                     filespec = mem.xstrdup(scanloc.filename);
2209                     continue;
2210                 }
2211                 goto Lerr;
2212             case '"':
2213                 if (filespec)
2214                     goto Lerr;
2215                 stringbuffer.reset();
2216                 p++;
2217                 while (1)
2218                 {
2219                     uint c;
2220                     c = *p;
2221                     switch (c)
2222                     {
2223                     case '\n':
2224                     case '\r':
2225                     case 0:
2226                     case 0x1A:
2227                         goto Lerr;
2228                     case '"':
2229                         stringbuffer.writeByte(0);
2230                         filespec = mem.xstrdup(cast(char*)stringbuffer.data);
2231                         p++;
2232                         break;
2233                     default:
2234                         if (c & 0x80)
2235                         {
2236                             uint u = decodeUTF();
2237                             if (u == PS || u == LS)
2238                                 goto Lerr;
2239                         }
2240                         stringbuffer.writeByte(c);
2241                         p++;
2242                         continue;
2243                     }
2244                     break;
2245                 }
2246                 continue;
2247             default:
2248                 if (*p & 0x80)
2249                 {
2250                     uint u = decodeUTF();
2251                     if (u == PS || u == LS)
2252                         goto Lnewline;
2253                 }
2254                 goto Lerr;
2255             }
2256         }
2257     Lerr:
2258         error(loc, "#line integer [\"filespec\"]\\n expected");
2259     }
2260 
2261     /********************************************
2262      * Decode UTF character.
2263      * Issue error messages for invalid sequences.
2264      * Return decoded character, advance p to last character in UTF sequence.
2265      */
2266     final uint decodeUTF()
2267     {
2268         dchar_t u;
2269         char c;
2270         const(char)* s = p;
2271         size_t len;
2272         size_t idx;
2273         const(char)* msg;
2274         c = *s;
2275         assert(c & 0x80);
2276         // Check length of remaining string up to 6 UTF-8 characters
2277         for (len = 1; len < 6 && s[len]; len++)
2278         {
2279         }
2280         idx = 0;
2281         msg = utf_decodeChar(s, len, &idx, &u);
2282         p += idx - 1;
2283         if (msg)
2284         {
2285             error("%s", msg);
2286         }
2287         return u;
2288     }
2289 
2290     /***************************************************
     * Parse doc comment embedded between t.ptr and p.
2292      * Remove trailing blanks and tabs from lines.
2293      * Replace all newlines with \n.
2294      * Remove leading comment character from each line.
2295      * Decide if it's a lineComment or a blockComment.
2296      * Append to previous one for this token.
2297      */
2298     final void getDocComment(Token* t, uint lineComment)
2299     {
2300         /* ct tells us which kind of comment it is: '/', '*', or '+'
2301          */
2302         char ct = t.ptr[2];
2303         /* Start of comment text skips over / * *, / + +, or / / /
2304          */
2305         const(char)* q = t.ptr + 3; // start of comment text
2306         const(char)* qend = p;
2307         if (ct == '*' || ct == '+')
2308             qend -= 2;
2309         /* Scan over initial row of ****'s or ++++'s or ////'s
2310          */
2311         for (; q < qend; q++)
2312         {
2313             if (*q != ct)
2314                 break;
2315         }
2316         /* Remove leading spaces until start of the comment
2317          */
2318         int linestart = 0;
2319         if (ct == '/')
2320         {
2321             while (q < qend && (*q == ' ' || *q == '\t'))
2322                 ++q;
2323         }
2324         else if (q < qend)
2325         {
2326             if (*q == '\r')
2327             {
2328                 ++q;
2329                 if (q < qend && *q == '\n')
2330                     ++q;
2331                 linestart = 1;
2332             }
2333             else if (*q == '\n')
2334             {
2335                 ++q;
2336                 linestart = 1;
2337             }
2338         }
2339         /* Remove trailing row of ****'s or ++++'s
2340          */
2341         if (ct != '/')
2342         {
2343             for (; q < qend; qend--)
2344             {
2345                 if (qend[-1] != ct)
2346                     break;
2347             }
2348         }
2349         /* Comment is now [q .. qend].
2350          * Canonicalize it into buf[].
2351          */
2352         OutBuffer buf;
2353         for (; q < qend; q++)
2354         {
2355             char c = *q;
2356             switch (c)
2357             {
2358             case '*':
2359             case '+':
2360                 if (linestart && c == ct)
2361                 {
2362                     linestart = 0;
2363                     /* Trim preceding whitespace up to preceding \n
2364                      */
2365                     while (buf.offset && (buf.data[buf.offset - 1] == ' ' || buf.data[buf.offset - 1] == '\t'))
2366                         buf.offset--;
2367                     continue;
2368                 }
2369                 break;
2370             case ' ':
2371             case '\t':
2372                 break;
2373             case '\r':
2374                 if (q[1] == '\n')
2375                     continue;
2376                 // skip the \r
2377                 goto Lnewline;
2378             default:
2379                 if (c == 226)
2380                 {
2381                     // If LS or PS
2382                     if (q[1] == 128 && (q[2] == 168 || q[2] == 169))
2383                     {
2384                         q += 2;
2385                         goto Lnewline;
2386                     }
2387                 }
2388                 linestart = 0;
2389                 break;
2390             Lnewline:
2391                 c = '\n'; // replace all newlines with \n
2392             case '\n':
2393                 linestart = 1;
2394                 /* Trim trailing whitespace
2395                  */
2396                 while (buf.offset && (buf.data[buf.offset - 1] == ' ' || buf.data[buf.offset - 1] == '\t'))
2397                     buf.offset--;
2398                 break;
2399             }
2400             buf.writeByte(c);
2401         }
2402         /* Trim trailing whitespace (if the last line does not have newline)
2403          */
2404         if (buf.offset && (buf.data[buf.offset - 1] == ' ' || buf.data[buf.offset - 1] == '\t'))
2405         {
2406             while (buf.offset && (buf.data[buf.offset - 1] == ' ' || buf.data[buf.offset - 1] == '\t'))
2407                 buf.offset--;
2408         }
2409         // Always end with a newline
2410         if (!buf.offset || buf.data[buf.offset - 1] != '\n')
2411             buf.writeByte('\n');
2412         buf.writeByte(0);
2413         // It's a line comment if the start of the doc comment comes
2414         // after other non-whitespace on the same line.
2415         const(char)** dc = (lineComment && anyToken) ? &t.lineComment : &t.blockComment;
2416         // Combine with previous doc comment, if any
2417         if (*dc)
2418             *dc = combineComments(*dc, cast(char*)buf.data);
2419         else
2420             *dc = cast(char*)buf.extractData();
2421     }
2422 
2423     /**********************************
2424      * Determine if string is a valid Identifier.
2425      * Placed here because of commonality with Lexer functionality.
     * Returns:
     *      false if the string is not a valid identifier, true otherwise
2428      */
2429     final static bool isValidIdentifier(const(char)* p)
2430     {
2431         size_t len;
2432         size_t idx;
2433         if (!p || !*p)
2434             goto Linvalid;
2435         if (*p >= '0' && *p <= '9') // beware of isdigit() on signed chars
2436             goto Linvalid;
2437         len = strlen(p);
2438         idx = 0;
2439         while (p[idx])
2440         {
2441             dchar_t dc;
2442             const(char)* q = utf_decodeChar(cast(char*)p, len, &idx, &dc);
2443             if (q)
2444                 goto Linvalid;
2445             if (!((dc >= 0x80 && isUniAlpha(dc)) || isalnum(dc) || dc == '_'))
2446                 goto Linvalid;
2447         }
2448         return true;
2449     Linvalid:
2450         return false;
2451     }
2452 
2453     /********************************************
2454      * Combine two document comments into one,
2455      * separated by a newline.
2456      */
2457     final static const(char)* combineComments(const(char)* c1, const(char)* c2)
2458     {
2459         //printf("Lexer::combineComments('%s', '%s')\n", c1, c2);
2460         const(char)* c = c2;
2461         if (c1)
2462         {
2463             c = c1;
2464             if (c2)
2465             {
2466                 size_t len1 = strlen(cast(char*)c1);
2467                 size_t len2 = strlen(cast(char*)c2);
2468                 int insertNewLine = 0;
2469                 if (len1 && c1[len1 - 1] != '\n')
2470                 {
2471                     ++len1;
2472                     insertNewLine = 1;
2473                 }
2474                 char* p = cast(char*)mem.xmalloc(len1 + 1 + len2 + 1);
2475                 memcpy(p, c1, len1 - insertNewLine);
2476                 if (insertNewLine)
2477                     p[len1 - 1] = '\n';
2478                 p[len1] = '\n';
2479                 memcpy(p + len1 + 1, c2, len2);
2480                 p[len1 + 1 + len2] = 0;
2481                 c = p;
2482             }
2483         }
2484         return c;
2485     }
2486 
2487 private:
2488     final void endOfLine()
2489     {
2490         scanloc.linnum++;
2491         line = p;
2492     }
2493 }