dmdscript.lexer source code

1 /* Digital Mars DMDScript source code.
2  * Copyright (c) 2000-2002 by Chromium Communications
3  * D version Copyright (c) 2004-2010 by Digital Mars
4  * Distributed under the Boost Software License, Version 1.0.
5  * (See accompanying file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
6  * written by Walter Bright
7  * http://www.digitalmars.com
8  *
9  * D2 port by Dmitry Olshansky 
10  *
11  * DMDScript is implemented in the D Programming Language,
12  * http://www.digitalmars.com/d/
13  *
14  * For a C++ implementation of DMDScript, including COM support, see
15  * http://www.digitalmars.com/dscript/cppscript.html
16  */
17 
18 /* Lexical Analyzer
19  */
20 
21 module dmdscript.lexer;
22 
23 import std.range;
24 import std.algorithm;
25 import std.stdio;
26 import std.string;
27 import std.utf;
28 import std.outbuffer;
29 import std.ascii;
30 import core.stdc.stdlib;
31 
32 import dmdscript.script;
33 import dmdscript.text;
34 import dmdscript.identifier;
35 import dmdscript.scopex;
36 import dmdscript.errmsgs;
37 import dmdscript.utf;
38 
39 /* Tokens:
40         (	)
41         [	]
42         {	}
43         <	>	<=	>=	==	!=
44         ===     !==
45         <<	>>	<<=	>>=	>>>	>>>=
46  +	-	+=	-=
47  *	/	%	*=	/=	%=
48         &	|   ^	&=	|=	^=
49         =	!	~
50  ++	--
51         .	:	,
52         ?	&&	||
53  */
54 
/// Integer type used to carry a token kind.
alias TOK = int;

/**
 * Token kinds produced by the lexer.
 *
 * The members form one anonymous enum, so they are numbered consecutively
 * starting at 0 (TOKreserved); TOKmax is the number of token kinds and is
 * used to size per-token tables.
 */
enum
{
    TOKreserved,

    // Punctuation and parser-internal node kinds
    TOKlparen,
    TOKrparen,
    TOKlbracket,
    TOKrbracket,
    TOKlbrace,
    TOKrbrace,
    TOKcolon,
    TOKneg,
    TOKpos,
    TOKsemicolon,
    TOKeof,
    TOKarray,
    TOKcall,
    TOKarraylit,
    TOKobjectlit,
    TOKcomma,
    TOKassert,

    // Operators: comparison
    TOKless,
    TOKgreater,
    TOKlessequal,
    TOKgreaterequal,
    TOKequal,
    TOKnotequal,
    TOKidentity,
    TOKnonidentity,

    // Operators: shifts
    TOKshiftleft,
    TOKshiftright,
    TOKshiftleftass,
    TOKshiftrightass,
    TOKushiftright,
    TOKushiftrightass,

    // Operators: arithmetic
    TOKplus,
    TOKminus,
    TOKplusass,
    TOKminusass,
    TOKmultiply,
    TOKdivide,
    TOKpercent,
    TOKmultiplyass,
    TOKdivideass,
    TOKpercentass,

    // Operators: bitwise
    TOKand,
    TOKor,
    TOKxor,
    TOKandass,
    TOKorass,
    TOKxorass,

    // Operators: assignment, unary, member access, logical
    TOKassign,
    TOKnot,
    TOKtilde,
    TOKplusplus,
    TOKminusminus,
    TOKdot,
    TOKquestion,
    TOKandand,
    TOKoror,

    // Leaf operators
    TOKnumber,
    TOKidentifier,
    TOKstring,
    TOKregexp,
    TOKreal,

    // Keywords
    TOKbreak,
    TOKcase,
    TOKcontinue,
    TOKdefault,
    TOKdelete,
    TOKdo,
    TOKelse,
    TOKexport,
    TOKfalse,
    TOKfor,
    TOKfunction,
    TOKif,
    TOKimport,
    TOKin,
    TOKnew,
    TOKnull,
    TOKreturn,
    TOKswitch,
    TOKthis,
    TOKtrue,
    TOKtypeof,
    TOKvar,
    TOKvoid,
    TOKwhile,
    TOKwith,

    // Reserved for ECMA extensions
    TOKcatch,
    TOKclass,
    TOKconst,
    TOKdebugger,
    TOKenum,
    TOKextends,
    TOKfinally,
    TOKsuper,
    TOKthrow,
    TOKtry,

    // Java keywords reserved for unknown reasons
    TOKabstract,
    TOKboolean,
    TOKbyte,
    TOKchar,
    TOKdouble,
    TOKfinal,
    TOKfloat,
    TOKgoto,
    TOKimplements,
    TOKinstanceof,
    TOKint,
    TOKinterface,
    TOKlong,
    TOKnative,
    TOKpackage,
    TOKprivate,
    TOKprotected,
    TOKpublic,
    TOKshort,
    TOKstatic,
    TOKsynchronized,
    TOKtransient,

    TOKmax
}
127 
/// Returns 1 when `c` is an octal digit ('0' through '7'), otherwise 0.
int isoctal(dchar c)
{
    return (c >= '0' && c <= '7') ? 1 : 0;
}
/// Returns 1 when `c` is an ASCII decimal digit ('0' through '9'), otherwise 0.
int isasciidigit(dchar c)
{
    return (c >= '0' && c <= '9') ? 1 : 0;
}
/// Returns 1 when `c` is an ASCII lower-case letter ('a' through 'z'), otherwise 0.
int isasciilower(dchar c)
{
    return (c >= 'a' && c <= 'z') ? 1 : 0;
}
/// Returns 1 when `c` is an ASCII upper-case letter ('A' through 'Z'), otherwise 0.
int isasciiupper(dchar c)
{
    return (c >= 'A' && c <= 'Z') ? 1 : 0;
}
/// Returns 1 when `c` is a hexadecimal digit (0-9, a-f or A-F), otherwise 0.
int ishex(dchar c)
{
    if(c >= '0' && c <= '9')
        return 1;
    if(c >= 'a' && c <= 'f')
        return 1;
    return (c >= 'A' && c <= 'F') ? 1 : 0;
}
151 
152 
153 /******************************************************/
154 
/**
 * One lexical token.
 *
 * `value` holds the token kind (a TOK constant) and selects which member of
 * the anonymous union is meaningful, as used by toString():
 *   TOKnumber              -> intvalue
 *   TOKreal                -> realvalue
 *   TOKstring, TOKregexp   -> string
 *   TOKidentifier          -> ident
 */
struct Token
{
    Token *next;                            // next token in a lookahead chain or on a free list
    immutable(tchar) *ptr;                  // pointer to first character of this token within buffer
    uint   linnum;
    TOK    value;                           // token kind
    immutable(tchar) *sawLineTerminator;    // where we saw the last line terminator
    union
    {
        number_t    intvalue;
        real_t      realvalue;
        d_string    string;
        Identifier *ident;
    }

    /// Spelling of each token kind, indexed by TOK; entries left null fall
    /// back to a generated "TOKnn" name in toString(TOK).
    static d_string[TOKmax] tochars;

    /// Writes the textual form of this token, followed by a newline, to stdout.
    void print()
    {
        // writeln, not writefln: the token text is data and must never be
        // interpreted as a format string (a '%' inside a string literal or
        // regexp would otherwise be treated as a format specifier).
        writeln(toString());
    }

    /**
     * Returns the textual form of this token.
     *
     * Numbers and reals are formatted, strings and regexps are returned as
     * stored, identifiers delegate to Identifier.toString(), and every other
     * kind is looked up through toString(TOK).
     */
    d_string toString()
    {
        d_string p;

        switch(value)
        {
        case TOKnumber:
            p = std.string.format("%d", intvalue);
            break;

        case TOKreal:
            // Print integral reals without a fractional part.
            long l = cast(long)realvalue;
            if(l == realvalue)
                p = std.string.format("%s", l);
            else
                p = std.string.format("%s", realvalue);
            break;

        case TOKstring:
        case TOKregexp:
            p = string;
            break;

        case TOKidentifier:
            p = ident.toString();
            break;

        default:
            p = toString(value);
            break;
        }
        return p;
    }

    /**
     * Returns the spelling registered in `tochars` for a token kind.
     *
     * When no spelling is registered, or `value` lies outside the table, the
     * result is the generated name "TOK<value>".
     */
    static d_string toString(TOK value)
    {
        d_string p;

        // Guard the table lookup so an unexpected kind yields a readable
        // name instead of a range violation.
        if(value >= 0 && value < TOKmax)
            p = tochars[value];
        if(!p)
            p = std.string.format("TOK%d", value);
        return p;
    }
}
221 
222 
223 
224 
225 /*******************************************************************/
226 
227 class Lexer
228 {
229     Identifier[d_string] stringtable;
230     Token* freelist;
231 
232     d_string sourcename;        // for error message strings
233 
234     d_string base;              // pointer to start of buffer
235     immutable(char) * end;      // past end of buffer
236     immutable(char) * p;        // current character
237     uint currentline;
238     Token token;
239     OutBuffer stringbuffer;
240     int useStringtable;         // use for Identifiers
241 
242     ErrInfo errinfo;            // syntax error information
243     static bool inited;
244 
245 
/// Hands out a Token: the head of `freelist` is unlinked and returned when
/// the list is non-empty, otherwise a new Token is allocated. A recycled
/// token is returned as-is (its fields are not reset here).
Token* allocToken()
{
    if(freelist is null)
        return new Token();

    Token* recycled = freelist;
    freelist = recycled.next;
    return recycled;
}
259 
260 
/**
 * Sets up a lexer over `base`.
 *
 * Params:
 *   sourcename     = name used when building error message strings
 *   base           = source text; a 0x1A sentinel is appended unless the text
 *                    already ends in 0 or 0x1A
 *   useStringtable = non-zero to intern identifiers in `stringtable`
 */
this(d_string sourcename, d_string base, int useStringtable)
{
    import core.stdc.string : memset;

    // One-time static initialisation.
    if(!inited)
        init();

    memset(&token, 0, token.sizeof);

    this.sourcename = sourcename;
    this.useStringtable = useStringtable;

    // The scanner relies on a terminating 0 or 0x1A to stop, so make sure
    // the buffer ends with one.
    bool terminated = base.length != 0
                      && (base[$ - 1] == 0 || base[$ - 1] == 0x1A);
    if(!terminated)
        base ~= cast(tchar)0x1A;

    this.base = base;
    p = base.ptr;
    this.end = base.ptr + base.length;

    currentline = 1;
    freelist = null;
}
279 
280 
/// Drops the lexer's references to the source buffer, its name and the
/// token free list.
~this()
{
    p = null;
    end = null;
    base = null;
    sourcename = null;
    freelist = null;
}
290 
/// Decodes and returns the code point that starts at `p`, which must point
/// into `base`. The lexer position is not changed.
dchar get(immutable(tchar)* p)
{
    size_t offset = p - base.ptr;
    dchar decoded = std.utf.decode(base, offset);
    return decoded;
}
296 
/// Returns the pointer just past the code point that starts at `p`, which
/// must point into `base`.
immutable(tchar)* inc(immutable(tchar)* p)
{
    size_t offset = p - base.ptr;

    // decode() is called only for its side effect of advancing `offset`
    // past one complete code point.
    std.utf.decode(base, offset);
    return base.ptr + offset;
}
303 
/// Reports a syntax error using message number `msgnum` from `errmsgtbl`
/// as the format string; `args` are the format arguments.
void error(ARGS...)(int msgnum, ARGS args)
{
    auto fmt = errmsgtbl[msgnum];
    error(fmt, args);
}
308 
/**
 * Reports a syntax error at the current position `p`.
 *
 * The message is "<sourcename>(<line>) : Error: " followed by `fmt`
 * formatted with `args`. Only the first error is kept: `errinfo` is filled
 * in (message, line number, column offset and the text of the offending
 * line) only while `errinfo.message` is still empty. In every case the rest
 * of the input is skipped and any lookahead token is dropped.
 */
void error(ARGS...)(.string fmt, ARGS args)
{
    import std.format : format, formattedWrite;

    // Walk from the start of the buffer to the current position, counting
    // newlines and remembering where the current line begins.
    uint linnum = 1;
    immutable(tchar)* slinestart = base.ptr;
    immutable(tchar)* s = base.ptr;
    while(s != p)
    {
        if(*s == '\n')
        {
            ++linnum;
            slinestart = s + 1;
        }
        ++s;
    }

    // Continue forward to the end of that line (newline or end-of-input
    // sentinel).
    while(*s != '\n' && *s != 0 && *s != 0x1A)
        ++s;
    immutable(tchar)* slineend = s;

    d_string buf = format("%s(%d) : Error: ", sourcename, linnum);

    // Output sink: appends each formatted character to buf.
    void putc(dchar c)
    {
        dmdscript.utf.encode(buf, c);
    }

    formattedWrite(&putc, fmt, args);

    if(!errinfo.message)
    {
        size_t len = slineend - slinestart;

        errinfo.message = buf;
        errinfo.linnum = linnum;
        errinfo.charpos = cast(uint)(p - slinestart);
        errinfo.srcline = slinestart[0 .. len];
    }

    // Consume input until the end
    while(*p != 0x1A && *p != 0)
        p++;
    token.next = null;              // dump any lookahead

    version(none)
    {
        writefln(errinfo.message);
        fflush(stdout);
        exit(EXIT_FAILURE);
    }
}
383 
/************************************************
 * Returns the text of line number `loc` (1-based) of the source text
 * `src`, without the line terminator and without trailing '\r' characters.
 *
 * `src` must be terminated by 0 or 0x1A. When the end of the text is
 * reached before line `loc` ends, the line being scanned at that point is
 * returned. A null `src` yields null.
 */

static d_string locToSrcline(immutable(char)* src, Loc loc)
{
    if(!src)
        return null;

    uint linnum = 1;
    immutable(char)* slinestart = src;
    immutable(char)* s = src;

    for(;; ++s)
    {
        immutable char ch = *s;

        // End of input: whatever line we are on is the result.
        if(ch == 0 || ch == 0x1A)
            break;

        if(ch == '\n')
        {
            // Reached the end of the requested line.
            if(linnum == loc)
                break;

            // Otherwise the next line starts right after this newline.
            slinestart = s + 1;
            ++linnum;
        }
    }
    immutable(char)* slineend = s;

    // Remove trailing \r's
    while(slineend > slinestart && slineend[-1] == '\r')
        --slineend;

    size_t len = slineend - slinestart;
    return slinestart[0 .. len];
}
431 
432 
/// Advances to the next token and returns its kind. A token already queued
/// as lookahead (token.next) is copied into `token` and its node is pushed
/// onto the free list; otherwise a new token is scanned from the input.
TOK nextToken()
{
    Token* ahead = token.next;

    if(ahead is null)
    {
        scan(&token);
    }
    else
    {
        token = *ahead;             // also carries over ahead's own lookahead link
        ahead.next = freelist;
        freelist = ahead;
    }
    return token.value;
}
451 
/// Returns the token that follows `ct` without consuming it. If no
/// lookahead token is linked to `ct` yet, one is scanned from the input and
/// linked as `ct.next`.
Token* peek(Token* ct)
{
    if(ct.next)
        return ct.next;

    Token* ahead = allocToken();
    scan(ahead);
    ahead.next = null;
    ct.next = ahead;
    return ahead;
}
467 
/// Pushes the current token back as lookahead and makes the current token
/// a TOKsemicolon located at `loc`.
void insertSemicolon(immutable(tchar)* loc)
{
    // Save a copy of the current token (including its own lookahead link).
    Token* saved = allocToken();
    *saved = token;

    // Turn the current token into the synthetic semicolon, followed by the
    // saved token.
    token.value = TOKsemicolon;
    token.ptr = loc;
    token.sawLineTerminator = null;
    token.next = saved;
}
481 
/**********************************
 * Kludge used to disambiguate TOKregexp from TOKdivide: when a TOKdivide
 * was wanted but a TOKregexp was found, back up and rescan.
 *
 * Scanning resumes one character past the start of the current token, and
 * any lookahead is discarded.
 */

void rescan()
{
    p = token.ptr + 1;
    token.next = null;      // no lookahead (the dropped tokens should go on the free list)
}
494 
495 
/****************************
 * Turn next token in buffer into a token.
 *
 * Starting at the current position `p`, skips white space, line
 * terminators and comments, then stores the kind of the next token in
 * `t.value`, the address of its first character in `t.ptr`, and its
 * payload (string, identifier, number) in the matching union member.
 * `t.sawLineTerminator` is set to the last line terminator skipped before
 * the token, or null when there was none. `p` is left just past the token.
 *
 * At the end of input (0 or 0x1A) the token is TOKeof. Malformed input is
 * reported through error(), which skips the remaining input.
 *
 * Params:
 *   t = token to fill in
 */

void scan(Token *t)
{
    static import std.ascii;
    static import std.uni;

    tchar c;
    dchar d;
    d_string id;

    t.sawLineTerminator = null;
    for(;; )
    {
        t.ptr = p;
        switch(*p)
        {
        case 0:
        case 0x1A:
            t.value = TOKeof;               // end of file
            return;

        case ' ':
        case '\t':
        case '\v':
        case '\f':
        case 0xA0:                          // no-break space
            p++;
            continue;                       // skip white space

        case '\n':                          // line terminator
            currentline++;
            goto case;
        case '\r':
            t.sawLineTerminator = p;
            p++;
            continue;

        case '"':
        case '\'':
            // string() consumes the literal; the opening quote is passed in.
            t.string = string(*p);
            t.value = TOKstring;
            return;

        case '0':       case '1':   case '2':   case '3':   case '4':
        case '5':       case '6':   case '7':   case '8':   case '9':
            t.value = number(t);
            return;

        case 'a':       case 'b':   case 'c':   case 'd':   case 'e':
        case 'f':       case 'g':   case 'h':   case 'i':   case 'j':
        case 'k':       case 'l':   case 'm':   case 'n':   case 'o':
        case 'p':       case 'q':   case 'r':   case 's':   case 't':
        case 'u':       case 'v':   case 'w':   case 'x':   case 'y':
        case 'z':
        case 'A':       case 'B':   case 'C':   case 'D':   case 'E':
        case 'F':       case 'G':   case 'H':   case 'I':   case 'J':
        case 'K':       case 'L':   case 'M':   case 'N':   case 'O':
        case 'P':       case 'Q':   case 'R':   case 'S':   case 'T':
        case 'U':       case 'V':   case 'W':   case 'X':   case 'Y':
        case 'Z':
        case '_':
        case '$':
            Lidentifier:
            {
                id = null;

                // Characters allowed after the first character of an identifier.
                static bool isidletter(dchar d)
                {
                    return std.ascii.isAlphaNum(d) || d == '_' || d == '$' || (d >= 0x80 && std.uni.isAlpha(d));
                }

                // Fast path: the identifier is a plain slice of the source.
                // As soon as a \uXXXX escape shows up, switch to building
                // the identifier text in a separate string.
                do
                {
                    p = inc(p);
                    d = get(p);
                    if(d == '\\' && p[1] == 'u')
                    {
                        // Also entered directly from case '\\' when the
                        // identifier starts with an escape.
                        Lidentifier2:
                        id = t.ptr[0 .. p - t.ptr].idup;
                        auto ps = p;
                        p++;
                        d = unicode();
                        if(!isidletter(d))
                        {
                            // The escape is not part of the identifier:
                            // back up to the backslash and stop.
                            p = ps;
                            break;
                        }
                        dmdscript.utf.encode(id, d);
                        for(;; )
                        {
                            d = get(p);
                            if(d == '\\' && p[1] == 'u')
                            {
                                auto pstart = p;
                                p++;
                                d = unicode();
                                if(isidletter(d))
                                    dmdscript.utf.encode(id, d);
                                else
                                {
                                    p = pstart;
                                    goto Lidentifier3;
                                }
                            }
                            else if(isidletter(d))
                            {
                                dmdscript.utf.encode(id, d);
                                p = inc(p);
                            }
                            else
                                goto Lidentifier3;
                        }
                    }
                } while(isidletter(d));
                id = t.ptr[0 .. p - t.ptr];
                Lidentifier3:
                // isKeyword() yields the keyword's token kind, or 0 for an
                // ordinary identifier.
                t.value = isKeyword(id);
                if(t.value)
                    return;
                if(useStringtable)
                {
                    // Intern the identifier so equal names share one entry.
                    Identifier* i = id in stringtable;
                    if(!i)
                    {
                        stringtable[id] = Identifier.init;
                        i = id in stringtable;
                    }
                    i.value.putVstring(id);
                    i.value.hashString();
                    t.ident = i;
                }
                else
                    t.ident = Identifier.build(id);
                t.value = TOKidentifier;
                return;
            }

        case '/':
            p++;
            c = *p;
            if(c == '=')
            {
                p++;
                t.value = TOKdivideass;
                return;
            }
            else if(c == '*')
            {
                // Block comment: skip to the closing "*/".
                p++;
                for(;; p++)
                {
                    c = *p;
                    Lcomment:
                    switch(c)
                    {
                    case '*':
                        p++;
                        c = *p;
                        if(c == '/')
                        {
                            p++;
                            break;
                        }
                        // Re-examine the character after '*' without
                        // advancing, so "**/" is handled correctly.
                        goto Lcomment;

                    case '\n':
                        currentline++;
                        goto case;
                    case '\r':
                        t.sawLineTerminator = p;
                        continue;

                    case 0:
                    case 0x1A:
                        error(ERR_BAD_C_COMMENT);
                        t.value = TOKeof;
                        return;

                    default:
                        continue;
                    }
                    break;
                }
                continue;
            }
            else if(c == '/')
            {
                // Line comment: skip to the next line terminator or to the
                // end of input. startsWith returns the 1-based index of the
                // needle that matched, or 0 when none did.
                auto r = p[0..end-p];
                uint j;
                do{
                    r.popFront();
                    j = startsWith(r,'\n','\r','\0',0x1A,'\u2028','\u2029');

                }while(!j);
                p = &r[0];
                switch(j){
                    case 1:
                        currentline++;
                        goto case;
                    case 2: case 5: case 6:
                        t.sawLineTerminator = p;
                        break;
                    case 3: case 4:
                        t.value = TOKeof;
                        return;
                    default:
                        assert(0);
                }
                p = inc(p);
                continue;
            }
            else if((t.string = regexp()) != null)
                t.value = TOKregexp;
            else
                t.value = TOKdivide;
            return;

        case '.':
        {
            // ".5" is a number, any other '.' is the member-access operator.
            immutable(tchar) * q;
            q = p + 1;
            c = *q;
            if(std.ascii.isDigit(c))
                t.value = number(t);
            else
            {
                t.value = TOKdot;
                p = q;
            }
            return;
        }

        case '&':
            p++;
            c = *p;
            if(c == '=')
            {
                p++;
                t.value = TOKandass;
            }
            else if(c == '&')
            {
                p++;
                t.value = TOKandand;
            }
            else
                t.value = TOKand;
            return;

        case '|':
            p++;
            c = *p;
            if(c == '=')
            {
                p++;
                t.value = TOKorass;
            }
            else if(c == '|')
            {
                p++;
                t.value = TOKoror;
            }
            else
                t.value = TOKor;
            return;

        case '-':
            p++;
            c = *p;
            if(c == '=')
            {
                p++;
                t.value = TOKminusass;
            }
            else if(c == '-')
            {
                p++;

                // If the last token in the file is --> then
                // treat it as EOF. This is to accept broken
                // scripts that forgot to protect the closing -->
                // with a // comment.
                if(*p == '>')
                {
                    // Scan ahead to see if it's the last token
                    immutable(tchar) * q;

                    q = p;
                    Ltrailing:
                    for(;; )
                    {
                        switch(*++q)
                        {
                        case 0:
                        case 0x1A:
                            t.value = TOKeof;
                            p = q;
                            return;

                        case ' ':
                        case '\t':
                        case '\v':
                        case '\f':
                        case '\n':
                        case '\r':
                        case 0xA0:                  // no-break space
                            continue;

                        default:
                            // More input follows, so this is an ordinary
                            // "--" followed by ">" (e.g. "a-->b"); p still
                            // points at the '>'.
                            break Ltrailing;
                        }
                    }
                }
                t.value = TOKminusminus;
            }
            else
                t.value = TOKminus;
            return;

        case '+':
            p++;
            c = *p;
            if(c == '=')
            {
                p++;
                t.value = TOKplusass;
            }
            else if(c == '+')
            {
                p++;
                t.value = TOKplusplus;
            }
            else
                t.value = TOKplus;
            return;

        case '<':
            p++;
            c = *p;
            if(c == '=')
            {
                p++;
                t.value = TOKlessequal;
            }
            else if(c == '<')
            {
                p++;
                c = *p;
                if(c == '=')
                {
                    p++;
                    t.value = TOKshiftleftass;
                }
                else
                    t.value = TOKshiftleft;
            }
            else if(c == '!' && p[1] == '-' && p[2] == '-')
            {       // Special comment to end of line
                p += 2;
                for(;; )
                {
                    p++;
                    switch(*p)
                    {
                    case '\n':
                        currentline++;
                        goto case;
                    case '\r':
                        t.sawLineTerminator = p;
                        break;

                    case 0:
                    case 0x1A:                              // end of file
                        error(ERR_BAD_HTML_COMMENT);
                        t.value = TOKeof;
                        return;

                    default:
                        continue;
                    }
                    break;
                }
                p++;
                continue;
            }
            else
                t.value = TOKless;
            return;

        case '>':
            p++;
            c = *p;
            if(c == '=')
            {
                p++;
                t.value = TOKgreaterequal;
            }
            else if(c == '>')
            {
                p++;
                c = *p;
                if(c == '=')
                {
                    p++;
                    t.value = TOKshiftrightass;
                }
                else if(c == '>')
                {
                    p++;
                    c = *p;
                    if(c == '=')
                    {
                        p++;
                        t.value = TOKushiftrightass;
                    }
                    else
                        t.value = TOKushiftright;
                }
                else
                    t.value = TOKshiftright;
            }
            else
                t.value = TOKgreater;
            return;

        case '(': p++; t.value = TOKlparen;    return;
        case ')': p++; t.value = TOKrparen;    return;
        case '[': p++; t.value = TOKlbracket;  return;
        case ']': p++; t.value = TOKrbracket;  return;
        case '{': p++; t.value = TOKlbrace;    return;
        case '}': p++; t.value = TOKrbrace;    return;
        case '~': p++; t.value = TOKtilde;     return;
        case '?': p++; t.value = TOKquestion;  return;
        case ',': p++; t.value = TOKcomma;     return;
        case ';': p++; t.value = TOKsemicolon; return;
        case ':': p++; t.value = TOKcolon;     return;

        case '*':
            p++;
            c = *p;
            if(c == '=')
            {
                p++;
                t.value = TOKmultiplyass;
            }
            else
                t.value = TOKmultiply;
            return;

        case '%':
            p++;
            c = *p;
            if(c == '=')
            {
                p++;
                t.value = TOKpercentass;
            }
            else
                t.value = TOKpercent;
            return;

        case '^':
            p++;
            c = *p;
            if(c == '=')
            {
                p++;
                t.value = TOKxorass;
            }
            else
                t.value = TOKxor;
            return;

        case '=':
            p++;
            c = *p;
            if(c == '=')
            {
                p++;
                c = *p;
                if(c == '=')
                {
                    p++;
                    t.value = TOKidentity;
                }
                else
                    t.value = TOKequal;
            }
            else
                t.value = TOKassign;
            return;

        case '!':
            p++;
            c = *p;
            if(c == '=')
            {
                p++;
                c = *p;
                if(c == '=')
                {
                    p++;
                    t.value = TOKnonidentity;
                }
                else
                    t.value = TOKnotequal;
            }
            else
                t.value = TOKnot;
            return;

        case '\\':
            if(p[1] == 'u')
            {
                // \uXXXX starts an identifier
                goto Lidentifier2;
            }
            goto default;
        default:
            // Anything else: decode the full code point and classify it.
            d = get(p);
            if(d >= 0x80 && std.uni.isAlpha(d))
                goto Lidentifier;
            else if(isStrWhiteSpaceChar(d))
            {
                p = inc(p);            //also skip unicode whitespace
                continue;
            }
            else
            {
                if(std.ascii.isPrintable(d))
                    error(errmsgtbl[ERR_BAD_CHAR_C], d);
                else
                    error(errmsgtbl[ERR_BAD_CHAR_X], d);
            }
            // error() has skipped to the end of input, so the next
            // iteration returns TOKeof.
            continue;
        }
    }
}
1061 
1062     /*******************************************
1063      * Parse escape sequence.
1064      */
1065 
1066     dchar escapeSequence()
1067     {
1068         // Decode one escape sequence of a string literal. On entry p points
1069         // just past the backslash; on return, past the whole sequence.
1070         // Returns the character the escape stands for:
1071         //   \a \b \f \n \r \t   control codes 7, 8, 12, 10, 13, 9
1072         //   \v                  11 (stays 'v' under JSCRIPT_ESCAPEV_BUG)
1073         //   \xH, \xHH           hex value; a single digit is also reported
1074         //                       through ERR_BAD_HEX_SEQUENCE
1075         //   \o, \oo, \ooo       octal value of one to three digits
1076         //   anything else       the character itself, without an error
1077         uint c;
1078         int n;
1079 
1080         c = *p;
1081         p++;
1082         switch(c)
1083         {
1084         case 0:
1085         case 0x1A:
1086             // End-of-source marker right after the backslash: step back onto
1087             // it so the caller stops there instead of reading past the text.
1088             p--;
1089             break;
1090 
1091         case '\'':
1092         case '"':
1093         case '?':
1094         case '\\':
1095             break;
1096         case 'a': c = 7;  break;
1097         case 'b': c = 8;  break;
1098         case 'f': c = 12; break;
1099         case 'n': c = 10; break;
1100         case 'r': c = 13; break;
1101         case 't': c = 9;  break;
1102         case 'v':
1103             version(JSCRIPT_ESCAPEV_BUG)
1104             {
1105             }
1106             else
1107             {
1108                 c = 11;
1109             }
1110             break;
1111 
1112         case 'x':
1113             c = *p;
1114             if(c != 0 && c != 0x1A)         // never step over the end marker
1115                 p++;
1116             if(ishex(c))
1117             {
1118                 uint v = 0;
1119                 n = 0;
1120                 for(;; )
1121                 {
1122                     if(std.ascii.isDigit(c))
1123                         c -= '0';
1124                     else if(std.ascii.isLower(c))
1125                         c -= 'a' - 10;
1126                     else            // 'A' <= c && c <= 'F'
1127                         c -= 'A' - 10;
1128                     v = v * 16 + c;
1129                     c = *p;
1130                     if(++n >= 2 || !ishex(c))
1131                         break;
1132                     p++;
1133                 }
1134                 if(n == 1)
1135                     error(ERR_BAD_HEX_SEQUENCE);
1136                 c = v;
1137             }
1138             else
1139                 error(errmsgtbl[ERR_UNDEFINED_ESC_SEQUENCE], c);
1140             break;
1141 
1142         default:
1143             if(c > 0x7F)
1144             {
1145                 // Non-ASCII: re-read the character through get()/inc().
1146                 p--;
1147                 c = get(p);
1148                 p = inc(p);
1149             }
1150             if(isoctal(c))
1151             {
1152                 uint v = 0;
1153                 n = 0;
1154                 for(;; )
1155                 {
1156                     v = v * 8 + (c - '0');
1157                     c = *p;
1158                     if(++n >= 3 || !isoctal(c))
1159                         break;
1160                     p++;
1161                 }
1162                 c = v;
1163             }
1164             // Don't throw error, just accept it
1165             break;
1166         }
1167         return c;
1168     }
1169 
1170     /**************************************
1171      */
1172 
1173     d_string string(tchar quote)
1174     {
1175         // Scan a string literal and return its contents with escapes decoded.
1176         //   quote - the delimiter that opened the literal; only a matching
1177         //           '"' or '\'' closes it, the other kind is ordinary text.
1178         // The first character at p (presumably the opening delimiter) is
1179         // skipped; on success p ends just past the closing delimiter. A raw
1180         // line break or the end of the source inside the literal is reported
1181         // through error() and null is returned.
1182         d_string text;
1183 
1184         p++;                                // step over the first character
1185         for(;; )
1186         {
1187             immutable tchar ch = *p;
1188 
1189             // End of source: report it and leave p on the end marker.
1190             if(ch == 0 || ch == 0x1A)
1191             {
1192                 error(ERR_UNTERMINATED_STRING);
1193                 return null;
1194             }
1195 
1196             p++;
1197             if(ch == '\n' || ch == '\r')
1198             {
1199                 error(errmsgtbl[ERR_STRING_NO_END_QUOTE], quote);
1200                 return null;
1201             }
1202 
1203             if(ch == '\\')
1204             {
1205                 // \uXXXX is decoded by unicode(), every other escape by
1206                 // escapeSequence(); the result is appended via encode().
1207                 dchar decoded = (*p == 'u') ? unicode() : escapeSequence();
1208                 dmdscript.utf.encode(text, decoded);
1209                 continue;
1210             }
1211 
1212             if(ch == quote && (ch == '"' || ch == '\''))
1213                 return text;
1214 
1215             // Ordinary character, including the non-matching quote kind.
1216             text ~= ch;
1217         }
1218 
1219         assert(0);
1220     }
1221 
1222     /**************************************
1223      * Scan regular expression. Return null with buffer
1224      * pointer intact if it is not a regexp.
1225      */
1226 
1227     d_string regexp()
1228     {
1229         /* Try to scan a regular expression literal.
1230          *
1231          * The text returned starts one character before p (presumably the
1232          * opening '/') and runs through the closing '/' and any flags; p is
1233          * then moved past it. When the text is not a regexp, null is
1234          * returned and p is left where it was.
1235          *
1236          * Grammar:
1237          *   RegExpLiteral:  RegExpBody RegExpFlags
1238          *   RegExpFlags:
1239          *         empty
1240          *       | RegExpFlags ContinuingIdentifierCharacter
1241          *   RegExpBody:  / RegExpFirstChar RegExpChars /
1242          *   RegExpFirstChar:
1243          *         OrdinaryRegExpFirstChar
1244          *       | \ NonTerminator
1245          *   OrdinaryRegExpFirstChar:  NonTerminator except \ | / | *
1246          *   RegExpChars:
1247          *         empty
1248          *       | RegExpChars RegExpChar
1249          *   RegExpChar:
1250          *         OrdinaryRegExpChar
1251          *       | \ NonTerminator
1252          *   OrdinaryRegExpChar: NonTerminator except \ | /
1253          */
1254 
1255         // A line break or an end-of-file marker can never occur in a regexp.
1256         static bool isTerminator(tchar ch)
1257         {
1258             switch(ch)
1259             {
1260             case '\r':
1261             case '\n':                          // new line
1262             case 0:                             // end of file
1263             case 0x1A:                          // end of file
1264                 return true;
1265             default:
1266                 return false;
1267             }
1268         }
1269 
1270         immutable(tchar) * begin = p - 1;
1271         immutable(tchar) * cur = p;
1272         bool atFirstChar = true;
1273 
1274         //writefln("Lexer.regexp()\n");
1275 
1276         // RegExpBody: scan up to the closing '/'
1277         for(;; )
1278         {
1279             immutable tchar ch = *cur;
1280 
1281             cur++;
1282             if(isTerminator(ch))
1283                 return null;                    // not a regexp
1284 
1285             if(ch == '\\')
1286             {
1287                 // The escaped character is consumed as is, so "\/" does
1288                 // not close the body.
1289                 if(isTerminator(*cur))
1290                     return null;                // not a regexp
1291                 cur++;
1292             }
1293             else if(ch == '/')
1294             {
1295                 if(atFirstChar)                 // "//" has no body
1296                     return null;
1297                 break;
1298             }
1299             else if(ch == '*' && atFirstChar)
1300             {
1301                 return null;                    // body may not begin with '*'
1302             }
1303 
1304             atFirstChar = false;
1305         }
1306 
1307         // RegExpFlags: any run of ASCII letters, digits, '_' and '$'
1308         while(std.ascii.isAlphaNum(*cur) || *cur == '_' || *cur == '$')
1309         {
1310             cur++;
1311         }
1312 
1313         // Commit: advance the lexer past the literal and return a copy of it
1314         p = cur;
1315         return begin[0 .. cur - begin].idup;
1316     }
1317 
1318     /***************************************
1319      */
1320 
1321     dchar unicode()
1322     {
1323         // Decode the hex digits of a \uXXXX escape. The first character at p
1324         // (the 'u' when called from string()) is skipped, then up to four
1325         // hex digits are consumed. A non-hex character stops the scan early:
1326         // ERR_BAD_U_SEQUENCE is reported and the bits gathered so far are returned.
1327         dchar result = 0;
1328 
1329         p++;                                // step over the 'u'
1330         foreach(i; 0 .. 4)
1331         {
1332             dchar digit = *p;
1333 
1334             if(!ishex(digit))
1335             {
1336                 error(ERR_BAD_U_SEQUENCE);
1337                 break;
1338             }
1339             p++;
1340             digit -= std.ascii.isDigit(digit) ? '0'
1341                    : isasciilower(digit)      ? 'a' - 10
1342                    :                            'A' - 10;
1343             result <<= 4;
1344             result |= digit;
1345         }
1346 
1347         return result;
1348     }
1349 
1350     /********************************************
1351      * Read a number.
1352      */
1353 
1354     TOK number(Token *t)
1355     {
1356         // Scan a numeric literal starting at p and store its value in
1357         // t.realvalue. Accepts decimal and leading-0 (octal) integers,
1358         // 0x/0X hex integers, and decimal forms with a fraction and/or
1359         // exponent. Returns TOKreal on success; a malformed literal is
1360         // reported as ERR_UNRECOGNIZED_N_LITERAL and TOKeof is returned.
1361         immutable(tchar) * start;
1362         number_t intvalue;
1363         real realvalue;
1364         int base = 10;
1365         tchar c;
1366 
1367         // Value of a digit character that the scan below already accepted.
1368         static int digitOf(tchar ch)
1369         {
1370             if('0' <= ch && ch <= '9') return ch - '0';
1371             if('a' <= ch && ch <= 'f') return ch - ('a' - 10);
1372             if('A' <= ch && ch <= 'F') return ch - ('A' - 10);
1373             assert(0);
1374         }
1375 
1376         start = p;
1377         for(;; )
1378         {
1379             c = *p;
1380             p++;
1381             switch(c)
1382             {
1383             case '0':
1384                 // ECMA grammar implies that numbers with leading 0
1385                 // like 015 are illegal. But other scripts allow them.
1386                 if(p - start == 1)              // if leading 0
1387                     base = 8;
1388                 goto case;
1389             case '1': case '2': case '3': case '4': case '5':
1390             case '6': case '7':
1391                 break;
1392 
1393             case '8': case '9':                         // decimal digits
1394                 if(base == 8)                           // and octal base
1395                     base = 10;                          // means back to decimal base
1396                 break;
1397 
1398             default:
1399                 p--;
1400                 Lnumber:
1401                 // Integer literal: accumulate in number_t while it fits.
1402                 intvalue = 0;
1403                 foreach(tchar ch; start[0 .. p - start])
1404                 {
1405                     immutable int v = digitOf(ch);
1406                     assert(v < base);
1407                     if((number_t.max - v) / base < intvalue)
1408                     {
1409                         // Would overflow number_t: redo it all in floating point.
1410                         realvalue = 0;
1411                         foreach(tchar w; start[0 .. p - start])
1412                         {
1413                             realvalue *= base;
1414                             realvalue += digitOf(w);
1415                         }
1416                         t.realvalue = realvalue;
1417                         return TOKreal;
1418                     }
1419                     intvalue *= base;
1420                     intvalue += v;
1421                 }
1422                 t.realvalue = cast(double)intvalue;
1423                 return TOKreal;
1424 
1425             case 'x':
1426             case 'X':
1427                 // Hex needs the exact prefix "0x"/"0X" and at least one digit.
1428                 if(p - start != 2 || *start != '0' || !ishex(*p))
1429                     goto Lerr;
1430                 do
1431                     p++;
1432                 while(ishex(*p));
1433                 start += 2;
1434                 base = 16;
1435                 goto Lnumber;
1436 
1437             case '.':
1438                 while(std.ascii.isDigit(*p))
1439                     p++;
1440                 if(*p == 'e' || *p == 'E')
1441                 {
1442                     p++;
1443                     goto Lexponent;
1444                 }
1445                 goto Ldouble;
1446 
1447             case 'e':
1448             case 'E':
1449                 Lexponent:
1450                 if(*p == '+' || *p == '-')
1451                     p++;
1452                 if(!std.ascii.isDigit(*p))
1453                     goto Lerr;
1454                 do
1455                     p++;
1456                 while(std.ascii.isDigit(*p));
1457                 goto Ldouble;
1458 
1459                 Ldouble:
1460                 // convert double
1461                 realvalue = core.stdc.stdlib.strtod(toStringz(start[0 .. p - start]), null);
1462                 t.realvalue = realvalue;
1463                 return TOKreal;
1464             }
1465         }
1466 
1467         Lerr:
1468         error(ERR_UNRECOGNIZED_N_LITERAL);
1469         return TOKeof;
1470     }
1471 
1472     static TOK isKeyword(const (tchar)[] s)
1473     {
1474         // Look up s among the reserved words. Returns the word's token
1475         // code, or TOKreserved when s is not a keyword. The comparison is
1476         // exact, so it is case sensitive.
1477 
1478         // Quick rejection: every keyword is 2 to 12 characters long and
1479         // begins with a letter in 'a' .. 'w'. Testing the length first also
1480         // keeps an empty slice from being indexed.
1481         if(s.length < 2 || s.length > 12 || s[0] < 'a' || s[0] > 'w')
1482             return TOKreserved;
1483 
1484         switch(s)
1485         {
1486         // 2 letters
1487         case "do":              return TOKdo;
1488         case "if":              return TOKif;
1489         case "in":              return TOKin;
1490 
1491         // 3 letters
1492         case "for":             return TOKfor;
1493         case "int":             return TOKint;
1494         case "new":             return TOKnew;
1495         case "try":             return TOKtry;
1496         case "var":             return TOKvar;
1497 
1498         // 4 letters
1499         case "byte":            return TOKbyte;
1500         case "case":            return TOKcase;
1501         case "char":            return TOKchar;
1502         case "else":            return TOKelse;
1503         case "enum":            return TOKenum;
1504         case "goto":            return TOKgoto;
1505         case "long":            return TOKlong;
1506         case "null":            return TOKnull;
1507         case "this":            return TOKthis;
1508         case "true":            return TOKtrue;
1509         case "void":            return TOKvoid;
1510         case "with":            return TOKwith;
1511 
1512         // 5 letters
1513         case "break":           return TOKbreak;
1514         case "catch":           return TOKcatch;
1515         case "class":           return TOKclass;
1516         case "const":           return TOKconst;
1517         case "false":           return TOKfalse;
1518         case "final":           return TOKfinal;
1519         case "float":           return TOKfloat;
1520         case "short":           return TOKshort;
1521         case "super":           return TOKsuper;
1522         case "throw":           return TOKthrow;
1523         case "while":           return TOKwhile;
1524 
1525         // 6 letters
1526         case "delete":          return TOKdelete;
1527         case "double":          return TOKdouble;
1528         case "export":          return TOKexport;
1529         case "import":          return TOKimport;
1530         case "native":          return TOKnative;
1531         case "public":          return TOKpublic;
1532         case "return":          return TOKreturn;
1533         case "static":          return TOKstatic;
1534         case "switch":          return TOKswitch;
1535         case "typeof":          return TOKtypeof;
1536 
1537         // 7 letters
1538         case "boolean":         return TOKboolean;
1539         case "default":         return TOKdefault;
1540         case "extends":         return TOKextends;
1541         case "finally":         return TOKfinally;
1542         case "package":         return TOKpackage;
1543         case "private":         return TOKprivate;
1544 
1545         // 8 letters
1546         case "abstract":        return TOKabstract;
1547         case "continue":        return TOKcontinue;
1548         case "debugger":        return TOKdebugger;
1549         case "function":        return TOKfunction;
1550 
1551         // 9 letters
1552         case "interface":       return TOKinterface;
1553         case "protected":       return TOKprotected;
1554         case "transient":       return TOKtransient;
1555 
1556         // 10 letters
1557         case "implements":      return TOKimplements;
1558         case "instanceof":      return TOKinstanceof;
1559 
1560         // 12 letters
1561         case "synchronized":    return TOKsynchronized;
1562 
1563         default:
1564             break;
1565         }
1566         return TOKreserved;             // not a keyword
1567     }
1661 }
1662 
1663 
1664 /****************************************
1665  */
1666 
1667 struct Keyword          // a reserved word paired with its token code
1668 {
1669     string name;        // the word's spelling
1670     TOK    value;       // the TOK constant assigned to that word
1671 }
1672 
1673 static immutable Keyword[] keywords =   // reserved words; init() stores each name in Token.tochars[value]
1674 [
1675 //    { spelling, token code },
1676 
1677     { "break", TOKbreak },
1678     { "case", TOKcase },
1679     { "continue", TOKcontinue },
1680     { "default", TOKdefault },
1681     { "delete", TOKdelete },
1682     { "do", TOKdo },
1683     { "else", TOKelse },
1684     { "export", TOKexport },
1685     { "false", TOKfalse },
1686     { "for", TOKfor },
1687     { "function", TOKfunction },
1688     { "if", TOKif },
1689     { "import", TOKimport },
1690     { "in", TOKin },
1691     { "new", TOKnew },
1692     { "null", TOKnull },
1693     { "return", TOKreturn },
1694     { "switch", TOKswitch },
1695     { "this", TOKthis },
1696     { "true", TOKtrue },
1697     { "typeof", TOKtypeof },
1698     { "var", TOKvar },
1699     { "void", TOKvoid },
1700     { "while", TOKwhile },
1701     { "with", TOKwith },
1702 
1703     { "catch", TOKcatch },
1704     { "class", TOKclass },
1705     { "const", TOKconst },
1706     { "debugger", TOKdebugger },
1707     { "enum", TOKenum },
1708     { "extends", TOKextends },
1709     { "finally", TOKfinally },
1710     { "super", TOKsuper },
1711     { "throw", TOKthrow },
1712     { "try", TOKtry },
1713 
1714     { "abstract", TOKabstract },
1715     { "boolean", TOKboolean },
1716     { "byte", TOKbyte },
1717     { "char", TOKchar },
1718     { "double", TOKdouble },
1719     { "final", TOKfinal },
1720     { "float", TOKfloat },
1721     { "goto", TOKgoto },
1722     { "implements", TOKimplements },
1723     { "instanceof", TOKinstanceof },
1724     { "int", TOKint },
1725     { "interface", TOKinterface },
1726     { "long", TOKlong },
1727     { "native", TOKnative },
1728     { "package", TOKpackage },
1729     { "private", TOKprivate },
1730     { "protected", TOKprotected },
1731     { "public", TOKpublic },
1732     { "short", TOKshort },
1733     { "static", TOKstatic },
1734     { "synchronized", TOKsynchronized },
1735     { "transient", TOKtransient },
1736 ];
1737 
1738 void init()
1739 {
1740     // Fill Token.tochars, the table indexed by token code that holds each
1741     // token's text, and then set Lexer.inited to true.
1742     //
1743     // Only the tokens assigned below receive an entry; every other slot
1744     // keeps whatever value it already had.
1745 
1746     // Reserved words: the text is the keyword's own spelling.
1747     foreach(ref kw; keywords)
1748     {
1749         //writefln("tochars[%d] = '%s'", kw.value, kw.name);
1750         Token.tochars[kw.value] = kw.name;
1751     }
1752 
1753     // Punctuation, operators and pseudo tokens. Each bracket token is
1754     // given its own character, so ")" for TOKrparen just as "(" for TOKlparen.
1755     Token.tochars[TOKreserved] = "reserved";
1756     Token.tochars[TOKeof] = "EOF";
1757     Token.tochars[TOKlbrace] = "{";
1758     Token.tochars[TOKrbrace] = "}";
1759     Token.tochars[TOKlparen] = "(";
1760     Token.tochars[TOKrparen] = ")";
1761     Token.tochars[TOKlbracket] = "[";
1762     Token.tochars[TOKrbracket] = "]";
1763     Token.tochars[TOKcolon] = ":";
1764     Token.tochars[TOKsemicolon] = ";";
1765     Token.tochars[TOKcomma] = ",";
1766     Token.tochars[TOKor] = "|";
1767     Token.tochars[TOKorass] = "|=";
1768     Token.tochars[TOKxor] = "^";
1769     Token.tochars[TOKxorass] = "^=";
1770     Token.tochars[TOKassign] = "=";
1771     Token.tochars[TOKless] = "<";
1772     Token.tochars[TOKgreater] = ">";
1773     Token.tochars[TOKlessequal] = "<=";
1774     Token.tochars[TOKgreaterequal] = ">=";
1775     Token.tochars[TOKequal] = "==";
1776     Token.tochars[TOKnotequal] = "!=";
1777     Token.tochars[TOKidentity] = "===";
1778     Token.tochars[TOKnonidentity] = "!==";
1779     Token.tochars[TOKshiftleft] = "<<";
1780     Token.tochars[TOKshiftright] = ">>";
1781     Token.tochars[TOKushiftright] = ">>>";
1782     Token.tochars[TOKplus] = "+";
1783     Token.tochars[TOKplusass] = "+=";
1784     Token.tochars[TOKminus] = "-";
1785     Token.tochars[TOKminusass] = "-=";
1786     Token.tochars[TOKmultiply] = "*";
1787     Token.tochars[TOKmultiplyass] = "*=";
1788     Token.tochars[TOKdivide] = "/";
1789     Token.tochars[TOKdivideass] = "/=";
1790     Token.tochars[TOKpercent] = "%";
1791     Token.tochars[TOKpercentass] = "%=";
1792     Token.tochars[TOKand] = "&";
1793     Token.tochars[TOKandass] = "&=";
1794     Token.tochars[TOKdot] = ".";
1795     Token.tochars[TOKquestion] = "?";
1796     Token.tochars[TOKtilde] = "~";
1797     Token.tochars[TOKnot] = "!";
1798     Token.tochars[TOKandand] = "&&";
1799     Token.tochars[TOKoror] = "||";
1800     Token.tochars[TOKplusplus] = "++";
1801     Token.tochars[TOKminusminus] = "--";
1802     Token.tochars[TOKcall] = "CALL";
1803 
1804     Lexer.inited = true;
1805 }
1806