dmdscript.lexer source code

1 /* Digital Mars DMDScript source code.
2  * Copyright (c) 2000-2002 by Chromium Communications
3  * D version Copyright (c) 2004-2010 by Digital Mars
4  * Distributed under the Boost Software License, Version 1.0.
5  * (See accompanying file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
6  * written by Walter Bright
7  * http://www.digitalmars.com
8  *
9  * D2 port by Dmitry Olshansky 
10  *
11  * DMDScript is implemented in the D Programming Language,
12  * http://www.digitalmars.com/d/
13  *
14  * For a C++ implementation of DMDScript, including COM support, see
15  * http://www.digitalmars.com/dscript/cppscript.html
16  */
17 
18 /* Lexical Analyzer
19  */
20 
21 module dmdscript.lexer;
22 
import std.range;
import std.algorithm;
import std.stdio;
import std.string;
import std.utf;
import std.outbuffer;
import std.ascii;
import std.c.stdlib;
31 
32 import dmdscript.script;
33 import dmdscript.text;
34 import dmdscript.identifier;
35 import dmdscript.scopex;
36 import dmdscript.errmsgs;
37 import dmdscript.utf;
38 
39 /* Tokens:
40         (	)
41         [	]
42         {	}
43         <	>	<=	>=	==	!=
44         ===     !==
45         <<	>>	<<=	>>=	>>>	>>>=
46  +	-	+=	-=
47  *	/	%	*=	/=	%=
48         &	|   ^	&=	|=	^=
49         =	!	~
50  ++	--
51         .	:	,
52         ?	&&	||
53  */
54 
alias int TOK;

/// Token kinds. The enumerators are numbered consecutively starting at
/// TOKreserved (0); TOKmax is the number of kinds and is used to size
/// per-token tables such as Token.tochars.
enum
{
    TOKreserved,

    // Other
    TOKlparen,
    TOKrparen,
    TOKlbracket,
    TOKrbracket,
    TOKlbrace,
    TOKrbrace,
    TOKcolon,
    TOKneg,
    TOKpos,
    TOKsemicolon,
    TOKeof,
    TOKarray,
    TOKcall,
    TOKarraylit,
    TOKobjectlit,
    TOKcomma,
    TOKassert,

    // Operators
    TOKless,
    TOKgreater,
    TOKlessequal,
    TOKgreaterequal,
    TOKequal,
    TOKnotequal,
    TOKidentity,
    TOKnonidentity,
    TOKshiftleft,
    TOKshiftright,
    TOKshiftleftass,
    TOKshiftrightass,
    TOKushiftright,
    TOKushiftrightass,
    TOKplus,
    TOKminus,
    TOKplusass,
    TOKminusass,
    TOKmultiply,
    TOKdivide,
    TOKpercent,
    TOKmultiplyass,
    TOKdivideass,
    TOKpercentass,
    TOKand,
    TOKor,
    TOKxor,
    TOKandass,
    TOKorass,
    TOKxorass,
    TOKassign,
    TOKnot,
    TOKtilde,
    TOKplusplus,
    TOKminusminus,
    TOKdot,
    TOKquestion,
    TOKandand,
    TOKoror,

    // Leaf operators
    TOKnumber,
    TOKidentifier,
    TOKstring,
    TOKregexp,
    TOKreal,

    // Keywords
    TOKbreak,
    TOKcase,
    TOKcontinue,
    TOKdefault,
    TOKdelete,
    TOKdo,
    TOKelse,
    TOKexport,
    TOKfalse,
    TOKfor,
    TOKfunction,
    TOKif,
    TOKimport,
    TOKin,
    TOKnew,
    TOKnull,
    TOKreturn,
    TOKswitch,
    TOKthis,
    TOKtrue,
    TOKtypeof,
    TOKvar,
    TOKvoid,
    TOKwhile,
    TOKwith,

    // Reserved for ECMA extensions
    TOKcatch,
    TOKclass,
    TOKconst,
    TOKdebugger,
    TOKenum,
    TOKextends,
    TOKfinally,
    TOKsuper,
    TOKthrow,
    TOKtry,

    // Java keywords reserved for unknown reasons
    TOKabstract,
    TOKboolean,
    TOKbyte,
    TOKchar,
    TOKdouble,
    TOKfinal,
    TOKfloat,
    TOKgoto,
    TOKimplements,
    TOKinstanceof,
    TOKint,
    TOKinterface,
    TOKlong,
    TOKnative,
    TOKpackage,
    TOKprivate,
    TOKprotected,
    TOKpublic,
    TOKshort,
    TOKstatic,
    TOKsynchronized,
    TOKthrows,
    TOKtransient,

    // Number of token kinds; must stay last
    TOKmax
}
127 
/// Returns nonzero (1) when c is an ASCII octal digit '0'..'7', otherwise 0.
int isoctal(dchar c)
{
    return (c < '0' || c > '7') ? 0 : 1;
}
/// Returns nonzero (1) when c is an ASCII decimal digit '0'..'9', otherwise 0.
int isasciidigit(dchar c)
{
    return (c < '0' || c > '9') ? 0 : 1;
}
/// Returns nonzero (1) when c is an ASCII lower-case letter 'a'..'z', otherwise 0.
int isasciilower(dchar c)
{
    return (c < 'a' || c > 'z') ? 0 : 1;
}
/// Returns nonzero (1) when c is an ASCII upper-case letter 'A'..'Z', otherwise 0.
int isasciiupper(dchar c)
{
    return (c < 'A' || c > 'Z') ? 0 : 1;
}
/// Returns nonzero (1) when c is an ASCII hexadecimal digit
/// ('0'..'9', 'a'..'f' or 'A'..'F'), otherwise 0.
int ishex(dchar c)
{
    switch(c)
    {
    case '0': .. case '9':
    case 'a': .. case 'f':
    case 'A': .. case 'F':
        return 1;

    default:
        return 0;
    }
}
151 
152 
153 /******************************************************/
154 
/// One lexical token produced by Lexer.scan.
///
/// Tokens are linked through `next` both as a lookahead chain hanging off the
/// lexer's current token and as the lexer's free list of recycled tokens.
struct Token
{
    Token *next;                         // next token in the lookahead chain / free list
    immutable(tchar) *ptr;               // pointer to first character of this token within buffer
    uint   linnum;
    TOK    value;                        // token kind (one of the TOKxxx constants)
    immutable(tchar) *sawLineTerminator; // where we saw the last line terminator

    // Payload; which member is meaningful depends on `value` (see toString()).
    union
    {
        number_t    intvalue;            // TOKnumber
        real_t      realvalue;           // TOKreal
        d_string    string;              // TOKstring, TOKregexp
        Identifier *ident;               // TOKidentifier
    }

    // Printable spelling of each token kind, indexed by TOK value.
    // Entries left null fall back to "TOK<n>" in toString(TOK).
    static d_string[TOKmax] tochars;

    /// Returns a token for the caller to fill in: the head of lex's free
    /// list if there is one (its previous contents are NOT cleared),
    /// otherwise a newly allocated Token.
    static Token*   alloc(Lexer* lex)
    {
        Token *t;

        if(lex.freelist)
        {
            t = lex.freelist;
            lex.freelist = t.next;
            return t;
        }

        return new Token();
    }

    /// Writes the textual form of this token followed by a newline to stdout.
    void print()
    {
        // The token text must be passed as an argument, never as the format
        // string itself: a token such as "%" or a string literal containing
        // '%' would otherwise be interpreted as a format specifier.
        writefln("%s", toString());
    }

    /// Returns the textual form of this token: the literal value for
    /// numbers, strings, regexps and identifiers, and the token kind's
    /// spelling for everything else.
    d_string toString()
    {
        d_string p;

        switch(value)
        {
        case TOKnumber:
            p = std.string.format("%d", intvalue);
            break;

        case TOKreal:
            // Print integral reals without a fractional part
            long l = cast(long)realvalue;
            if(l == realvalue)
                p = std.string.format("%s", l);
            else
                p = std.string.format("%s", realvalue);
            break;

        case TOKstring:
        case TOKregexp:
            p = string;
            break;

        case TOKidentifier:
            p = ident.toString();
            break;

        default:
            p = toString(value);
            break;
        }
        return p;
    }

    /// Returns the spelling registered in `tochars` for the token kind
    /// `value`, or "TOK<n>" when the kind has no registered spelling or is
    /// outside the range of known token kinds.
    static d_string toString(TOK value)
    {
        d_string p;

        // Guard the table lookup so an unexpected value yields the
        // "TOK<n>" fallback instead of an out-of-bounds access.
        if(value >= 0 && value < TOKmax)
            p = tochars[value];
        if(!p)
            p = std.string.format("TOK%d", value);
        return p;
    }
}
235 
236 
237 
238 
239 /*******************************************************************/
240 
241 class Lexer
242 {
243     Identifier[d_string] stringtable;
244     Token* freelist;
245 
246     d_string sourcename;        // for error message strings
247 
248     d_string base;              // pointer to start of buffer
249     immutable(char) * end;      // past end of buffer
250     immutable(char) * p;        // current character
251     uint currentline;
252     Token token;
253     OutBuffer stringbuffer;
254     int useStringtable;         // use for Identifiers
255 
256     ErrInfo errinfo;            // syntax error information
257     static bool inited;
258 
259     this(d_string sourcename, d_string base, int useStringtable)
260     {
261         //writefln("Lexer::Lexer(base = '%s')\n",base);
262         if(!inited)
263             init();
264 
265         std.c..string.memset(&token, 0, token.sizeof);
266         this.useStringtable = useStringtable;
267         this.sourcename = sourcename;
268         if(!base.length || (base[$ - 1] != 0 && base[$ - 1] != 0x1A))
269             base ~= cast(tchar)0x1A;
270         this.base = base;
271         this.end = base.ptr + base.length;
272         p = base.ptr;
273         currentline = 1;
274         freelist = null;
275     }
276 
277 
278     ~this()
279     {
280         //writef(L"~Lexer()\n");
281         freelist = null;
282         sourcename = null;
283         base = null;
284         end = null;
285         p = null;
286     }
287 
288     dchar get(immutable(tchar)* p)
289     {
290         size_t idx = p - base.ptr;
291         return std.utf.decode(base, idx);
292     }
293 
294     immutable(tchar) * inc(immutable(tchar) * p)
295     {
296         size_t idx = p - base.ptr;
297         std.utf.decode(base, idx);
298         return base.ptr + idx;
299     }
300 
301     void error(int msgnum)
302     {
303         error(errmsgtbl[msgnum]);
304     }
305 
306     void error(...)
307     {
308         uint linnum = 1;
309         immutable(tchar) * s;
310         immutable(tchar) * slinestart;
311         immutable(tchar) * slineend;
312         d_string buf;
313 
314         //FuncLog funclog(L"Lexer.error()");
315         //writefln("TEXT START ------------\n%ls\nTEXT END ------------------", base);
316 
317         // Find the beginning of the line
318         slinestart = base.ptr;
319         for(s = base.ptr; s != p; s++)
320         {
321             if(*s == '\n')
322             {
323                 linnum++;
324                 slinestart = s + 1;
325             }
326         }
327 
328         // Find the end of the line
329         for(;; )
330         {
331             switch(*s)
332             {
333             case '\n':
334             case 0:
335             case 0x1A:
336                 break;
337             default:
338                 s++;
339                 continue;
340             }
341             break;
342         }
343         slineend = s;
344 
345         buf = std..string.format("%s(%d) : Error: ", sourcename, linnum);
346 
347         void putc(dchar c)
348         {
349             dmdscript.utf.encode(buf, c);
350         }
351 
352         std.format.doFormat(&putc, _arguments, _argptr);
353 
354         if(!errinfo.message)
355         {
356             uint len;
357 
358             errinfo.message = buf;
359             errinfo.linnum = linnum;
360             errinfo.charpos = p - slinestart;
361 
362             len = slineend - slinestart;
363             errinfo.srcline = slinestart[0 .. len];
364         }
365 
366         // Consume input until the end
367         while(*p != 0x1A && *p != 0)
368             p++;
369         token.next = null;              // dump any lookahead
370 
371         version(none)
372         {
373             writefln(errinfo.message);
374             fflush(stdout);
375             exit(EXIT_FAILURE);
376         }
377     }
378 
379     /************************************************
380      * Given source text, convert loc to a string for the corresponding line.
381      */
382 
383     static d_string locToSrcline(immutable(char) *src, Loc loc)
384     {
385         immutable(char) * slinestart;
386         immutable(char) * slineend;
387         immutable(char) * s;
388         uint linnum = 1;
389         uint len;
390 
391         if(!src)
392             return null;
393         slinestart = src;
394         for(s = src;; s++)
395         {
396             switch(*s)
397             {
398             case '\n':
399                 if(linnum == loc)
400                 {
401                     slineend = s;
402                     break;
403                 }
404                 slinestart = s + 1;
405                 linnum++;
406                 continue;
407 
408             case 0:
409             case 0x1A:
410                 slineend = s;
411                 break;
412 
413             default:
414                 continue;
415             }
416             break;
417         }
418 
419         // Remove trailing \r's
420         while(slinestart < slineend && slineend[-1] == '\r')
421             --slineend;
422 
423         len = slineend - slinestart;
424         return slinestart[0 .. len];
425     }
426 
427 
428     TOK nextToken()
429     {
430         Token *t;
431 
432         if(token.next)
433         {
434             t = token.next;
435             token = *t;
436             t.next = freelist;
437             freelist = t;
438         }
439         else
440         {
441             scan(&token);
442         }
443         //token.print();
444         return token.value;
445     }
446 
447     Token *peek(Token *ct)
448     {
449         Token *t;
450 
451         if(ct.next)
452             t = ct.next;
453         else
454         {
455             t = Token.alloc(&this);
456             scan(t);
457             t.next = null;
458             ct.next = t;
459         }
460         return t;
461     }
462 
463     void insertSemicolon(immutable(tchar) *loc)
464     {
465         // Push current token back into the input, and
466         // create a new current token that is a semicolon
467         Token *t;
468 
469         t = Token.alloc(&this);
470         *t = token;
471         token.next = t;
472         token.value = TOKsemicolon;
473         token.ptr = loc;
474         token.sawLineTerminator = null;
475     }
476 
477     /**********************************
478      * Horrible kludge to support disambiguating TOKregexp from TOKdivide.
479      * The idea is, if we are looking for a TOKdivide, and find instead
480      * a TOKregexp, we back up and rescan.
481      */
482 
483     void rescan()
484     {
485         token.next = null;      // no lookahead
486         // should put on freelist
487         p = token.ptr + 1;
488     }
489 
490 
491     /****************************
492      * Turn next token in buffer into a token.
493      */
494 
495     void scan(Token *t)
496     {
497         tchar c;
498         dchar d;
499         d_string id;
500 
501         //writefln("Lexer.scan()");
502         t.sawLineTerminator = null;
503         for(;; )
504         {
505             t.ptr = p;
506             //t.linnum = currentline;
507             //writefln("p = %x",cast(uint)p);
508             //writefln("p = %x, *p = x%02x, '%s'",cast(uint)p,*p,*p);
509             switch(*p)
510             {
511             case 0:
512             case 0x1A:
513                 t.value = TOKeof;               // end of file
514                 return;
515 
516             case ' ':
517             case '\t':
518             case '\v':
519             case '\f':
520             case 0xA0:                          // no-break space
521                 p++;
522                 continue;                       // skip white space
523 
524             case '\n':                          // line terminator
525                 currentline++;
526                 goto case;
527             case '\r':
528                 t.sawLineTerminator = p;
529                 p++;
530                 continue;
531 
532             case '"':
533             case '\'':
534                 t..string = string(*p);
535                 t.value = TOKstring;
536                 return;
537 
538             case '0':       case '1':   case '2':   case '3':   case '4':
539             case '5':       case '6':   case '7':   case '8':   case '9':
540                 t.value = number(t);
541                 return;
542 
543             case 'a':       case 'b':   case 'c':   case 'd':   case 'e':
544             case 'f':       case 'g':   case 'h':   case 'i':   case 'j':
545             case 'k':       case 'l':   case 'm':   case 'n':   case 'o':
546             case 'p':       case 'q':   case 'r':   case 's':   case 't':
547             case 'u':       case 'v':   case 'w':   case 'x':   case 'y':
548             case 'z':
549             case 'A':       case 'B':   case 'C':   case 'D':   case 'E':
550             case 'F':       case 'G':   case 'H':   case 'I':   case 'J':
551             case 'K':       case 'L':   case 'M':   case 'N':   case 'O':
552             case 'P':       case 'Q':   case 'R':   case 'S':   case 'T':
553             case 'U':       case 'V':   case 'W':   case 'X':   case 'Y':
554             case 'Z':
555             case '_':
556             case '$':
557                 Lidentifier:
558                 {
559                   id = null;
560 
561                   static bool isidletter(dchar d)
562                   {
563                       return std.ascii.isAlphaNum(d) || d == '_' || d == '$' || (d >= 0x80 && std.uni.isAlpha(d));
564                   }
565 
566                   do
567                   {
568                       p = inc(p);
569                       d = get(p);
570                       if(d == '\\' && p[1] == 'u')
571                       {
572                           Lidentifier2:
573                           id = t.ptr[0 .. p - t.ptr].idup;
574                           auto ps = p;
575                           p++;
576                           d = unicode();
577                           if(!isidletter(d))
578                           {
579                               p = ps;
580                               break;
581                           }
582                           dmdscript.utf.encode(id, d);
583                           for(;; )
584                           {
585                               d = get(p);
586                               if(d == '\\' && p[1] == 'u')
587                               {
588                                   auto pstart = p;
589                                   p++;
590                                   d = unicode();
591                                   if(isidletter(d))
592                                       dmdscript.utf.encode(id, d);
593                                   else
594                                   {
595                                       p = pstart;
596                                       goto Lidentifier3;
597                                   }
598                               }
599                               else if(isidletter(d))
600                               {
601                                   dmdscript.utf.encode(id, d);
602                                   p = inc(p);
603                               }
604                               else
605                                   goto Lidentifier3;
606                           }
607                       }
608                   } while(isidletter(d));
609                   id = t.ptr[0 .. p - t.ptr];
610                   Lidentifier3:
611                   //printf("id = '%.*s'\n", id);
612                   t.value = isKeyword(id);
613                   if(t.value)
614                       return;
615                   if(useStringtable)
616                   {     //Identifier* i = &stringtable[id];
617                       Identifier* i = id in stringtable;
618                       if(!i)
619                       {
620                           stringtable[id] = Identifier.init;
621                           i = id in stringtable;
622                       }
623                       i.value.putVstring(id);
624                       i.value.toHash();
625                       t.ident = i;
626                   }
627                   else
628                       t.ident = Identifier.build(id);
629                   t.value = TOKidentifier;
630                   return; }
631 
632             case '/':
633                 p++;
634                 c = *p;
635                 if(c == '=')
636                 {
637                     p++;
638                     t.value = TOKdivideass;
639                     return;
640                 }
641                 else if(c == '*')
642                 {
643                     p++;
644                     for(;; p++)
645                     {
646                         c = *p;
647                         Lcomment:
648                         switch(c)
649                         {
650                         case '*':
651                             p++;
652                             c = *p;
653                             if(c == '/')
654                             {
655                                 p++;
656                                 break;
657                             }
658                             goto Lcomment;
659 
660                         case '\n':
661                             currentline++;
662                             goto case;
663                         case '\r':
664                             t.sawLineTerminator = p;
665                             continue;
666 
667                         case 0:
668                         case 0x1A:
669                             error(ERR_BAD_C_COMMENT);
670                             t.value = TOKeof;
671                             return;
672 
673                         default:
674                             continue;
675                         }
676                         break;
677                     }
678                     continue;
679                 }
680                 else if(c == '/')
681                 {
682                     auto r = p[0..end-p];
683                     uint j;
684                     do{
685                         r.popFront();
686                         j = startsWith(r,'\n','\r','\0',0x1A,'\u2028','\u2029');
687                         
688                     }while(!j);
689                     p = &r[0];
690                     switch(j){
691                         case 1: 
692                             currentline++;
693                             goto case;
694                         case 2: case 5: case 6:
695                             t.sawLineTerminator = p;
696                             break;
697                         case 3: case 4:
698                             t.value = TOKeof;
699                             return;
700                         default:
701                             assert(0);                            
702                     }
703                     p = inc(p);
704                     continue;
705                     /*for(;; )
706                     {
707                         p++;
708                         switch(*p)
709                         {
710                         case '\n':
711                             currentline++;
712                         case '\r':
713                             t.sawLineTerminator = p;
714                             break;
715 
716                         case 0:
717                         case 0x1A:                              // end of file
718                             t.value = TOKeof;
719                             return;
720 
721                         default:
722                             continue;
723                         }
724                         break;
725                     }
726                     p++;
727                     continue;*/
728                 }
729                 else if((t..string = regexp()) != null)
730                     t.value = TOKregexp;
731                 else
732                     t.value = TOKdivide;
733                 return;
734 
735             case '.':
736                 immutable(tchar) * q;
737                 q = p + 1;
738                 c = *q;
739                 if(std.ascii.isDigit(c))
740                     t.value = number(t);
741                 else
742                 {
743                     t.value = TOKdot;
744                     p = q;
745                 }
746                 return;
747 
748             case '&':
749                 p++;
750                 c = *p;
751                 if(c == '=')
752                 {
753                     p++;
754                     t.value = TOKandass;
755                 }
756                 else if(c == '&')
757                 {
758                     p++;
759                     t.value = TOKandand;
760                 }
761                 else
762                     t.value = TOKand;
763                 return;
764 
765             case '|':
766                 p++;
767                 c = *p;
768                 if(c == '=')
769                 {
770                     p++;
771                     t.value = TOKorass;
772                 }
773                 else if(c == '|')
774                 {
775                     p++;
776                     t.value = TOKoror;
777                 }
778                 else
779                     t.value = TOKor;
780                 return;
781 
782             case '-':
783                 p++;
784                 c = *p;
785                 if(c == '=')
786                 {
787                     p++;
788                     t.value = TOKminusass;
789                 }
790                 else if(c == '-')
791                 {
792                     p++;
793 
794                     // If the last token in the file is -. then
795                     // treat it as EOF. This is to accept broken
796                     // scripts that forgot to protect the closing -.
797                     // with a // comment.
798                     if(*p == '>')
799                     {
800                         // Scan ahead to see if it's the last token
801                         immutable(tchar) * q;
802 
803                         q = p;
804                         for(;; )
805                         {
806                             switch(*++q)
807                             {
808                             case 0:
809                             case 0x1A:
810                                 t.value = TOKeof;
811                                 p = q;
812                                 return;
813 
814                             case ' ':
815                             case '\t':
816                             case '\v':
817                             case '\f':
818                             case '\n':
819                             case '\r':
820                             case 0xA0:                  // no-break space
821                                 continue;
822 
823                             default:
824                                 assert(0);
825                             }
826                         }
827                     }
828                     t.value = TOKminusminus;
829                 }
830                 else
831                     t.value = TOKminus;
832                 return;
833 
834             case '+':
835                 p++;
836                 c = *p;
837                 if(c == '=')
838                 {
839                     p++;
840                     t.value = TOKplusass;
841                 }
842                 else if(c == '+')
843                 {
844                     p++;
845                     t.value = TOKplusplus;
846                 }
847                 else
848                     t.value = TOKplus;
849                 return;
850 
851             case '<':
852                 p++;
853                 c = *p;
854                 if(c == '=')
855                 {
856                     p++;
857                     t.value = TOKlessequal;
858                 }
859                 else if(c == '<')
860                 {
861                     p++;
862                     c = *p;
863                     if(c == '=')
864                     {
865                         p++;
866                         t.value = TOKshiftleftass;
867                     }
868                     else
869                         t.value = TOKshiftleft;
870                 }
871                 else if(c == '!' && p[1] == '-' && p[2] == '-')
872                 {       // Special comment to end of line
873                     p += 2;
874                     for(;; )
875                     {
876                         p++;
877                         switch(*p)
878                         {
879                         case '\n':
880                             currentline++;
881                             goto case;
882                         case '\r':
883                             t.sawLineTerminator = p;
884                             break;
885 
886                         case 0:
887                         case 0x1A:                              // end of file
888                             error(ERR_BAD_HTML_COMMENT);
889                             t.value = TOKeof;
890                             return;
891 
892                         default:
893                             continue;
894                         }
895                         break;
896                     }
897                     p++;
898                     continue;
899                 }
900                 else
901                     t.value = TOKless;
902                 return;
903 
904             case '>':
905                 p++;
906                 c = *p;
907                 if(c == '=')
908                 {
909                     p++;
910                     t.value = TOKgreaterequal;
911                 }
912                 else if(c == '>')
913                 {
914                     p++;
915                     c = *p;
916                     if(c == '=')
917                     {
918                         p++;
919                         t.value = TOKshiftrightass;
920                     }
921                     else if(c == '>')
922                     {
923                         p++;
924                         c = *p;
925                         if(c == '=')
926                         {
927                             p++;
928                             t.value = TOKushiftrightass;
929                         }
930                         else
931                             t.value = TOKushiftright;
932                     }
933                     else
934                         t.value = TOKshiftright;
935                 }
936                 else
937                     t.value = TOKgreater;
938                 return;
939 
940             case '(': p++; t.value = TOKlparen;    return;
941             case ')': p++; t.value = TOKrparen;    return;
942             case '[': p++; t.value = TOKlbracket;  return;
943             case ']': p++; t.value = TOKrbracket;  return;
944             case '{': p++; t.value = TOKlbrace;    return;
945             case '}': p++; t.value = TOKrbrace;    return;
946             case '~': p++; t.value = TOKtilde;     return;
947             case '?': p++; t.value = TOKquestion;  return;
948             case ',': p++; t.value = TOKcomma;     return;
949             case ';': p++; t.value = TOKsemicolon; return;
950             case ':': p++; t.value = TOKcolon;     return;
951 
952             case '*':
953                 p++;
954                 c = *p;
955                 if(c == '=')
956                 {
957                     p++;
958                     t.value = TOKmultiplyass;
959                 }
960                 else
961                     t.value = TOKmultiply;
962                 return;
963 
964             case '%':
965                 p++;
966                 c = *p;
967                 if(c == '=')
968                 {
969                     p++;
970                     t.value = TOKpercentass;
971                 }
972                 else
973                     t.value = TOKpercent;
974                 return;
975 
976             case '^':
977                 p++;
978                 c = *p;
979                 if(c == '=')
980                 {
981                     p++;
982                     t.value = TOKxorass;
983                 }
984                 else
985                     t.value = TOKxor;
986                 return;
987 
988             case '=':
989                 p++;
990                 c = *p;
991                 if(c == '=')
992                 {
993                     p++;
994                     c = *p;
995                     if(c == '=')
996                     {
997                         p++;
998                         t.value = TOKidentity;
999                     }
1000                     else
1001                         t.value = TOKequal;
1002                 }
1003                 else
1004                     t.value = TOKassign;
1005                 return;
1006 
1007             case '!':
1008                 p++;
1009                 c = *p;
1010                 if(c == '=')
1011                 {
1012                     p++;
1013                     c = *p;
1014                     if(c == '=')
1015                     {
1016                         p++;
1017                         t.value = TOKnonidentity;
1018                     }
1019                     else
1020                         t.value = TOKnotequal;
1021                 }
1022                 else
1023                     t.value = TOKnot;
1024                 return;
1025 
1026             case '\\':
1027                 if(p[1] == 'u')
1028                 {
1029                     // \uXXXX starts an identifier
1030                     goto Lidentifier2;
1031                 }
1032                 goto default;
1033             default:
1034                 d = get(p);
1035                 if(d >= 0x80 && std.uni.isAlpha(d))
1036                     goto Lidentifier;
1037                 else if(isStrWhiteSpaceChar(d))
1038                 {
1039                     p = inc(p);            //also skip unicode whitespace
1040                     continue;
1041                 }
1042                 else
1043                 {
1044                     if(std.ascii.isPrintable(d))
1045                         error(errmsgtbl[ERR_BAD_CHAR_C], d);
1046                     else
1047                         error(errmsgtbl[ERR_BAD_CHAR_X], d);
1048                 }
1049                 continue;
1050             }
1051         }
1052     }
1053 
1054     /*******************************************
1055      * Parse escape sequence.
1056      */
1057 
1058     dchar escapeSequence()
1059     {
1060         uint c;
1061         int n;
1062 
1063         c = *p;
1064         p++;
1065         switch(c)
1066         {
1067         case '\'':
1068         case '"':
1069         case '?':
1070         case '\\':
1071             break;
1072         case 'a':
1073             c = 7;
1074             break;
1075         case 'b':
1076             c = 8;
1077             break;
1078         case 'f':
1079             c = 12;
1080             break;
1081         case 'n':
1082             c = 10;
1083             break;
1084         case 'r':
1085             c = 13;
1086             break;
1087         case 't':
1088             c = 9;
1089             break;
1090 
1091         case 'v':
1092             version(JSCRIPT_ESCAPEV_BUG)
1093             {
1094             }
1095             else
1096             {
1097                 c = 11;
1098             }
1099             break;
1100 
1101         case 'x':
1102             c = *p;
1103             p++;
1104             if(ishex(c))
1105             {
1106                 uint v;
1107 
1108                 n = 0;
1109                 v = 0;
1110                 for(;; )
1111                 {
1112                     if(std.ascii.isDigit(c))
1113                         c -= '0';
1114                     else if(std.ascii.isLower(c))
1115                         c -= 'a' - 10;
1116                     else            // 'A' <= c && c <= 'Z'
1117                         c -= 'A' - 10;
1118                     v = v * 16 + c;
1119                     c = *p;
1120                     if(++n >= 2 || !ishex(c))
1121                         break;
1122                     p++;
1123                 }
1124                 if(n == 1)
1125                     error(ERR_BAD_HEX_SEQUENCE);
1126                 c = v;
1127             }
1128             else
1129                 error(errmsgtbl[ERR_UNDEFINED_ESC_SEQUENCE], c);
1130             break;
1131 
1132         default:
1133             if(c > 0x7F)
1134             {
1135                 p--;
1136                 c = get(p);
1137                 p = inc(p);
1138             }
1139             if(isoctal(c))
1140             {
1141                 uint v;
1142 
1143                 n = 0;
1144                 v = 0;
1145                 for(;; )
1146                 {
1147                     v = v * 8 + (c - '0');
1148                     c = *p;
1149                     if(++n >= 3 || !isoctal(c))
1150                         break;
1151                     p++;
1152                 }
1153                 c = v;
1154             }
1155             // Don't throw error, just accept it
1156             //error("undefined escape sequence \\%c\n",c);
1157             break;
1158         }
1159         return c;
1160     }
1161 
1162     /**************************************
1163      */
1164 
1165     d_string string(tchar quote)
1166     {
1167         tchar c;
1168         dchar d;
1169         d_string stringbuffer;
1170 
1171         //printf("Lexer.string('%c')\n", quote);
1172         p++;
1173         for(;; )
1174         {
1175             c = *p;
1176             switch(c)
1177             {
1178             case '"':
1179             case '\'':
1180                 p++;
1181                 if(c == quote)
1182                     return stringbuffer;
1183                 break;
1184 
1185             case '\\':
1186                 p++;
1187                 if(*p == 'u')
1188                     d = unicode();
1189                 else
1190                     d = escapeSequence();
1191                 dmdscript.utf.encode(stringbuffer, d);
1192                 continue;
1193 
1194             case '\n':
1195             case '\r':
1196                 p++;
1197                 error(errmsgtbl[ERR_STRING_NO_END_QUOTE], quote);
1198                 return null;
1199 
1200             case 0:
1201             case 0x1A:
1202                 error(ERR_UNTERMINATED_STRING);
1203                 return null;
1204 
1205             default:
1206                 p++;
1207                 break;
1208             }
1209             stringbuffer ~= c;
1210         }
1211         assert(0);
1212     }
1213 
1214     /**************************************
1215      * Scan regular expression. Return null with buffer
1216      * pointer intact if it is not a regexp.
1217      */
1218 
1219     d_string regexp()
1220     {
1221         tchar c;
1222         immutable(tchar) * s;
1223         immutable(tchar) * start;
1224 
1225         /*
1226             RegExpLiteral:  RegExpBody RegExpFlags
1227               RegExpFlags:
1228                   empty
1229          |  RegExpFlags ContinuingIdentifierCharacter
1230               RegExpBody:  / RegExpFirstChar RegExpChars /
1231               RegExpFirstChar:
1232                   OrdinaryRegExpFirstChar
1233          |  \ NonTerminator
1234               OrdinaryRegExpFirstChar:  NonTerminator except \ | / | *
1235               RegExpChars:
1236                   empty
1237          |  RegExpChars RegExpChar
1238               RegExpChar:
1239                   OrdinaryRegExpChar
1240          |  \ NonTerminator
1241               OrdinaryRegExpChar: NonTerminator except \ | /
1242          */
1243 
1244         //writefln("Lexer.regexp()\n");
1245         start = p - 1;
1246         s = p;
1247 
1248         // Do RegExpBody
1249         for(;; )
1250         {
1251             c = *s;
1252             s++;
1253             switch(c)
1254             {
1255             case '\\':
1256                 if(s == p)
1257                     return null;
1258                 c = *s;
1259                 switch(c)
1260                 {
1261                 case '\r':
1262                 case '\n':                      // new line
1263                 case 0:                         // end of file
1264                 case 0x1A:                      // end of file
1265                     return null;                // not a regexp
1266                 default:
1267                     break;
1268                 }
1269                 s++;
1270                 continue;
1271 
1272             case '/':
1273                 if(s == p + 1)
1274                     return null;
1275                 break;
1276 
1277             case '\r':
1278             case '\n':                          // new line
1279             case 0:                             // end of file
1280             case 0x1A:                          // end of file
1281                 return null;                    // not a regexp
1282 
1283             case '*':
1284                 if(s == p + 1)
1285                     return null;
1286                 goto default;
1287             default:
1288                 continue;
1289             }
1290             break;
1291         }
1292 
1293         // Do RegExpFlags
1294         for(;; )
1295         {
1296             c = *s;
1297             if(std.ascii.isAlphaNum(c) || c == '_' || c == '$')
1298             {
1299                 s++;
1300             }
1301             else
1302                 break;
1303         }
1304 
1305         // Finish pattern & return it
1306         p = s;
1307         return start[0 .. s - start].idup;
1308     }
1309 
1310     /***************************************
1311      */
1312 
1313     dchar unicode()
1314     {
1315         dchar value;
1316         uint n;
1317         dchar c;
1318 
1319         value = 0;
1320         p++;
1321         for(n = 0; n < 4; n++)
1322         {
1323             c = *p;
1324             if(!ishex(c))
1325             {
1326                 error(ERR_BAD_U_SEQUENCE);
1327                 break;
1328             }
1329             p++;
1330             if(std.ascii.isDigit(c))
1331                 c -= '0';
1332             else if(isasciilower(c))
1333                 c -= 'a' - 10;
1334             else    // 'A' <= c && c <= 'Z'
1335                 c -= 'A' - 10;
1336             value <<= 4;
1337             value |= c;
1338         }
1339         return value;
1340     }
1341 
1342     /********************************************
1343      * Read a number.
1344      */
1345 
1346     TOK number(Token *t)
1347     {
1348         immutable(tchar) * start;
1349         number_t intvalue;
1350         real realvalue;
1351         int base = 10;
1352         tchar c;
1353 
1354         start = p;
1355         for(;; )
1356         {
1357             c = *p;
1358             p++;
1359             switch(c)
1360             {
1361             case '0':
1362                 // ECMA grammar implies that numbers with leading 0
1363                 // like 015 are illegal. But other scripts allow them.
1364                 if(p - start == 1)              // if leading 0
1365                     base = 8;
1366                 goto case;
1367             case '1': case '2': case '3': case '4': case '5':
1368             case '6': case '7':
1369                 break;
1370 
1371             case '8': case '9':                         // decimal digits
1372                 if(base == 8)                           // and octal base
1373                     base = 10;                          // means back to decimal base
1374                 break;
1375 
1376             default:
1377                 p--;
1378                 Lnumber:
1379                 if(base == 0)
1380                     base = 10;
1381                 intvalue = 0;
1382                 foreach(tchar v; start[0 .. p - start])
1383                 {
1384                     if('0' <= v && v <= '9')
1385                         v -= '0';
1386                     else if('a' <= v && v <= 'f')
1387                         v -= ('a' - 10);
1388                     else if('A' <= v && v <= 'F')
1389                         v -= ('A' - 10);
1390                     else
1391                         assert(0);
1392                     assert(v < base);
1393                     if((number_t.max - v) / base < intvalue)
1394                     {
1395                         realvalue = 0;
1396                         foreach(tchar w; start[0 .. p - start])
1397                         {
1398                             if('0' <= w && w <= '9')
1399                                 w -= '0';
1400                             else if('a' <= w && w <= 'f')
1401                                 w -= ('a' - 10);
1402                             else if('A' <= w && w <= 'F')
1403                                 w -= ('A' - 10);
1404                             else
1405                                 assert(0);
1406                             realvalue *= base;
1407                             realvalue += v;
1408                         }
1409                         t.realvalue = realvalue;
1410                         return TOKreal;
1411                     }
1412                     intvalue *= base;
1413                     intvalue += v;
1414                 }
1415                 t.realvalue = cast(double)intvalue;
1416                 return TOKreal;
1417 
1418             case 'x':
1419             case 'X':
1420                 if(p - start != 2 || !ishex(*p))
1421                     goto Lerr;
1422                 do
1423                     p++;
1424                 while(ishex(*p));
1425                 start += 2;
1426                 base = 16;
1427                 goto Lnumber;
1428 
1429             case '.':
1430                 while(std.ascii.isDigit(*p))
1431                     p++;
1432                 if(*p == 'e' || *p == 'E')
1433                 {
1434                     p++;
1435                     goto Lexponent;
1436                 }
1437                 goto Ldouble;
1438 
1439             case 'e':
1440             case 'E':
1441                 Lexponent:
1442                 if(*p == '+' || *p == '-')
1443                     p++;
1444                 if(!std.ascii.isDigit(*p))
1445                     goto Lerr;
1446                 do
1447                     p++;
1448                 while(std.ascii.isDigit(*p));
1449                 goto Ldouble;
1450 
1451                 Ldouble:
1452                 // convert double
1453                 realvalue = std.c.stdlib.strtod(toStringz(start[0 .. p - start]), null);
1454                 t.realvalue = realvalue;
1455                 return TOKreal;
1456             }
1457         }
1458 
1459         Lerr:
1460         error(ERR_UNRECOGNIZED_N_LITERAL);
1461         return TOKeof;
1462     }
1463 
1464     static TOK isKeyword(const (tchar)[] s)
1465     {
1466         if(s[0] >= 'a' && s[0] <= 'w')
1467             switch(s.length)
1468             {
1469             case 2:
1470                 if(s[0] == 'i')
1471                 {
1472                     if(s[1] == 'f')
1473                         return TOKif;
1474                     if(s[1] == 'n')
1475                         return TOKin;
1476                 }
1477                 else if(s[0] == 'd' && s[1] == 'o')
1478                     return TOKdo;
1479                 break;
1480 
1481             case 3:
1482                 switch(s[0])
1483                 {
1484                 case 'f':
1485                     if(s[1] == 'o' && s[2] == 'r')
1486                         return TOKfor;
1487                     break;
1488                 case 'i':
1489                     if(s[1] == 'n' && s[2] == 't')
1490                         return TOKint;
1491                     break;
1492                 case 'n':
1493                     if(s[1] == 'e' && s[2] == 'w')
1494                         return TOKnew;
1495                     break;
1496                 case 't':
1497                     if(s[1] == 'r' && s[2] == 'y')
1498                         return TOKtry;
1499                     break;
1500                 case 'v':
1501                     if(s[1] == 'a' && s[2] == 'r')
1502                         return TOKvar;
1503                     break;
1504                 default:
1505                     break;
1506                 }
1507                 break;
1508 
1509             case 4:
1510                 switch(s[0])
1511                 {
1512                 case 'b':
1513                     if(s[1] == 'y' && s[2] == 't' && s[3] == 'e')
1514                         return TOKbyte;
1515                     break;
1516                 case 'c':
1517                     if(s[1] == 'a' && s[2] == 's' && s[3] == 'e')
1518                         return TOKcase;
1519                     if(s[1] == 'h' && s[2] == 'a' && s[3] == 'r')
1520                         return TOKchar;
1521                     break;
1522                 case 'e':
1523                     if(s[1] == 'l' && s[2] == 's' && s[3] == 'e')
1524                         return TOKelse;
1525                     if(s[1] == 'n' && s[2] == 'u' && s[3] == 'm')
1526                         return TOKenum;
1527                     break;
1528                 case 'g':
1529                     if(s[1] == 'o' && s[2] == 't' && s[3] == 'o')
1530                         return TOKgoto;
1531                     break;
1532                 case 'l':
1533                     if(s[1] == 'o' && s[2] == 'n' && s[3] == 'g')
1534                         return TOKlong;
1535                     break;
1536                 case 'n':
1537                     if(s[1] == 'u' && s[2] == 'l' && s[3] == 'l')
1538                         return TOKnull;
1539                     break;
1540                 case 't':
1541                     if(s[1] == 'h' && s[2] == 'i' && s[3] == 's')
1542                         return TOKthis;
1543                     if(s[1] == 'r' && s[2] == 'u' && s[3] == 'e')
1544                         return TOKtrue;
1545                     break;
1546                 case 'w':
1547                     if(s[1] == 'i' && s[2] == 't' && s[3] == 'h')
1548                         return TOKwith;
1549                     break;
1550                 case 'v':
1551                     if(s[1] == 'o' && s[2] == 'i' && s[3] == 'd')
1552                         return TOKvoid;
1553                     break;
1554                 default:
1555                     break;
1556                 }
1557                 break;
1558 
1559             case 5:
1560                 switch(s)
1561                 {
1562                 case "break":               return TOKbreak;
1563                 case "catch":               return TOKcatch;
1564                 case "class":               return TOKclass;
1565                 case "const":               return TOKconst;
1566                 case "false":               return TOKfalse;
1567                 case "final":               return TOKfinal;
1568                 case "float":               return TOKfloat;
1569                 case "short":               return TOKshort;
1570                 case "super":               return TOKsuper;
1571                 case "throw":               return TOKthrow;
1572                 case "while":               return TOKwhile;
1573                 default:
1574                     break;
1575                 }
1576                 break;
1577 
1578             case 6:
1579                 switch(s)
1580                 {
1581                 case "delete":              return TOKdelete;
1582                 case "double":              return TOKdouble;
1583                 case "export":              return TOKexport;
1584                 case "import":              return TOKimport;
1585                 case "native":              return TOKnative;
1586                 case "public":              return TOKpublic;
1587                 case "return":              return TOKreturn;
1588                 case "static":              return TOKstatic;
1589                 case "switch":              return TOKswitch;
1590                 case "throws":              return TOKthrows;
1591                 case "typeof":              return TOKtypeof;
1592                 default:
1593                     break;
1594                 }
1595                 break;
1596 
1597             case 7:
1598                 switch(s)
1599                 {
1600                 case "boolean":             return TOKboolean;
1601                 case "default":             return TOKdefault;
1602                 case "extends":             return TOKextends;
1603                 case "finally":             return TOKfinally;
1604                 case "package":             return TOKpackage;
1605                 case "private":             return TOKprivate;
1606                 default:
1607                     break;
1608                 }
1609                 break;
1610 
1611             case 8:
1612                 switch(s)
1613                 {
1614                 case "abstract":    return TOKabstract;
1615                 case "continue":    return TOKcontinue;
1616                 case "debugger":    return TOKdebugger;
1617                 case "function":    return TOKfunction;
1618                 default:
1619                     break;
1620                 }
1621                 break;
1622 
1623             case 9:
1624                 switch(s)
1625                 {
1626                 case "interface":   return TOKinterface;
1627                 case "protected":   return TOKprotected;
1628                 case "transient":   return TOKtransient;
1629                 default:
1630                     break;
1631                 }
1632                 break;
1633 
1634             case 10:
1635                 switch(s)
1636                 {
1637                 case "implements":  return TOKimplements;
1638                 case "instanceof":  return TOKinstanceof;
1639                 default:
1640                     break;
1641                 }
1642                 break;
1643 
1644             case 12:
1645                 if(s == "synchronized")
1646                     return TOKsynchronized;
1647                 break;
1648 
1649             default:
1650                 break;
1651             }
1652         return TOKreserved;             // not a keyword
1653     }
1654 }
1655 
1656 
1657 /****************************************
1658  */
1659 
struct Keyword
{
    // One entry of the keyword table: a spelling and its token code.
    // Field order matters: the table below uses positional { name, value }
    // initializers.
    string name;        // keyword text
    TOK    value;       // token code for that text
}
1663 
// Table pairing every keyword spelling with its token code; init() walks it
// to fill Token.tochars.  Declared with D array syntax (Keyword[]) rather
// than the deprecated C-style postfix form.
// NOTE(review): the three groups below look like current keywords, reserved
// words, and Java-style reserved words — confirm before relying on the
// grouping.
static Keyword[] keywords =
[
//    {	"",		TOK		},

    { "break", TOKbreak },
    { "case", TOKcase },
    { "continue", TOKcontinue },
    { "default", TOKdefault },
    { "delete", TOKdelete },
    { "do", TOKdo },
    { "else", TOKelse },
    { "export", TOKexport },
    { "false", TOKfalse },
    { "for", TOKfor },
    { "function", TOKfunction },
    { "if", TOKif },
    { "import", TOKimport },
    { "in", TOKin },
    { "new", TOKnew },
    { "null", TOKnull },
    { "return", TOKreturn },
    { "switch", TOKswitch },
    { "this", TOKthis },
    { "true", TOKtrue },
    { "typeof", TOKtypeof },
    { "var", TOKvar },
    { "void", TOKvoid },
    { "while", TOKwhile },
    { "with", TOKwith },

    { "catch", TOKcatch },
    { "class", TOKclass },
    { "const", TOKconst },
    { "debugger", TOKdebugger },
    { "enum", TOKenum },
    { "extends", TOKextends },
    { "finally", TOKfinally },
    { "super", TOKsuper },
    { "throw", TOKthrow },
    { "try", TOKtry },

    { "abstract", TOKabstract },
    { "boolean", TOKboolean },
    { "byte", TOKbyte },
    { "char", TOKchar },
    { "double", TOKdouble },
    { "final", TOKfinal },
    { "float", TOKfloat },
    { "goto", TOKgoto },
    { "implements", TOKimplements },
    { "instanceof", TOKinstanceof },
    { "int", TOKint },
    { "interface", TOKinterface },
    { "long", TOKlong },
    { "native", TOKnative },
    { "package", TOKpackage },
    { "private", TOKprivate },
    { "protected", TOKprotected },
    { "public", TOKpublic },
    { "short", TOKshort },
    { "static", TOKstatic },
    { "synchronized", TOKsynchronized },
    { "throws", TOKthrows },
    { "transient", TOKtransient },
];
1729 
void init()
{
    // Fill Token.tochars, the table that maps a token code to its printable
    // spelling, and then mark the lexer as initialised.

    // Keywords print as their own spelling.
    foreach(ref kw; keywords)
    {
        //writefln("tochars[%d] = '%s'", kw.value, kw.name);
        Token.tochars[kw.value] = kw.name;
    }

    // Special tokens
    Token.tochars[TOKreserved] = "reserved";
    Token.tochars[TOKeof] = "EOF";
    Token.tochars[TOKcall] = "CALL";

    // Brackets and separators
    Token.tochars[TOKlbrace] = "{";
    Token.tochars[TOKrbrace] = "}";
    Token.tochars[TOKlparen] = "(";
    Token.tochars[TOKrparen] = ")";     // was registered as an empty string
    Token.tochars[TOKlbracket] = "[";
    Token.tochars[TOKrbracket] = "]";
    Token.tochars[TOKcolon] = ":";
    Token.tochars[TOKsemicolon] = ";";
    Token.tochars[TOKcomma] = ",";
    Token.tochars[TOKdot] = ".";
    Token.tochars[TOKquestion] = "?";

    // Bitwise and logical operators
    Token.tochars[TOKor] = "|";
    Token.tochars[TOKorass] = "|=";
    Token.tochars[TOKxor] = "^";
    Token.tochars[TOKxorass] = "^=";
    Token.tochars[TOKand] = "&";
    Token.tochars[TOKandass] = "&=";
    Token.tochars[TOKtilde] = "~";
    Token.tochars[TOKnot] = "!";
    Token.tochars[TOKandand] = "&&";
    Token.tochars[TOKoror] = "||";

    // Assignment and comparison
    Token.tochars[TOKassign] = "=";
    Token.tochars[TOKless] = "<";
    Token.tochars[TOKgreater] = ">";
    Token.tochars[TOKlessequal] = "<=";
    Token.tochars[TOKgreaterequal] = ">=";
    Token.tochars[TOKequal] = "==";
    Token.tochars[TOKnotequal] = "!=";
    Token.tochars[TOKidentity] = "===";
    Token.tochars[TOKnonidentity] = "!==";

    // Shifts, including the compound assignments that previously had no
    // spelling registered
    Token.tochars[TOKshiftleft] = "<<";
    Token.tochars[TOKshiftright] = ">>";
    Token.tochars[TOKushiftright] = ">>>";
    Token.tochars[TOKshiftleftass] = "<<=";
    Token.tochars[TOKshiftrightass] = ">>=";
    Token.tochars[TOKushiftrightass] = ">>>=";

    // Arithmetic
    Token.tochars[TOKplus] = "+";
    Token.tochars[TOKplusass] = "+=";
    Token.tochars[TOKminus] = "-";
    Token.tochars[TOKminusass] = "-=";
    Token.tochars[TOKmultiply] = "*";
    Token.tochars[TOKmultiplyass] = "*=";
    Token.tochars[TOKdivide] = "/";
    Token.tochars[TOKdivideass] = "/=";
    Token.tochars[TOKpercent] = "%";
    Token.tochars[TOKpercentass] = "%=";
    Token.tochars[TOKplusplus] = "++";
    Token.tochars[TOKminusminus] = "--";

    Lexer.inited = true;
}
1798