/* Digital Mars DMDScript source code.
 * Copyright (c) 2000-2002 by Chromium Communications
 * D version Copyright (c) 2004-2010 by Digital Mars
 * Distributed under the Boost Software License, Version 1.0.
 * (See accompanying file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
 * written by Walter Bright
 * http://www.digitalmars.com
 *
 * D2 port by Dmitry Olshansky
 *
 * DMDScript is implemented in the D Programming Language,
 * http://www.digitalmars.com/d/
 *
 * For a C++ implementation of DMDScript, including COM support, see
 * http://www.digitalmars.com/dscript/cppscript.html
 */

/* Lexical Analyzer
 */

module dmdscript.lexer;

import std.range;
import std.algorithm;
import std.stdio;
import std.string;
import std.utf;
import std.outbuffer;
import std.ascii;
import std.c.stdlib;

import dmdscript.script;
import dmdscript.text;
import dmdscript.identifier;
import dmdscript.scopex;
import dmdscript.errmsgs;
import dmdscript.utf;

/* Tokens:
        (	)
        [	]
        {	}
        <	>	<=	>=	==	!=
        ===     !==
        <<	>>	<<=	>>=	>>>	>>>=
        +	-	+=	-=
        *	/	%	*=	/=	%=
        &	|	^	&=	|=	^=
        =	!	~
        ++	--
        .	:	,
        ?	&&	||
 */

/// Integer type used for every token code below.
alias int TOK;

/* Token codes. The values are consecutive starting at 0 (TOKreserved);
 * TOKmax is one past the last real token and is used to size lookup tables.
 */
enum
{
    TOKreserved,

    // Other
    TOKlparen, TOKrparen,
    TOKlbracket, TOKrbracket,
    TOKlbrace, TOKrbrace,
    TOKcolon, TOKneg,
    TOKpos,
    TOKsemicolon, TOKeof,
    TOKarray, TOKcall,
    TOKarraylit, TOKobjectlit,
    TOKcomma, TOKassert,

    // Operators
    TOKless, TOKgreater,
    TOKlessequal, TOKgreaterequal,
    TOKequal, TOKnotequal,
    TOKidentity, TOKnonidentity,
    TOKshiftleft, TOKshiftright,
    TOKshiftleftass, TOKshiftrightass,
    TOKushiftright, TOKushiftrightass,
    TOKplus, TOKminus, TOKplusass, TOKminusass,
    TOKmultiply, TOKdivide, TOKpercent,
    TOKmultiplyass, TOKdivideass, TOKpercentass,
    TOKand, TOKor, TOKxor,
    TOKandass, TOKorass, TOKxorass,
    TOKassign, TOKnot, TOKtilde,
    TOKplusplus, TOKminusminus, TOKdot,
    TOKquestion, TOKandand, TOKoror,

    // Leaf operators
    TOKnumber, TOKidentifier, TOKstring,
    TOKregexp, TOKreal,

    // Keywords
    TOKbreak, TOKcase, TOKcontinue,
    TOKdefault, TOKdelete, TOKdo,
    TOKelse, TOKexport, TOKfalse,
    TOKfor, TOKfunction, TOKif,
    TOKimport, TOKin, TOKnew,
    TOKnull, TOKreturn,
    TOKswitch, TOKthis, TOKtrue,
    TOKtypeof, TOKvar, TOKvoid,
    TOKwhile, TOKwith,

    // Reserved for ECMA extensions
    TOKcatch, TOKclass,
    TOKconst, TOKdebugger,
    TOKenum, TOKextends,
    TOKfinally, TOKsuper,
    TOKthrow, TOKtry,

    // Java keywords reserved for unknown reasons
    TOKabstract, TOKboolean,
    TOKbyte, TOKchar,
    TOKdouble, TOKfinal,
    TOKfloat, TOKgoto,
    TOKimplements, TOKinstanceof,
    TOKint, TOKinterface,
    TOKlong, TOKnative,
    TOKpackage, TOKprivate,
    TOKprotected, TOKpublic,
    TOKshort, TOKstatic,
    TOKsynchronized, TOKthrows,
    TOKtransient,

    TOKmax
}

/// Returns non-zero if c is an ASCII octal digit ('0'..'7').
int isoctal(dchar c)
{
    return('0' <= c && c <= '7');
}

/// Returns non-zero if c is an ASCII decimal digit ('0'..'9').
int isasciidigit(dchar c)
{
    return('0' <= c && c <= '9');
}

/// Returns non-zero if c is an ASCII lower-case letter ('a'..'z').
int isasciilower(dchar c)
{
    return('a' <= c && c <= 'z');
}
/// Returns non-zero if c is an ASCII upper-case letter ('A'..'Z').
int isasciiupper(dchar c)
{
    return('A' <= c && c <= 'Z');
}

/// Returns non-zero if c is an ASCII hexadecimal digit (0-9, a-f, A-F).
int ishex(dchar c)
{
    return
        ('0' <= c && c <= '9') ||
        ('a' <= c && c <= 'f') ||
        ('A' <= c && c <= 'F');
}


/******************************************************/

/* A single lexical token. Tokens are chained through `next` both for the
 * Lexer's look-ahead list and for its free list.
 */
struct Token
{
    Token *next;
    immutable(tchar) *ptr;       // pointer to first character of this token within buffer
    uint      linnum;
    TOK       value;
    immutable(tchar) *sawLineTerminator; // where we saw the last line terminator
    union
    {
        number_t    intvalue;
        real_t      realvalue;
        d_string    string;
        Identifier *ident;
    }

    // Printable text for each token code; filled in by init().
    static d_string[TOKmax] tochars;

    /**************************
     * Get a Token, reusing the head of lex.freelist when one is available
     * and heap-allocating otherwise. A reused token is NOT cleared; the
     * callers in this file overwrite it immediately.
     */
    static Token* alloc(Lexer* lex)
    {
        Token *t;

        if(lex.freelist)
        {
            t = lex.freelist;
            lex.freelist = t.next;
            return t;
        }

        return new Token();
    }

    /// Write the token's text followed by a newline to stdout.
    void print()
    {
        // writeln, not writefln: the token text must not be interpreted as
        // a format string (e.g. the "%" token or a string containing '%').
        writeln(toString());
    }

    /**************************
     * Text of this token: the formatted value for numbers, the stored
     * string for strings/regexps, the identifier's text for identifiers,
     * and the tochars[] entry for everything else.
     */
    d_string toString()
    {
        d_string p;

        switch(value)
        {
        case TOKnumber:
            p = std.string.format("%d", intvalue);
            break;

        case TOKreal:
        {
            // Print integral values without a fractional part
            long l = cast(long)realvalue;
            if(l == realvalue)
                p = std.string.format("%s", l);
            else
                p = std.string.format("%s", realvalue);
            break;
        }

        case TOKstring:
        case TOKregexp:
            p = string;
            break;

        case TOKidentifier:
            p = ident.toString();
            break;

        default:
            p = toString(value);
            break;
        }
        return p;
    }

    /**************************
     * Text for a token code; falls back to "TOK<n>" when tochars[] has
     * no entry for it.
     */
    static d_string toString(TOK value)
    {
        d_string p;

        p = tochars[value];
        if(!p)
            p = std.string.format("TOK%d", value);
        return p;
    }
}




/*******************************************************************/

/* Converts source text into Tokens. The current token lives in `token`;
 * look-ahead tokens hang off token.next.
 */
class Lexer
{
    Identifier[d_string] stringtable;
    Token* freelist;

    d_string sourcename;       // for error message strings

    d_string base;             // pointer to start of buffer
    immutable(char) * end;      // past end of buffer
    immutable(char) * p;        // current character
    uint currentline;
    Token token;
    OutBuffer stringbuffer;
    int useStringtable;         // use for Identifiers

    ErrInfo errinfo;            // syntax error information
    static bool inited;

    /**************************
     * Set up lexing of `base`. If the text does not already end in 0 or
     * 0x1A, a 0x1A sentinel is appended so the scanning loops can detect
     * end of input without bounds checks.
     */
    this(d_string sourcename, d_string base, int useStringtable)
    {
        //writefln("Lexer::Lexer(base = '%s')\n",base);
        if(!inited)
            init();

        std.c.string.memset(&token, 0, token.sizeof);
        this.useStringtable = useStringtable;
        this.sourcename = sourcename;
        if(!base.length || (base[$ - 1] != 0 && base[$ - 1] != 0x1A))
            base ~= cast(tchar)0x1A;
        this.base = base;
        this.end = base.ptr + base.length;
        p = base.ptr;
        currentline = 1;
        freelist = null;
    }


    ~this()
    {
        //writef(L"~Lexer()\n");
        freelist = null;
        sourcename = null;
        base = null;
        end = null;
        p = null;
    }

    /// Decode and return the code point that starts at p (p must point into base).
    dchar get(immutable(tchar)* p)
    {
        size_t idx = p - base.ptr;
        return std.utf.decode(base, idx);
    }

    /// Return the pointer just past the code point that starts at p.
    immutable(tchar) * inc(immutable(tchar) * p)
    {
        size_t idx = p - base.ptr;
        std.utf.decode(base, idx);
        return base.ptr + idx;
    }

    /// Report the error whose message is errmsgtbl[msgnum].
    void error(int msgnum)
    {
        error(errmsgtbl[msgnum]);
    }

    /**************************
     * Report a formatted error at the current position. Only the first
     * error is recorded in errinfo. The remaining input is then skipped
     * and any look-ahead is dropped, so the next scan yields TOKeof.
     */
    void error(...)
    {
        uint linnum = 1;
        immutable(tchar) * s;
        immutable(tchar) * slinestart;
        immutable(tchar) * slineend;
        d_string buf;

        //FuncLog funclog(L"Lexer.error()");
        //writefln("TEXT START ------------\n%ls\nTEXT END ------------------", base);

        // Find the beginning of the line
        slinestart = base.ptr;
        for(s = base.ptr; s != p; s++)
        {
            if(*s == '\n')
            {
                linnum++;
                slinestart = s + 1;
            }
        }

        // Find the end of the line
        for(;; )
        {
            switch(*s)
            {
            case '\n':
            case 0:
            case 0x1A:
                break;
            default:
                s++;
                continue;
            }
            break;
        }
        slineend = s;

        buf = std.string.format("%s(%d) : Error: ", sourcename, linnum);

        void putc(dchar c)
        {
            dmdscript.utf.encode(buf, c);
        }

        std.format.doFormat(&putc, _arguments, _argptr);

        if(!errinfo.message)
        {
            errinfo.message = buf;
            errinfo.linnum = linnum;
            errinfo.charpos = p - slinestart;
            errinfo.srcline = slinestart[0 .. slineend - slinestart];
        }

        // Consume input until the end
        while(*p != 0x1A && *p != 0)
            p++;
        token.next = null;              // dump any lookahead

        version(none)
        {
            writefln(errinfo.message);
            fflush(stdout);
            exit(EXIT_FAILURE);
        }
    }

    /************************************************
     * Given source text, convert loc to a string for the corresponding line.
     * Lines are counted from 1 and split on '\n'; scanning stops at 0 or
     * 0x1A. Trailing '\r' characters are stripped. Returns null if src
     * is null.
     */

    static d_string locToSrcline(immutable(char) *src, Loc loc)
    {
        immutable(char) * slinestart;
        immutable(char) * slineend;
        immutable(char) * s;
        uint linnum = 1;

        if(!src)
            return null;
        slinestart = src;
        for(s = src;; s++)
        {
            switch(*s)
            {
            case '\n':
                if(linnum == loc)
                {
                    slineend = s;
                    break;
                }
                slinestart = s + 1;
                linnum++;
                continue;

            case 0:
            case 0x1A:
                slineend = s;
                break;

            default:
                continue;
            }
            break;
        }

        // Remove trailing \r's
        while(slinestart < slineend && slineend[-1] == '\r')
            --slineend;

        return slinestart[0 .. slineend - slinestart];
    }


    /**************************
     * Advance to the next token and return its code. A pending look-ahead
     * token is consumed first (and its node recycled onto the free list);
     * otherwise a new token is scanned.
     */
    TOK nextToken()
    {
        Token *t;

        if(token.next)
        {
            t = token.next;
            token = *t;
            t.next = freelist;
            freelist = t;
        }
        else
        {
            scan(&token);
        }
        //token.print();
        return token.value;
    }

    /**************************
     * Return the token following ct without consuming it, scanning and
     * linking a new one after ct if none is pending.
     */
    Token *peek(Token *ct)
    {
        Token *t;

        if(ct.next)
            t = ct.next;
        else
        {
            t = Token.alloc(&this);
            scan(t);
            t.next = null;
            ct.next = t;
        }
        return t;
    }

    void insertSemicolon(immutable(tchar) *loc)
    {
        // Push current token back into the input, and
        // create a new current token that is a semicolon
        Token *t;

        t = Token.alloc(&this);
        *t = token;
        token.next = t;
        token.value = TOKsemicolon;
        token.ptr = loc;
        token.sawLineTerminator = null;
    }

    /**********************************
     * Horrible kludge to support disambiguating TOKregexp from TOKdivide.
     * The idea is, if we are looking for a TOKdivide, and find instead
     * a TOKregexp, we back up and rescan.
     */

    void rescan()
    {
        token.next = null;      // no lookahead
        // should put on freelist
        p = token.ptr + 1;      // resume just past the token's first character
    }


    /****************************
     * Turn next token in buffer into a token.
     * White space, line terminators and comments are skipped;
     * t.sawLineTerminator records the last line terminator skipped.
     */

    void scan(Token *t)
    {
        tchar c;
        dchar d;
        d_string id;

        //writefln("Lexer.scan()");
        t.sawLineTerminator = null;
        for(;; )
        {
            t.ptr = p;
            //t.linnum = currentline;
            //writefln("p = %x",cast(uint)p);
            //writefln("p = %x, *p = x%02x, '%s'",cast(uint)p,*p,*p);
            switch(*p)
            {
            case 0:
            case 0x1A:
                t.value = TOKeof;               // end of file
                return;

            case ' ':
            case '\t':
            case '\v':
            case '\f':
            case 0xA0:                          // no-break space
                p++;
                continue;                       // skip white space

            case '\n':                          // line terminator
                currentline++;
                goto case;
            case '\r':
                t.sawLineTerminator = p;
                p++;
                continue;

            case '"':
            case '\'':
                t.string = string(*p);
                t.value = TOKstring;
                return;

            case '0': case '1': case '2': case '3': case '4':
            case '5': case '6': case '7': case '8': case '9':
                t.value = number(t);
                return;

            case 'a': case 'b': case 'c': case 'd': case 'e':
            case 'f': case 'g': case 'h': case 'i': case 'j':
            case 'k': case 'l': case 'm': case 'n': case 'o':
            case 'p': case 'q': case 'r': case 's': case 't':
            case 'u': case 'v': case 'w': case 'x': case 'y':
            case 'z':
            case 'A': case 'B': case 'C': case 'D': case 'E':
            case 'F': case 'G': case 'H': case 'I': case 'J':
            case 'K': case 'L': case 'M': case 'N': case 'O':
            case 'P': case 'Q': case 'R': case 'S': case 'T':
            case 'U': case 'V': case 'W': case 'X': case 'Y':
            case 'Z':
            case '_':
            case '$':
            Lidentifier:
                {
                    id = null;

                    static bool isidletter(dchar d)
                    {
                        return std.ascii.isAlphaNum(d) || d == '_' || d == '$' || (d >= 0x80 && std.uni.isAlpha(d));
                    }

                    do
                    {
                        p = inc(p);
                        d = get(p);
                        if(d == '\\' && p[1] == 'u')
                        {
                            // From here on the identifier text differs from
                            // the source text, so build it in a private copy.
                            Lidentifier2:
                            id = t.ptr[0 .. p - t.ptr].idup;
                            auto ps = p;
                            p++;
                            d = unicode();
                            if(!isidletter(d))
                            {
                                p = ps;
                                break;
                            }
                            dmdscript.utf.encode(id, d);
                            for(;; )
                            {
                                d = get(p);
                                if(d == '\\' && p[1] == 'u')
                                {
                                    auto pstart = p;
                                    p++;
                                    d = unicode();
                                    if(isidletter(d))
                                        dmdscript.utf.encode(id, d);
                                    else
                                    {
                                        p = pstart;
                                        goto Lidentifier3;
                                    }
                                }
                                else if(isidletter(d))
                                {
                                    dmdscript.utf.encode(id, d);
                                    p = inc(p);
                                }
                                else
                                    goto Lidentifier3;
                            }
                        }
                    } while(isidletter(d));
                    if(p == t.ptr)
                    {
                        // Only reachable when a \uXXXX escape at the very
                        // start of the token did not decode to an identifier
                        // character: nothing was consumed, so returning an
                        // (empty) identifier would repeat forever. Report the
                        // backslash as a bad character instead; error() skips
                        // the rest of the input.
                        error(errmsgtbl[ERR_BAD_CHAR_C], cast(dchar)'\\');
                        continue;
                    }
                    id = t.ptr[0 .. p - t.ptr];
                    Lidentifier3:
                    //printf("id = '%.*s'\n", id);
                    t.value = isKeyword(id);
                    if(t.value)
                        return;
                    if(useStringtable)
                    {     //Identifier* i = &stringtable[id];
                        Identifier* i = id in stringtable;
                        if(!i)
                        {
                            stringtable[id] = Identifier.init;
                            i = id in stringtable;
                        }
                        i.value.putVstring(id);
                        i.value.toHash();
                        t.ident = i;
                    }
                    else
                        t.ident = Identifier.build(id);
                    t.value = TOKidentifier;
                    return;
                }

            case '/':
                p++;
                c = *p;
                if(c == '=')
                {
                    p++;
                    t.value = TOKdivideass;
                    return;
                }
                else if(c == '*')
                {
                    // Block comment
                    p++;
                    for(;; p++)
                    {
                        c = *p;
                        Lcomment:
                        switch(c)
                        {
                        case '*':
                            p++;
                            c = *p;
                            if(c == '/')
                            {
                                p++;
                                break;
                            }
                            goto Lcomment;

                        case '\n':
                            currentline++;
                            goto case;
                        case '\r':
                            t.sawLineTerminator = p;
                            continue;

                        case 0:
                        case 0x1A:
                            error(ERR_BAD_C_COMMENT);
                            t.value = TOKeof;
                            return;

                        default:
                            continue;
                        }
                        break;
                    }
                    continue;
                }
                else if(c == '/')
                {
                    // Line comment: skip to the first terminator; j is the
                    // 1-based index of whichever needle matched.
                    auto r = p[0..end-p];
                    uint j;
                    do{
                        r.popFront();
                        j = startsWith(r,'\n','\r','\0',0x1A,'\u2028','\u2029');
                    }while(!j);
                    p = &r[0];
                    switch(j){
                        case 1:
                            currentline++;
                            goto case;
                        case 2: case 5: case 6:
                            t.sawLineTerminator = p;
                            break;
                        case 3: case 4:
                            t.value = TOKeof;
                            return;
                        default:
                            assert(0);
                    }
                    p = inc(p);
                    continue;
                }
                else if((t.string = regexp()) != null)
                    t.value = TOKregexp;
                else
                    t.value = TOKdivide;
                return;

            case '.':
                immutable(tchar) * q;
                q = p + 1;
                c = *q;
                if(std.ascii.isDigit(c))
                    t.value = number(t);
                else
                {
                    t.value = TOKdot;
                    p = q;
                }
                return;

            case '&':
                p++;
                c = *p;
                if(c == '=')
                {
                    p++;
                    t.value = TOKandass;
                }
                else if(c == '&')
                {
                    p++;
                    t.value = TOKandand;
                }
                else
                    t.value = TOKand;
                return;

            case '|':
                p++;
                c = *p;
                if(c == '=')
                {
                    p++;
                    t.value = TOKorass;
                }
                else if(c == '|')
                {
                    p++;
                    t.value = TOKoror;
                }
                else
                    t.value = TOKor;
                return;

            case '-':
                p++;
                c = *p;
                if(c == '=')
                {
                    p++;
                    t.value = TOKminusass;
                }
                else if(c == '-')
                {
                    p++;

                    // If the last token in the file is --> then
                    // treat it as EOF. This is to accept broken
                    // scripts that forgot to protect the closing -->
                    // with a // comment.
                    if(*p == '>')
                    {
                        // Scan ahead to see if it's the last token
                        immutable(tchar) * q;

                        q = p;
                    Lscanahead:
                        for(;; )
                        {
                            switch(*++q)
                            {
                            case 0:
                            case 0x1A:
                                t.value = TOKeof;
                                p = q;
                                return;

                            case ' ':
                            case '\t':
                            case '\v':
                            case '\f':
                            case '\n':
                            case '\r':
                            case 0xA0:          // no-break space
                                continue;

                            default:
                                // More source follows, so this is an
                                // ordinary "--" followed by ">" (e.g.
                                // "n-->0"); p still points at the '>'.
                                break Lscanahead;
                            }
                        }
                    }
                    t.value = TOKminusminus;
                }
                else
                    t.value = TOKminus;
                return;

            case '+':
                p++;
                c = *p;
                if(c == '=')
                {
                    p++;
                    t.value = TOKplusass;
                }
                else if(c == '+')
                {
                    p++;
                    t.value = TOKplusplus;
                }
                else
                    t.value = TOKplus;
                return;

            case '<':
                p++;
                c = *p;
                if(c == '=')
                {
                    p++;
                    t.value = TOKlessequal;
                }
                else if(c == '<')
                {
                    p++;
                    c = *p;
                    if(c == '=')
                    {
                        p++;
                        t.value = TOKshiftleftass;
                    }
                    else
                        t.value = TOKshiftleft;
                }
                else if(c == '!' && p[1] == '-' && p[2] == '-')
                {       // Special comment to end of line
                    p += 2;
                    for(;; )
                    {
                        p++;
                        switch(*p)
                        {
                        case '\n':
                            currentline++;
                            goto case;
                        case '\r':
                            t.sawLineTerminator = p;
                            break;

                        case 0:
                        case 0x1A:                      // end of file
                            error(ERR_BAD_HTML_COMMENT);
                            t.value = TOKeof;
                            return;

                        default:
                            continue;
                        }
                        break;
                    }
                    p++;
                    continue;
                }
                else
                    t.value = TOKless;
                return;

            case '>':
                p++;
                c = *p;
                if(c == '=')
                {
                    p++;
                    t.value = TOKgreaterequal;
                }
                else if(c == '>')
                {
                    p++;
                    c = *p;
                    if(c == '=')
                    {
                        p++;
                        t.value = TOKshiftrightass;
                    }
                    else if(c == '>')
                    {
                        p++;
                        c = *p;
                        if(c == '=')
                        {
                            p++;
                            t.value = TOKushiftrightass;
                        }
                        else
                            t.value = TOKushiftright;
                    }
                    else
                        t.value = TOKshiftright;
                }
                else
                    t.value = TOKgreater;
                return;

            case '(': p++; t.value = TOKlparen;    return;
            case ')': p++; t.value = TOKrparen;    return;
            case '[': p++; t.value = TOKlbracket;  return;
            case ']': p++; t.value = TOKrbracket;  return;
            case '{': p++; t.value = TOKlbrace;    return;
            case '}': p++; t.value = TOKrbrace;    return;
            case '~': p++; t.value = TOKtilde;     return;
            case '?': p++; t.value = TOKquestion;  return;
            case ',': p++; t.value = TOKcomma;     return;
            case ';': p++; t.value = TOKsemicolon; return;
            case ':': p++; t.value = TOKcolon;     return;

            case '*':
                p++;
                c = *p;
                if(c == '=')
                {
                    p++;
                    t.value = TOKmultiplyass;
                }
                else
                    t.value = TOKmultiply;
                return;

            case '%':
                p++;
                c = *p;
                if(c == '=')
                {
                    p++;
                    t.value = TOKpercentass;
                }
                else
                    t.value = TOKpercent;
                return;

            case '^':
                p++;
                c = *p;
                if(c == '=')
                {
                    p++;
                    t.value = TOKxorass;
                }
                else
                    t.value = TOKxor;
                return;

            case '=':
                p++;
                c = *p;
                if(c == '=')
                {
                    p++;
                    c = *p;
                    if(c == '=')
                    {
                        p++;
                        t.value = TOKidentity;
                    }
                    else
                        t.value = TOKequal;
                }
                else
                    t.value = TOKassign;
                return;

            case '!':
                p++;
                c = *p;
                if(c == '=')
                {
                    p++;
                    c = *p;
                    if(c == '=')
                    {
                        p++;
                        t.value = TOKnonidentity;
                    }
                    else
                        t.value = TOKnotequal;
                }
                else
                    t.value = TOKnot;
                return;

            case '\\':
                if(p[1] == 'u')
                {
                    // \uXXXX starts an identifier
                    goto Lidentifier2;
                }
                goto default;
            default:
                d = get(p);
                if(d >= 0x80 && std.uni.isAlpha(d))
                    goto Lidentifier;
                else if(isStrWhiteSpaceChar(d))
                {
                    p = inc(p);            //also skip unicode whitespace
                    continue;
                }
                else
                {
                    if(std.ascii.isPrintable(d))
                        error(errmsgtbl[ERR_BAD_CHAR_C], d);
                    else
                        error(errmsgtbl[ERR_BAD_CHAR_X], d);
                }
                continue;
            }
        }
    }

    /*******************************************
     * Parse escape sequence.
     * On entry p points just past the backslash; on exit it points past
     * the sequence. Returns the character the sequence stands for.
     * Unrecognized escapes yield the escaped character itself.
     */

    dchar escapeSequence()
    {
        uint c;
        int n;

        c = *p;
        p++;
        switch(c)
        {
        case '\'':
        case '"':
        case '?':
        case '\\':
            break;
        case 'a':
            c = 7;
            break;
        case 'b':
            c = 8;
            break;
        case 'f':
            c = 12;
            break;
        case 'n':
            c = 10;
            break;
        case 'r':
            c = 13;
            break;
        case 't':
            c = 9;
            break;

        case 'v':
            version(JSCRIPT_ESCAPEV_BUG)
            {
            }
            else
            {
                c = 11;
            }
            break;

        case 'x':
            // \xH or \xHH; a single digit is reported as an error
            c = *p;
            p++;
            if(ishex(c))
            {
                uint v;

                n = 0;
                v = 0;
                for(;; )
                {
                    if(std.ascii.isDigit(c))
                        c -= '0';
                    else if(std.ascii.isLower(c))
                        c -= 'a' - 10;
                    else            // 'A' <= c && c <= 'F'
                        c -= 'A' - 10;
                    v = v * 16 + c;
                    c = *p;
                    if(++n >= 2 || !ishex(c))
                        break;
                    p++;
                }
                if(n == 1)
                    error(ERR_BAD_HEX_SEQUENCE);
                c = v;
            }
            else
                error(errmsgtbl[ERR_UNDEFINED_ESC_SEQUENCE], c);
            break;

        default:
            if(c > 0x7F)
            {
                // Multi-byte character: back up and decode it whole
                p--;
                c = get(p);
                p = inc(p);
            }
            if(isoctal(c))
            {
                // Up to three octal digits
                uint v;

                n = 0;
                v = 0;
                for(;; )
                {
                    v = v * 8 + (c - '0');
                    c = *p;
                    if(++n >= 3 || !isoctal(c))
                        break;
                    p++;
                }
                c = v;
            }
            // Don't throw error, just accept it
            //error("undefined escape sequence \\%c\n",c);
            break;
        }
        return c;
    }

    /**************************************
     * Scan a string literal. On entry p points at the opening quote.
     * Returns the decoded contents, or null after reporting an error for
     * a line break or end of input inside the literal.
     */

    d_string string(tchar quote)
    {
        tchar c;
        dchar d;
        d_string stringbuffer;

        //printf("Lexer.string('%c')\n", quote);
        p++;
        for(;; )
        {
            c = *p;
            switch(c)
            {
            case '"':
            case '\'':
                p++;
                if(c == quote)
                    return stringbuffer;
                break;                  // the other quote kind is ordinary text

            case '\\':
                p++;
                if(*p == 'u')
                    d = unicode();
                else
                    d = escapeSequence();
                dmdscript.utf.encode(stringbuffer, d);
                continue;

            case '\n':
            case '\r':
                p++;
                error(errmsgtbl[ERR_STRING_NO_END_QUOTE], quote);
                return null;

            case 0:
            case 0x1A:
                error(ERR_UNTERMINATED_STRING);
                return null;

            default:
                p++;
                break;
            }
            stringbuffer ~= c;
        }
        assert(0);
    }

    /**************************************
     * Scan regular expression. Return null with buffer
     * pointer intact if it is not a regexp.
     * On entry p points just past the opening '/'. On success p is moved
     * past the flags and the returned text includes both slashes and the
     * flags.
     */

    d_string regexp()
    {
        tchar c;
        immutable(tchar) * s;
        immutable(tchar) * start;

        /*
            RegExpLiteral:  RegExpBody RegExpFlags
              RegExpFlags:
                  empty
         |  RegExpFlags ContinuingIdentifierCharacter
              RegExpBody:  / RegExpFirstChar RegExpChars /
              RegExpFirstChar:
                  OrdinaryRegExpFirstChar
         |  \ NonTerminator
              OrdinaryRegExpFirstChar:  NonTerminator except \ | / | *
              RegExpChars:
                  empty
         |  RegExpChars RegExpChar
              RegExpChar:
                  OrdinaryRegExpChar
         |  \ NonTerminator
              OrdinaryRegExpChar: NonTerminator except \ | /
         */

        //writefln("Lexer.regexp()\n");
        start = p - 1;
        s = p;

        // Do RegExpBody
        for(;; )
        {
            c = *s;
            s++;
            switch(c)
            {
            case '\\':
                if(s == p)
                    return null;
                c = *s;
                switch(c)
                {
                case '\r':
                case '\n':                      // new line
                case 0:                         // end of file
                case 0x1A:                      // end of file
                    return null;                // not a regexp
                default:
                    break;
                }
                s++;
                continue;

            case '/':
                if(s == p + 1)                  // empty body
                    return null;
                break;

            case '\r':
            case '\n':                          // new line
            case 0:                             // end of file
            case 0x1A:                          // end of file
                return null;                    // not a regexp

            case '*':
                if(s == p + 1)                  // body may not start with '*'
                    return null;
                goto default;
            default:
                continue;
            }
            break;
        }

        // Do RegExpFlags
        for(;; )
        {
            c = *s;
            if(std.ascii.isAlphaNum(c) || c == '_' || c == '$')
            {
                s++;
            }
            else
                break;
        }

        // Finish pattern & return it
        p = s;
        return start[0 .. s - start].idup;
    }

    /***************************************
     * Parse the XXXX of a \uXXXX escape. On entry p points at the 'u'.
     * Reports ERR_BAD_U_SEQUENCE and stops early if fewer than four hex
     * digits follow. Returns the value accumulated so far.
     */

    dchar unicode()
    {
        dchar value;
        uint n;
        dchar c;

        value = 0;
        p++;
        for(n = 0; n < 4; n++)
        {
            c = *p;
            if(!ishex(c))
            {
                error(ERR_BAD_U_SEQUENCE);
                break;
            }
            p++;
            if(std.ascii.isDigit(c))
                c -= '0';
            else if(isasciilower(c))
                c -= 'a' - 10;
            else    // 'A' <= c && c <= 'F'
                c -= 'A' - 10;
            value <<= 4;
            value |= c;
        }
        return value;
    }

    /********************************************
     * Read a number.
     * Stores the value in t.realvalue and returns TOKreal, or reports
     * ERR_UNRECOGNIZED_N_LITERAL and returns TOKeof.
     */

    TOK number(Token *t)
    {
        immutable(tchar) * start;
        number_t intvalue;
        real realvalue;
        int base = 10;
        tchar c;

        start = p;
        for(;; )
        {
            c = *p;
            p++;
            switch(c)
            {
            case '0':
                // ECMA grammar implies that numbers with leading 0
                // like 015 are illegal. But other scripts allow them.
                if(p - start == 1)              // if leading 0
                    base = 8;
                goto case;
            case '1': case '2': case '3': case '4': case '5':
            case '6': case '7':
                break;

            case '8': case '9':                         // decimal digits
                if(base == 8)                           // and octal base
                    base = 10;                          // means back to decimal base
                break;

            default:
                p--;
                Lnumber:
                if(base == 0)
                    base = 10;
                intvalue = 0;
                foreach(tchar v; start[0 .. p - start])
                {
                    if('0' <= v && v <= '9')
                        v -= '0';
                    else if('a' <= v && v <= 'f')
                        v -= ('a' - 10);
                    else if('A' <= v && v <= 'F')
                        v -= ('A' - 10);
                    else
                        assert(0);
                    assert(v < base);
                    if((number_t.max - v) / base < intvalue)
                    {
                        // Would overflow number_t: redo the whole
                        // conversion in floating point.
                        realvalue = 0;
                        foreach(tchar w; start[0 .. p - start])
                        {
                            if('0' <= w && w <= '9')
                                w -= '0';
                            else if('a' <= w && w <= 'f')
                                w -= ('a' - 10);
                            else if('A' <= w && w <= 'F')
                                w -= ('A' - 10);
                            else
                                assert(0);
                            realvalue *= base;
                            realvalue += w;     // this loop's digit, not the outer v
                        }
                        t.realvalue = realvalue;
                        return TOKreal;
                    }
                    intvalue *= base;
                    intvalue += v;
                }
                t.realvalue = cast(double)intvalue;
                return TOKreal;

            case 'x':
            case 'X':
                if(p - start != 2 || !ishex(*p))
                    goto Lerr;
                do
                    p++;
                while(ishex(*p));
                start += 2;                     // skip the two-character prefix
                base = 16;
                goto Lnumber;

            case '.':
                while(std.ascii.isDigit(*p))
                    p++;
                if(*p == 'e' || *p == 'E')
                {
                    p++;
                    goto Lexponent;
                }
                goto Ldouble;

            case 'e':
            case 'E':
                Lexponent:
                if(*p == '+' || *p == '-')
                    p++;
                if(!std.ascii.isDigit(*p))
                    goto Lerr;
                do
                    p++;
                while(std.ascii.isDigit(*p));
                goto Ldouble;

                Ldouble:
                // convert double
                realvalue = std.c.stdlib.strtod(toStringz(start[0 .. p - start]), null);
                t.realvalue = realvalue;
                return TOKreal;
            }
        }

        Lerr:
        error(ERR_UNRECOGNIZED_N_LITERAL);
        return TOKeof;
    }

    /**************************
     * Return the keyword token for s, or TOKreserved (0) if s is not a
     * keyword. Candidates are narrowed by first letter and length before
     * any comparison is made.
     */
    static TOK isKeyword(const (tchar)[] s)
    {
        // Every keyword starts with a letter in 'a'..'w'
        if(s.length && s[0] >= 'a' && s[0] <= 'w')
            switch(s.length)
            {
            case 2:
                if(s[0] == 'i')
                {
                    if(s[1] == 'f')
                        return TOKif;
                    if(s[1] == 'n')
                        return TOKin;
                }
                else if(s[0] == 'd' && s[1] == 'o')
                    return TOKdo;
                break;

            case 3:
                switch(s[0])
                {
                case 'f':
                    if(s[1] == 'o' && s[2] == 'r')
                        return TOKfor;
                    break;
                case 'i':
                    if(s[1] == 'n' && s[2] == 't')
                        return TOKint;
                    break;
                case 'n':
                    if(s[1] == 'e' && s[2] == 'w')
                        return TOKnew;
                    break;
                case 't':
                    if(s[1] == 'r' && s[2] == 'y')
                        return TOKtry;
                    break;
                case 'v':
                    if(s[1] == 'a' && s[2] == 'r')
                        return TOKvar;
                    break;
                default:
                    break;
                }
                break;

            case 4:
                switch(s[0])
                {
                case 'b':
                    if(s[1] == 'y' && s[2] == 't' && s[3] == 'e')
                        return TOKbyte;
                    break;
                case 'c':
                    if(s[1] == 'a' && s[2] == 's' && s[3] == 'e')
                        return TOKcase;
                    if(s[1] == 'h' && s[2] == 'a' && s[3] == 'r')
                        return TOKchar;
                    break;
                case 'e':
                    if(s[1] == 'l' && s[2] == 's' && s[3] == 'e')
                        return TOKelse;
                    if(s[1] == 'n' && s[2] == 'u' && s[3] == 'm')
                        return TOKenum;
                    break;
                case 'g':
                    if(s[1] == 'o' && s[2] == 't' && s[3] == 'o')
                        return TOKgoto;
                    break;
                case 'l':
                    if(s[1] == 'o' && s[2] == 'n' && s[3] == 'g')
                        return TOKlong;
                    break;
                case 'n':
                    if(s[1] == 'u' && s[2] == 'l' && s[3] == 'l')
                        return TOKnull;
                    break;
                case 't':
                    if(s[1] == 'h' && s[2] == 'i' && s[3] == 's')
                        return TOKthis;
                    if(s[1] == 'r' && s[2] == 'u' && s[3] == 'e')
                        return TOKtrue;
                    break;
                case 'w':
                    if(s[1] == 'i' && s[2] == 't' && s[3] == 'h')
                        return TOKwith;
                    break;
                case 'v':
                    if(s[1] == 'o' && s[2] == 'i' && s[3] == 'd')
                        return TOKvoid;
                    break;
                default:
                    break;
                }
                break;

            case 5:
                switch(s)
                {
                case "break":               return TOKbreak;
                case "catch":               return TOKcatch;
                case "class":               return TOKclass;
                case "const":               return TOKconst;
                case "false":               return TOKfalse;
                case "final":               return TOKfinal;
                case "float":               return TOKfloat;
                case "short":               return TOKshort;
                case "super":               return TOKsuper;
                case "throw":               return TOKthrow;
                case "while":               return TOKwhile;
                default:
                    break;
                }
                break;

            case 6:
                switch(s)
                {
                case "delete":              return TOKdelete;
                case "double":              return TOKdouble;
                case "export":              return TOKexport;
                case "import":              return TOKimport;
                case "native":              return TOKnative;
                case "public":              return TOKpublic;
                case "return":              return TOKreturn;
                case "static":              return TOKstatic;
                case "switch":              return TOKswitch;
                case "throws":              return TOKthrows;
                case "typeof":              return TOKtypeof;
                default:
                    break;
                }
                break;

            case 7:
                switch(s)
                {
                case "boolean":             return TOKboolean;
                case "default":             return TOKdefault;
                case "extends":             return TOKextends;
                case "finally":             return TOKfinally;
                case "package":             return TOKpackage;
                case "private":             return TOKprivate;
                default:
                    break;
                }
                break;

            case 8:
                switch(s)
                {
                case "abstract":    return TOKabstract;
                case "continue":    return TOKcontinue;
                case "debugger":    return TOKdebugger;
                case "function":    return TOKfunction;
                default:
                    break;
                }
                break;

            case 9:
                switch(s)
                {
                case "interface":   return TOKinterface;
                case "protected":   return TOKprotected;
                case "transient":   return TOKtransient;
                default:
                    break;
                }
                break;

            case 10:
                switch(s)
                {
                case "implements":  return TOKimplements;
                case "instanceof":  return TOKinstanceof;
                default:
                    break;
                }
                break;

            case 12:
                if(s == "synchronized")
                    return TOKsynchronized;
                break;

            default:
                break;
            }
        return TOKreserved;             // not a keyword
    }
}


/****************************************
 */

/// A keyword's spelling paired with its token code.
struct Keyword
{
    string name;
    TOK    value;
}

// Every keyword and reserved word; init() copies the names into Token.tochars.
static Keyword[] keywords =
[
//    {	"",		TOK		},

    { "break", TOKbreak },
    { "case", TOKcase },
    { "continue", TOKcontinue },
    { "default", TOKdefault },
    { "delete", TOKdelete },
    { "do", TOKdo },
    { "else", TOKelse },
    { "export", TOKexport },
    { "false", TOKfalse },
    { "for", TOKfor },
    { "function", TOKfunction },
    { "if", TOKif },
    { "import", TOKimport },
    { "in", TOKin },
    { "new", TOKnew },
    { "null", TOKnull },
    { "return", TOKreturn },
    { "switch", TOKswitch },
    { "this", TOKthis },
    { "true", TOKtrue },
    { "typeof", TOKtypeof },
    { "var", TOKvar },
    { "void", TOKvoid },
    { "while", TOKwhile },
    { "with", TOKwith },

    { "catch", TOKcatch },
    { "class", TOKclass },
    { "const", TOKconst },
    { "debugger", TOKdebugger },
    { "enum", TOKenum },
    { "extends", TOKextends },
    { "finally", TOKfinally },
    { "super", TOKsuper },
    { "throw", TOKthrow },
    { "try", TOKtry },

    { "abstract", TOKabstract },
    { "boolean", TOKboolean },
    { "byte", TOKbyte },
    { "char", TOKchar },
    { "double", TOKdouble },
    { "final", TOKfinal },
    { "float", TOKfloat },
    { "goto", TOKgoto },
    { "implements", TOKimplements },
    { "instanceof", TOKinstanceof },
    { "int", TOKint },
    { "interface", TOKinterface },
    { "long", TOKlong },
    { "native", TOKnative },
    { "package", TOKpackage },
    { "private", TOKprivate },
    { "protected", TOKprotected },
    { "public", TOKpublic },
    { "short", TOKshort },
    { "static", TOKstatic },
    { "synchronized", TOKsynchronized },
    { "throws", TOKthrows },
    { "transient", TOKtransient },
];

/****************************************
 * Fill Token.tochars with the text of every keyword, operator and
 * punctuator, then set Lexer.inited.
 */
void init()
{
    uint u;
    TOK v;

    for(u = 0; u < keywords.length; u++)
    {
        d_string s;

        //writefln("keyword[%d] = '%s'", u, keywords[u].name);
        s = keywords[u].name;
        v = keywords[u].value;

        //writefln("tochars[%d] = '%s'", v, s);
        Token.tochars[v] = s;
    }

    Token.tochars[TOKreserved] = "reserved";
    Token.tochars[TOKeof] = "EOF";
    Token.tochars[TOKlbrace] = "{";
    Token.tochars[TOKrbrace] = "}";
    Token.tochars[TOKlparen] = "(";
    Token.tochars[TOKrparen] = ")";
    Token.tochars[TOKlbracket] = "[";
    Token.tochars[TOKrbracket] = "]";
    Token.tochars[TOKcolon] = ":";
    Token.tochars[TOKsemicolon] = ";";
    Token.tochars[TOKcomma] = ",";
    Token.tochars[TOKor] = "|";
    Token.tochars[TOKorass] = "|=";
    Token.tochars[TOKxor] = "^";
    Token.tochars[TOKxorass] = "^=";
    Token.tochars[TOKassign] = "=";
    Token.tochars[TOKless] = "<";
    Token.tochars[TOKgreater] = ">";
    Token.tochars[TOKlessequal] = "<=";
    Token.tochars[TOKgreaterequal] = ">=";
    Token.tochars[TOKequal] = "==";
    Token.tochars[TOKnotequal] = "!=";
    Token.tochars[TOKidentity] = "===";
    Token.tochars[TOKnonidentity] = "!==";
    Token.tochars[TOKshiftleft] = "<<";
    Token.tochars[TOKshiftright] = ">>";
    Token.tochars[TOKushiftright] = ">>>";
    Token.tochars[TOKplus] = "+";
    Token.tochars[TOKplusass] = "+=";
    Token.tochars[TOKminus] = "-";
    Token.tochars[TOKminusass] = "-=";
    Token.tochars[TOKmultiply] = "*";
    Token.tochars[TOKmultiplyass] = "*=";
    Token.tochars[TOKdivide] = "/";
    Token.tochars[TOKdivideass] = "/=";
    Token.tochars[TOKpercent] = "%";
    Token.tochars[TOKpercentass] = "%=";
    Token.tochars[TOKand] = "&";
    Token.tochars[TOKandass] = "&=";
    Token.tochars[TOKdot] = ".";
    Token.tochars[TOKquestion] = "?";
    Token.tochars[TOKtilde] = "~";
    Token.tochars[TOKnot] = "!";
    Token.tochars[TOKandand] = "&&";
    Token.tochars[TOKoror] = "||";
    Token.tochars[TOKplusplus] = "++";
    Token.tochars[TOKminusminus] = "--";
    Token.tochars[TOKcall] = "CALL";

    Lexer.inited = true;
}