/* Digital Mars DMDScript source code.
 * Copyright (c) 2000-2002 by Chromium Communications
 * D version Copyright (c) 2004-2010 by Digital Mars
 * Distributed under the Boost Software License, Version 1.0.
 * (See accompanying file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
 * written by Walter Bright
 * http://www.digitalmars.com
 *
 * D2 port by Dmitry Olshansky
 *
 * DMDScript is implemented in the D Programming Language,
 * http://www.digitalmars.com/d/
 *
 * For a C++ implementation of DMDScript, including COM support, see
 * http://www.digitalmars.com/dscript/cppscript.html
 */

/* Lexical Analyzer
 */

module dmdscript.lexer;

import std.range;
import std.algorithm;
import std.stdio;
import std.string;
import std.utf;
import std.outbuffer;
import std.ascii;
import core.stdc.stdlib;

import dmdscript.script;
import dmdscript.text;
import dmdscript.identifier;
import dmdscript.scopex;
import dmdscript.errmsgs;
import dmdscript.utf;

/* Tokens:
        (       )
        [       ]
        {       }
        <       >       <=      >=      ==      !=
        ===     !==
        <<      >>      <<=     >>=     >>>     >>>=
        +       -       +=      -=
        *       /       %       *=      /=      %=
        &       |       ^       &=      |=      ^=
        =       !       ~
        ++      --
        .       :       ,
        ?       &&      ||
 */

/// Token kind; one of the TOKxxx enumerators below.
alias int TOK;

enum
{
    TOKreserved,                // doubles as "not a keyword" (see Lexer.isKeyword)

    // Other
    TOKlparen, TOKrparen,
    TOKlbracket, TOKrbracket,
    TOKlbrace, TOKrbrace,
    TOKcolon, TOKneg,
    TOKpos,
    TOKsemicolon, TOKeof,
    TOKarray, TOKcall,
    TOKarraylit, TOKobjectlit,
    TOKcomma, TOKassert,

    // Operators
    TOKless, TOKgreater,
    TOKlessequal, TOKgreaterequal,
    TOKequal, TOKnotequal,
    TOKidentity, TOKnonidentity,
    TOKshiftleft, TOKshiftright,
    TOKshiftleftass, TOKshiftrightass,
    TOKushiftright, TOKushiftrightass,
    TOKplus, TOKminus, TOKplusass, TOKminusass,
    TOKmultiply, TOKdivide, TOKpercent,
    TOKmultiplyass, TOKdivideass, TOKpercentass,
    TOKand, TOKor, TOKxor,
    TOKandass, TOKorass, TOKxorass,
    TOKassign, TOKnot, TOKtilde,
    TOKplusplus, TOKminusminus, TOKdot,
    TOKquestion, TOKandand, TOKoror,

    // Leaf operators
    TOKnumber, TOKidentifier, TOKstring,
    TOKregexp, TOKreal,

    // Keywords
    TOKbreak, TOKcase, TOKcontinue,
    TOKdefault, TOKdelete, TOKdo,
    TOKelse, TOKexport, TOKfalse,
    TOKfor, TOKfunction, TOKif,
    TOKimport, TOKin, TOKnew,
    TOKnull, TOKreturn,
    TOKswitch, TOKthis, TOKtrue,
    TOKtypeof, TOKvar, TOKvoid,
    TOKwhile, TOKwith,

    // Reserved for ECMA extensions
    TOKcatch, TOKclass,
    TOKconst, TOKdebugger,
    TOKenum, TOKextends,
    TOKfinally, TOKsuper,
    TOKthrow, TOKtry,

    // Java keywords reserved for unknown reasons
    TOKabstract, TOKboolean,
    TOKbyte, TOKchar,
    TOKdouble, TOKfinal,
    TOKfloat, TOKgoto,
    TOKimplements, TOKinstanceof,
    TOKint, TOKinterface,
    TOKlong, TOKnative,
    TOKpackage, TOKprivate,
    TOKprotected, TOKpublic,
    TOKshort, TOKstatic,
    TOKsynchronized,
    TOKtransient,

    TOKmax
}

/// Returns: non-zero if c is an ASCII octal digit ('0'..'7').
int isoctal(dchar c)
{
    return('0' <= c && c <= '7');
}

/// Returns: non-zero if c is an ASCII decimal digit ('0'..'9').
int isasciidigit(dchar c)
{
    return('0' <= c && c <= '9');
}

/// Returns: non-zero if c is an ASCII lower-case letter ('a'..'z').
int isasciilower(dchar c)
{
    return('a' <= c && c <= 'z');
}

/// Returns: non-zero if c is an ASCII upper-case letter ('A'..'Z').
int isasciiupper(dchar c)
{
    return('A' <= c && c <= 'Z');
}

/// Returns: non-zero if c is an ASCII hexadecimal digit (0-9, a-f, A-F).
int ishex(dchar c)
{
    return
        ('0' <= c && c <= '9') ||
        ('a' <= c && c <= 'f') ||
        ('A' <= c && c <= 'F');
}


/******************************************************/

/**
 * One lexical token.  Tokens form a singly linked list through `next`,
 * which Lexer uses both for lookahead and for its free list.
 */
struct Token
{
    Token *next;
    immutable(tchar) *ptr;               // pointer to first character of this token within buffer
    uint linnum;
    TOK value;
    immutable(tchar) *sawLineTerminator; // where we saw the last line terminator
    union
    {
        number_t intvalue;
        real_t realvalue;
        d_string string;
        Identifier *ident;
    }

    /// Spelling of each token kind, indexed by TOK; filled in by init().
    static d_string[TOKmax] tochars;

    /// Write the token's text followed by a newline to stdout.
    void print()
    {
        // writeln, not writefln: the token text is data, not a format
        // string, and may legitimately contain '%'.
        writeln(toString());
    }

    /// Returns: the text of this token, using the payload that matches `value`.
    d_string toString()
    {
        d_string p;

        switch(value)
        {
        case TOKnumber:
            p = std.string.format("%d", intvalue);
            break;

        case TOKreal:
        {
            // Print integral values without a fractional part
            long l = cast(long)realvalue;
            if(l == realvalue)
                p = std.string.format("%s", l);
            else
                p = std.string.format("%s", realvalue);
            break;
        }

        case TOKstring:
        case TOKregexp:
            p = string;
            break;

        case TOKidentifier:
            p = ident.toString();
            break;

        default:
            p = toString(value);
            break;
        }
        return p;
    }

    /**
     * Returns: the spelling registered for `value` in tochars, or
     * "TOK<n>" when none is registered or `value` is out of range.
     */
    static d_string toString(TOK value)
    {
        d_string p;

        if(value >= 0 && value < TOKmax)
            p = tochars[value];
        if(p is null)
            p = std.string.format("TOK%d", value);
        return p;
    }
}


/*******************************************************************/

/**
 * Converts source text into a stream of Tokens.
 *
 * The source buffer is always terminated by 0 or 0x1A (the constructor
 * appends 0x1A when needed), so the scanning code can look for the
 * sentinel instead of checking bounds.
 */
class Lexer
{
    Identifier[d_string] stringtable;
    Token* freelist;            // recycled Token's

    d_string sourcename;        // for error message strings

    d_string base;              // pointer to start of buffer
    immutable(char) * end;      // past end of buffer
    immutable(char) * p;        // current character
    uint currentline;
    Token token;                // current token
    OutBuffer stringbuffer;
    int useStringtable;         // use for Identifiers

    ErrInfo errinfo;            // syntax error information
    static bool inited;         // set once init() has filled Token.tochars

    /// Returns: a Token taken from the free list, or a newly allocated one.
    Token* allocToken()
    {
        if(freelist)
        {
            Token *t = freelist;
            freelist = t.next;
            return t;
        }

        return new Token();
    }

    /**
     * Params:
     *  sourcename     = name used as prefix of error messages
     *  base           = source text; 0x1A is appended unless it already
     *                   ends in 0 or 0x1A
     *  useStringtable = non-zero to intern identifiers in stringtable
     */
    this(d_string sourcename, d_string base, int useStringtable)
    {
        import core.stdc.string : memset;

        if(!inited)
            init();

        memset(&token, 0, token.sizeof);
        this.useStringtable = useStringtable;
        this.sourcename = sourcename;
        if(!base.length || (base[$ - 1] != 0 && base[$ - 1] != 0x1A))
            base ~= cast(tchar)0x1A;
        this.base = base;
        this.end = base.ptr + base.length;
        p = base.ptr;
        currentline = 1;
        freelist = null;
    }

    ~this()
    {
        freelist = null;
        sourcename = null;
        base = null;
        end = null;
        p = null;
    }

    /// Returns: the code point that starts at p (p must point into base).
    dchar get(immutable(tchar)* p)
    {
        size_t idx = p - base.ptr;
        return std.utf.decode(base, idx);
    }

    /// Returns: p advanced past the code point that starts at p.
    immutable(tchar) * inc(immutable(tchar) * p)
    {
        size_t idx = p - base.ptr;
        std.utf.decode(base, idx);
        return base.ptr + idx;
    }

    /// Report the error whose format string is errmsgtbl[msgnum].
    void error(ARGS...)(int msgnum, ARGS args)
    {
        error(errmsgtbl[msgnum], args);
    }

    /**
     * Record a syntax error at the current position.
     *
     * Only the first error is kept in errinfo.  In all cases the rest of
     * the input is consumed and any lookahead is dropped, so the next
     * token scanned is TOKeof.
     */
    void error(ARGS...)(.string fmt, ARGS args)
    {
        import std.format : format, formattedWrite;

        uint linnum = 1;
        immutable(tchar) * s;
        immutable(tchar) * slinestart;
        immutable(tchar) * slineend;
        d_string buf;

        // Find the beginning of the line
        slinestart = base.ptr;
        for(s = base.ptr; s != p; s++)
        {
            if(*s == '\n')
            {
                linnum++;
                slinestart = s + 1;
            }
        }

        // Find the end of the line
        while(*s != '\n' && *s != 0 && *s != 0x1A)
            s++;
        slineend = s;

        buf = format("%s(%d) : Error: ", sourcename, linnum);

        void putc(dchar c)
        {
            dmdscript.utf.encode(buf, c);
        }

        formattedWrite(&putc, fmt, args);

        if(!errinfo.message)
        {
            errinfo.message = buf;
            errinfo.linnum = linnum;
            errinfo.charpos = cast(uint)(p - slinestart);

            size_t len = slineend - slinestart;
            errinfo.srcline = slinestart[0 .. len];
        }

        // Consume input until the end
        while(*p != 0x1A && *p != 0)
            p++;
        token.next = null;              // dump any lookahead
    }

    /************************************************
     * Given source text, convert loc to a string for the corresponding line.
     * The text must be terminated by 0 or 0x1A.  Trailing '\r's are
     * stripped.  Returns null if src is null.
     */

    static d_string locToSrcline(immutable(char) *src, Loc loc)
    {
        immutable(char) * slinestart;
        immutable(char) * slineend;
        immutable(char) * s;
        uint linnum = 1;
        size_t len;

        if(!src)
            return null;
        slinestart = src;
        for(s = src;; s++)
        {
            switch(*s)
            {
            case '\n':
                if(linnum == loc)
                {
                    slineend = s;
                    break;
                }
                slinestart = s + 1;
                linnum++;
                continue;

            case 0:
            case 0x1A:
                slineend = s;
                break;

            default:
                continue;
            }
            break;
        }

        // Remove trailing \r's
        while(slinestart < slineend && slineend[-1] == '\r')
            --slineend;

        len = slineend - slinestart;
        return slinestart[0 .. len];
    }

    /**
     * Advance to the next token, taking it from the lookahead list if
     * there is one and scanning otherwise.
     * Returns: the kind of the new current token.
     */
    TOK nextToken()
    {
        if(token.next)
        {
            Token *t = token.next;
            token = *t;
            t.next = freelist;          // recycle the lookahead node
            freelist = t;
        }
        else
        {
            scan(&token);
        }
        return token.value;
    }

    /// Returns: the token following ct, scanning it if not already buffered.
    Token *peek(Token *ct)
    {
        Token *t;

        if(ct.next)
            t = ct.next;
        else
        {
            t = allocToken();
            scan(t);
            t.next = null;
            ct.next = t;
        }
        return t;
    }

    void insertSemicolon(immutable(tchar) *loc)
    {
        // Push current token back into the input, and
        // create a new current token that is a semicolon
        Token *t;

        t = allocToken();
        *t = token;
        token.next = t;
        token.value = TOKsemicolon;
        token.ptr = loc;
        token.sawLineTerminator = null;
    }

    /**********************************
     * Horrible kludge to support disambiguating TOKregexp from TOKdivide.
     * The idea is, if we are looking for a TOKdivide, and find instead
     * a TOKregexp, we back up and rescan.
     */

    void rescan()
    {
        token.next = null;      // no lookahead
                                // should put on freelist
        p = token.ptr + 1;
    }


    /****************************
     * Turn next token in buffer into a token.
     * White space and comments are skipped; t.sawLineTerminator is set
     * if a line terminator was passed on the way.
     */

    void scan(Token *t)
    {
        static import std.ascii;
        static import std.uni;

        tchar c;
        dchar d;
        d_string id;

        t.sawLineTerminator = null;
        for(;; )
        {
            t.ptr = p;
            switch(*p)
            {
            case 0:
            case 0x1A:
                t.value = TOKeof;               // end of file
                return;

            case ' ':
            case '\t':
            case '\v':
            case '\f':
            case 0xA0:                          // no-break space
                p++;
                continue;                       // skip white space

            case '\n':                          // line terminator
                currentline++;
                goto case;
            case '\r':
                t.sawLineTerminator = p;
                p++;
                continue;

            case '"':
            case '\'':
                t.string = string(*p);
                t.value = TOKstring;
                return;

            case '0': .. case '9':
                t.value = number(t);
                return;

            case 'a': .. case 'z':
            case 'A': .. case 'Z':
            case '_':
            case '$':
                Lidentifier:
                {
                    id = null;

                    static bool isidletter(dchar d)
                    {
                        return std.ascii.isAlphaNum(d) || d == '_' || d == '$' || (d >= 0x80 && std.uni.isAlpha(d));
                    }

                    do
                    {
                        p = inc(p);
                        d = get(p);
                        if(d == '\\' && p[1] == 'u')
                        {
                            // From here on the identifier text differs from
                            // the source text, so build it in a copy.
                            Lidentifier2:
                            id = t.ptr[0 .. p - t.ptr].idup;
                            auto ps = p;
                            p++;
                            d = unicode();
                            if(!isidletter(d))
                            {
                                p = ps;
                                break;
                            }
                            dmdscript.utf.encode(id, d);
                            for(;; )
                            {
                                d = get(p);
                                if(d == '\\' && p[1] == 'u')
                                {
                                    auto pstart = p;
                                    p++;
                                    d = unicode();
                                    if(isidletter(d))
                                        dmdscript.utf.encode(id, d);
                                    else
                                    {
                                        p = pstart;
                                        goto Lidentifier3;
                                    }
                                }
                                else if(isidletter(d))
                                {
                                    dmdscript.utf.encode(id, d);
                                    p = inc(p);
                                }
                                else
                                    goto Lidentifier3;
                            }
                        }
                    } while(isidletter(d));
                    id = t.ptr[0 .. p - t.ptr];
                    if(!id.length)
                    {
                        // A \uXXXX escape at the start of a token that does
                        // not denote an identifier character.  Returning an
                        // empty identifier here would never advance p.
                        error(errmsgtbl[ERR_BAD_CHAR_C], cast(dchar)'\\');
                        continue;
                    }
                    Lidentifier3:
                    t.value = isKeyword(id);
                    if(t.value)
                        return;
                    if(useStringtable)
                    {
                        Identifier* i = id in stringtable;
                        if(!i)
                        {
                            stringtable[id] = Identifier.init;
                            i = id in stringtable;
                        }
                        i.value.putVstring(id);
                        i.value.hashString();
                        t.ident = i;
                    }
                    else
                        t.ident = Identifier.build(id);
                    t.value = TOKidentifier;
                    return;
                }

            case '/':
                p++;
                c = *p;
                if(c == '=')
                {
                    p++;
                    t.value = TOKdivideass;
                    return;
                }
                else if(c == '*')
                {
                    // Block comment
                    p++;
                    for(;; p++)
                    {
                        c = *p;
                        Lcomment:
                        switch(c)
                        {
                        case '*':
                            p++;
                            c = *p;
                            if(c == '/')
                            {
                                p++;
                                break;
                            }
                            goto Lcomment;

                        case '\n':
                            currentline++;
                            goto case;
                        case '\r':
                            t.sawLineTerminator = p;
                            continue;

                        case 0:
                        case 0x1A:
                            error(ERR_BAD_C_COMMENT);
                            t.value = TOKeof;
                            return;

                        default:
                            continue;
                        }
                        break;
                    }
                    continue;
                }
                else if(c == '/')
                {
                    // Line comment: skip to the next line terminator or EOF
                    auto r = p[0..end-p];
                    uint j;
                    do{
                        r.popFront();
                        j = startsWith(r,'\n','\r','\0',0x1A,'\u2028','\u2029');

                    }while(!j);
                    p = &r[0];
                    switch(j){
                    case 1:
                        currentline++;
                        goto case;
                    case 2: case 5: case 6:
                        t.sawLineTerminator = p;
                        break;
                    case 3: case 4:
                        t.value = TOKeof;
                        return;
                    default:
                        assert(0);
                    }
                    p = inc(p);
                    continue;
                }
                else if((t.string = regexp()) != null)
                    t.value = TOKregexp;
                else
                    t.value = TOKdivide;
                return;

            case '.':
            {
                immutable(tchar) * q;
                q = p + 1;
                c = *q;
                if(std.ascii.isDigit(c))
                    t.value = number(t);
                else
                {
                    t.value = TOKdot;
                    p = q;
                }
                return;
            }

            case '&':
                p++;
                c = *p;
                if(c == '=')
                {
                    p++;
                    t.value = TOKandass;
                }
                else if(c == '&')
                {
                    p++;
                    t.value = TOKandand;
                }
                else
                    t.value = TOKand;
                return;

            case '|':
                p++;
                c = *p;
                if(c == '=')
                {
                    p++;
                    t.value = TOKorass;
                }
                else if(c == '|')
                {
                    p++;
                    t.value = TOKoror;
                }
                else
                    t.value = TOKor;
                return;

            case '-':
                p++;
                c = *p;
                if(c == '=')
                {
                    p++;
                    t.value = TOKminusass;
                }
                else if(c == '-')
                {
                    p++;

                    // If the last token in the file is --> then
                    // treat it as EOF. This is to accept broken
                    // scripts that forgot to protect the closing -->
                    // with a // comment.
                    if(*p == '>')
                    {
                        // Scan ahead to see if it's the last token
                        immutable(tchar) * q;

                        q = p;
                        Lahead:
                        for(;; )
                        {
                            switch(*++q)
                            {
                            case 0:
                            case 0x1A:
                                t.value = TOKeof;
                                p = q;
                                return;

                            case ' ':
                            case '\t':
                            case '\v':
                            case '\f':
                            case '\n':
                            case '\r':
                            case 0xA0:          // no-break space
                                continue;

                            default:
                                // More source follows, so this is an
                                // ordinary "--" followed by ">".
                                break Lahead;
                            }
                        }
                    }
                    t.value = TOKminusminus;
                }
                else
                    t.value = TOKminus;
                return;

            case '+':
                p++;
                c = *p;
                if(c == '=')
                {
                    p++;
                    t.value = TOKplusass;
                }
                else if(c == '+')
                {
                    p++;
                    t.value = TOKplusplus;
                }
                else
                    t.value = TOKplus;
                return;

            case '<':
                p++;
                c = *p;
                if(c == '=')
                {
                    p++;
                    t.value = TOKlessequal;
                }
                else if(c == '<')
                {
                    p++;
                    c = *p;
                    if(c == '=')
                    {
                        p++;
                        t.value = TOKshiftleftass;
                    }
                    else
                        t.value = TOKshiftleft;
                }
                else if(c == '!' && p[1] == '-' && p[2] == '-')
                {       // Special comment to end of line
                    p += 2;
                    for(;; )
                    {
                        p++;
                        switch(*p)
                        {
                        case '\n':
                            currentline++;
                            goto case;
                        case '\r':
                            t.sawLineTerminator = p;
                            break;

                        case 0:
                        case 0x1A:              // end of file
                            error(ERR_BAD_HTML_COMMENT);
                            t.value = TOKeof;
                            return;

                        default:
                            continue;
                        }
                        break;
                    }
                    p++;
                    continue;
                }
                else
                    t.value = TOKless;
                return;

            case '>':
                p++;
                c = *p;
                if(c == '=')
                {
                    p++;
                    t.value = TOKgreaterequal;
                }
                else if(c == '>')
                {
                    p++;
                    c = *p;
                    if(c == '=')
                    {
                        p++;
                        t.value = TOKshiftrightass;
                    }
                    else if(c == '>')
                    {
                        p++;
                        c = *p;
                        if(c == '=')
                        {
                            p++;
                            t.value = TOKushiftrightass;
                        }
                        else
                            t.value = TOKushiftright;
                    }
                    else
                        t.value = TOKshiftright;
                }
                else
                    t.value = TOKgreater;
                return;

            case '(': p++; t.value = TOKlparen;    return;
            case ')': p++; t.value = TOKrparen;    return;
            case '[': p++; t.value = TOKlbracket;  return;
            case ']': p++; t.value = TOKrbracket;  return;
            case '{': p++; t.value = TOKlbrace;    return;
            case '}': p++; t.value = TOKrbrace;    return;
            case '~': p++; t.value = TOKtilde;     return;
            case '?': p++; t.value = TOKquestion;  return;
            case ',': p++; t.value = TOKcomma;     return;
            case ';': p++; t.value = TOKsemicolon; return;
            case ':': p++; t.value = TOKcolon;     return;

            case '*':
                p++;
                c = *p;
                if(c == '=')
                {
                    p++;
                    t.value = TOKmultiplyass;
                }
                else
                    t.value = TOKmultiply;
                return;

            case '%':
                p++;
                c = *p;
                if(c == '=')
                {
                    p++;
                    t.value = TOKpercentass;
                }
                else
                    t.value = TOKpercent;
                return;

            case '^':
                p++;
                c = *p;
                if(c == '=')
                {
                    p++;
                    t.value = TOKxorass;
                }
                else
                    t.value = TOKxor;
                return;

            case '=':
                p++;
                c = *p;
                if(c == '=')
                {
                    p++;
                    c = *p;
                    if(c == '=')
                    {
                        p++;
                        t.value = TOKidentity;
                    }
                    else
                        t.value = TOKequal;
                }
                else
                    t.value = TOKassign;
                return;

            case '!':
                p++;
                c = *p;
                if(c == '=')
                {
                    p++;
                    c = *p;
                    if(c == '=')
                    {
                        p++;
                        t.value = TOKnonidentity;
                    }
                    else
                        t.value = TOKnotequal;
                }
                else
                    t.value = TOKnot;
                return;

            case '\\':
                if(p[1] == 'u')
                {
                    // \uXXXX starts an identifier
                    goto Lidentifier2;
                }
                goto default;
            default:
                d = get(p);
                if(d >= 0x80 && std.uni.isAlpha(d))
                    goto Lidentifier;
                else if(isStrWhiteSpaceChar(d))
                {
                    p = inc(p);            //also skip unicode whitespace
                    continue;
                }
                else
                {
                    if(std.ascii.isPrintable(d))
                        error(errmsgtbl[ERR_BAD_CHAR_C], d);
                    else
                        error(errmsgtbl[ERR_BAD_CHAR_X], d);
                }
                continue;
            }
        }
    }

    /*******************************************
     * Parse escape sequence.
     * On entry p points just past the backslash; on exit it points past
     * the escape.  Unknown escapes yield the escaped character itself.
     */

    dchar escapeSequence()
    {
        uint c;
        int n;

        c = *p;
        p++;
        switch(c)
        {
        case '\'':
        case '"':
        case '?':
        case '\\':
            break;
        case 'a':
            c = 7;
            break;
        case 'b':
            c = 8;
            break;
        case 'f':
            c = 12;
            break;
        case 'n':
            c = 10;
            break;
        case 'r':
            c = 13;
            break;
        case 't':
            c = 9;
            break;

        case 'v':
            version(JSCRIPT_ESCAPEV_BUG)
            {
            }
            else
            {
                c = 11;
            }
            break;

        case 'x':
            // \xH or \xHH
            c = *p;
            p++;
            if(ishex(c))
            {
                uint v;

                n = 0;
                v = 0;
                for(;; )
                {
                    if(std.ascii.isDigit(c))
                        c -= '0';
                    else if(std.ascii.isLower(c))
                        c -= 'a' - 10;
                    else            // 'A' <= c && c <= 'Z'
                        c -= 'A' - 10;
                    v = v * 16 + c;
                    c = *p;
                    if(++n >= 2 || !ishex(c))
                        break;
                    p++;
                }
                if(n == 1)
                    error(ERR_BAD_HEX_SEQUENCE);
                c = v;
            }
            else
                error(errmsgtbl[ERR_UNDEFINED_ESC_SEQUENCE], c);
            break;

        default:
            if(c > 0x7F)
            {
                // Lead byte of a multi-byte sequence: decode all of it
                p--;
                c = get(p);
                p = inc(p);
            }
            if(isoctal(c))
            {
                // Up to three octal digits
                uint v;

                n = 0;
                v = 0;
                for(;; )
                {
                    v = v * 8 + (c - '0');
                    c = *p;
                    if(++n >= 3 || !isoctal(c))
                        break;
                    p++;
                }
                c = v;
            }
            // Don't throw error, just accept it
            break;
        }
        return cast(dchar)c;
    }

    /**************************************
     * Scan a string literal.  On entry p points at the opening quote.
     * Returns the contents with escapes processed, or null after
     * reporting an error for an unterminated literal.
     */

    d_string string(tchar quote)
    {
        tchar c;
        dchar d;
        d_string result;

        p++;
        for(;; )
        {
            c = *p;
            switch(c)
            {
            case '"':
            case '\'':
                p++;
                if(c == quote)
                    return result;
                break;                  // the other quote kind is ordinary text

            case '\\':
                p++;
                if(*p == 'u')
                    d = unicode();
                else
                    d = escapeSequence();
                dmdscript.utf.encode(result, d);
                continue;

            case '\n':
            case '\r':
                p++;
                error(errmsgtbl[ERR_STRING_NO_END_QUOTE], quote);
                return null;

            case 0:
            case 0x1A:
                error(ERR_UNTERMINATED_STRING);
                return null;

            default:
                p++;
                break;
            }
            result ~= c;
        }
        assert(0);
    }

    /**************************************
     * Scan regular expression. Return null with buffer
     * pointer intact if it is not a regexp.
     */

    d_string regexp()
    {
        tchar c;
        immutable(tchar) * s;
        immutable(tchar) * start;

        /*
            RegExpLiteral:  RegExpBody RegExpFlags
              RegExpFlags:
                  empty
         |  RegExpFlags ContinuingIdentifierCharacter
              RegExpBody:  / RegExpFirstChar RegExpChars /
              RegExpFirstChar:
                  OrdinaryRegExpFirstChar
         |  \ NonTerminator
              OrdinaryRegExpFirstChar:  NonTerminator except \ | / | *
              RegExpChars:
                  empty
         |  RegExpChars RegExpChar
              RegExpChar:
                  OrdinaryRegExpChar
         |  \ NonTerminator
              OrdinaryRegExpChar: NonTerminator except \ | /
         */

        start = p - 1;                  // the opening '/'
        s = p;

        // Do RegExpBody
        for(;; )
        {
            c = *s;
            s++;
            switch(c)
            {
            case '\\':
                if(s == p)
                    return null;
                c = *s;
                switch(c)
                {
                case '\r':
                case '\n':                      // new line
                case 0:                         // end of file
                case 0x1A:                      // end of file
                    return null;                // not a regexp
                default:
                    break;
                }
                s++;
                continue;

            case '/':
                if(s == p + 1)
                    return null;
                break;

            case '\r':
            case '\n':                          // new line
            case 0:                             // end of file
            case 0x1A:                          // end of file
                return null;                    // not a regexp

            case '*':
                if(s == p + 1)
                    return null;
                goto default;
            default:
                continue;
            }
            break;
        }

        // Do RegExpFlags
        for(;; )
        {
            c = *s;
            if(std.ascii.isAlphaNum(c) || c == '_' || c == '$')
            {
                s++;
            }
            else
                break;
        }

        // Finish pattern & return it
        p = s;
        return start[0 .. s - start].idup;
    }

    /***************************************
     * Parse the XXXX of a \uXXXX escape.  On entry p points at the 'u'.
     * On a non-hex digit an error is reported and the value accumulated
     * so far is returned.
     */

    dchar unicode()
    {
        dchar value;
        uint n;
        dchar c;

        value = 0;
        p++;
        for(n = 0; n < 4; n++)
        {
            c = *p;
            if(!ishex(c))
            {
                error(ERR_BAD_U_SEQUENCE);
                break;
            }
            p++;
            if(std.ascii.isDigit(c))
                c -= '0';
            else if(isasciilower(c))
                c -= 'a' - 10;
            else    // 'A' <= c && c <= 'Z'
                c -= 'A' - 10;
            value <<= 4;
            value |= c;
        }
        return value;
    }

    /********************************************
     * Read a number.
     * Stores the value in t.realvalue and returns TOKreal, or reports
     * an error and returns TOKeof for a malformed literal.
     */

    TOK number(Token *t)
    {
        immutable(tchar) * start;
        number_t intvalue;
        real realvalue;
        int base = 10;
        tchar c;

        start = p;
        for(;; )
        {
            c = *p;
            p++;
            switch(c)
            {
            case '0':
                // ECMA grammar implies that numbers with leading 0
                // like 015 are illegal. But other scripts allow them.
                if(p - start == 1)              // if leading 0
                    base = 8;
                goto case;
            case '1': .. case '7':
                break;

            case '8': case '9':                         // decimal digits
                if(base == 8)                           // and octal base
                    base = 10;                          // means back to decimal base
                break;

            default:
                p--;
                Lnumber:
                intvalue = 0;
                foreach(tchar v; start[0 .. p - start])
                {
                    if('0' <= v && v <= '9')
                        v -= '0';
                    else if('a' <= v && v <= 'f')
                        v -= ('a' - 10);
                    else if('A' <= v && v <= 'F')
                        v -= ('A' - 10);
                    else
                        assert(0);
                    assert(v < base);
                    if((number_t.max - v) / base < intvalue)
                    {
                        // Does not fit in number_t: redo the whole
                        // conversion in floating point.
                        realvalue = 0;
                        foreach(tchar w; start[0 .. p - start])
                        {
                            if('0' <= w && w <= '9')
                                w -= '0';
                            else if('a' <= w && w <= 'f')
                                w -= ('a' - 10);
                            else if('A' <= w && w <= 'F')
                                w -= ('A' - 10);
                            else
                                assert(0);
                            realvalue *= base;
                            realvalue += w;
                        }
                        t.realvalue = realvalue;
                        return TOKreal;
                    }
                    intvalue *= base;
                    intvalue += v;
                }
                t.realvalue = cast(double)intvalue;
                return TOKreal;

            case 'x':
            case 'X':
                // Only "0x" / "0X" followed by a hex digit starts a hex literal
                if(p - start != 2 || *start != '0' || !ishex(*p))
                    goto Lerr;
                do
                    p++;
                while(ishex(*p));
                start += 2;
                base = 16;
                goto Lnumber;

            case '.':
                while(std.ascii.isDigit(*p))
                    p++;
                if(*p == 'e' || *p == 'E')
                {
                    p++;
                    goto Lexponent;
                }
                goto Ldouble;

            case 'e':
            case 'E':
                Lexponent:
                if(*p == '+' || *p == '-')
                    p++;
                if(!std.ascii.isDigit(*p))
                    goto Lerr;
                do
                    p++;
                while(std.ascii.isDigit(*p));
                goto Ldouble;

                Ldouble:
                // convert double
                realvalue = core.stdc.stdlib.strtod(toStringz(start[0 .. p - start]), null);
                t.realvalue = realvalue;
                return TOKreal;
            }
        }

        Lerr:
        error(ERR_UNRECOGNIZED_N_LITERAL);
        return TOKeof;
    }

    /**
     * Returns: the keyword token for s, or TOKreserved (0) if s is not a
     * keyword.
     */
    static TOK isKeyword(const (tchar)[] s)
    {
        // Cheap rejection: every keyword is 2..12 characters long and
        // starts with a letter in 'a'..'w'.  This also rejects empty input.
        if(s.length < 2 || s.length > 12 || s[0] < 'a' || s[0] > 'w')
            return TOKreserved;

        switch(s)
        {
        case "if":           return TOKif;
        case "in":           return TOKin;
        case "do":           return TOKdo;

        case "for":          return TOKfor;
        case "int":          return TOKint;
        case "new":          return TOKnew;
        case "try":          return TOKtry;
        case "var":          return TOKvar;

        case "byte":         return TOKbyte;
        case "case":         return TOKcase;
        case "char":         return TOKchar;
        case "else":         return TOKelse;
        case "enum":         return TOKenum;
        case "goto":         return TOKgoto;
        case "long":         return TOKlong;
        case "null":         return TOKnull;
        case "this":         return TOKthis;
        case "true":         return TOKtrue;
        case "with":         return TOKwith;
        case "void":         return TOKvoid;

        case "break":        return TOKbreak;
        case "catch":        return TOKcatch;
        case "class":        return TOKclass;
        case "const":        return TOKconst;
        case "false":        return TOKfalse;
        case "final":        return TOKfinal;
        case "float":        return TOKfloat;
        case "short":        return TOKshort;
        case "super":        return TOKsuper;
        case "throw":        return TOKthrow;
        case "while":        return TOKwhile;

        case "delete":       return TOKdelete;
        case "double":       return TOKdouble;
        case "export":       return TOKexport;
        case "import":       return TOKimport;
        case "native":       return TOKnative;
        case "public":       return TOKpublic;
        case "return":       return TOKreturn;
        case "static":       return TOKstatic;
        case "switch":       return TOKswitch;
        case "typeof":       return TOKtypeof;

        case "boolean":      return TOKboolean;
        case "default":      return TOKdefault;
        case "extends":      return TOKextends;
        case "finally":      return TOKfinally;
        case "package":      return TOKpackage;
        case "private":      return TOKprivate;

        case "abstract":     return TOKabstract;
        case "continue":     return TOKcontinue;
        case "debugger":     return TOKdebugger;
        case "function":     return TOKfunction;

        case "interface":    return TOKinterface;
        case "protected":    return TOKprotected;
        case "transient":    return TOKtransient;

        case "implements":   return TOKimplements;
        case "instanceof":   return TOKinstanceof;

        case "synchronized": return TOKsynchronized;

        default:
            return TOKreserved;         // not a keyword
        }
    }
}


/****************************************
 * Keyword spelling paired with its token kind.
 */

struct Keyword
{
    string name;
    TOK value;
}

/// All keywords and reserved words; used by init() to fill Token.tochars.
static immutable Keyword[] keywords =
[
//    {	"",		TOK		},

    { "break", TOKbreak },
    { "case", TOKcase },
    { "continue", TOKcontinue },
    { "default", TOKdefault },
    { "delete", TOKdelete },
    { "do", TOKdo },
    { "else", TOKelse },
    { "export", TOKexport },
    { "false", TOKfalse },
    { "for", TOKfor },
    { "function", TOKfunction },
    { "if", TOKif },
    { "import", TOKimport },
    { "in", TOKin },
    { "new", TOKnew },
    { "null", TOKnull },
    { "return", TOKreturn },
    { "switch", TOKswitch },
    { "this", TOKthis },
    { "true", TOKtrue },
    { "typeof", TOKtypeof },
    { "var", TOKvar },
    { "void", TOKvoid },
    { "while", TOKwhile },
    { "with", TOKwith },

    { "catch", TOKcatch },
    { "class", TOKclass },
    { "const", TOKconst },
    { "debugger", TOKdebugger },
    { "enum", TOKenum },
    { "extends", TOKextends },
    { "finally", TOKfinally },
    { "super", TOKsuper },
    { "throw", TOKthrow },
    { "try", TOKtry },

    { "abstract", TOKabstract },
    { "boolean", TOKboolean },
    { "byte", TOKbyte },
    { "char", TOKchar },
    { "double", TOKdouble },
    { "final", TOKfinal },
    { "float", TOKfloat },
    { "goto", TOKgoto },
    { "implements", TOKimplements },
    { "instanceof", TOKinstanceof },
    { "int", TOKint },
    { "interface", TOKinterface },
    { "long", TOKlong },
    { "native", TOKnative },
    { "package", TOKpackage },
    { "private", TOKprivate },
    { "protected", TOKprotected },
    { "public", TOKpublic },
    { "short", TOKshort },
    { "static", TOKstatic },
    { "synchronized", TOKsynchronized },
    { "transient", TOKtransient },
];

/**
 * Fill Token.tochars with the spelling of every keyword, operator and
 * punctuator, then mark the lexer tables as initialized.  Called from
 * the Lexer constructor the first time a Lexer is created.
 */
void init()
{
    uint u;
    TOK v;

    for(u = 0; u < keywords.length; u++)
    {
        d_string s;

        s = keywords[u].name;
        v = keywords[u].value;

        Token.tochars[v] = s;
    }

    Token.tochars[TOKreserved] = "reserved";
    Token.tochars[TOKeof] = "EOF";
    Token.tochars[TOKlbrace] = "{";
    Token.tochars[TOKrbrace] = "}";
    Token.tochars[TOKlparen] = "(";
    Token.tochars[TOKrparen] = ")";
    Token.tochars[TOKlbracket] = "[";
    Token.tochars[TOKrbracket] = "]";
    Token.tochars[TOKcolon] = ":";
    Token.tochars[TOKsemicolon] = ";";
    Token.tochars[TOKcomma] = ",";
    Token.tochars[TOKor] = "|";
    Token.tochars[TOKorass] = "|=";
    Token.tochars[TOKxor] = "^";
    Token.tochars[TOKxorass] = "^=";
    Token.tochars[TOKassign] = "=";
    Token.tochars[TOKless] = "<";
    Token.tochars[TOKgreater] = ">";
    Token.tochars[TOKlessequal] = "<=";
    Token.tochars[TOKgreaterequal] = ">=";
    Token.tochars[TOKequal] = "==";
    Token.tochars[TOKnotequal] = "!=";
    Token.tochars[TOKidentity] = "===";
    Token.tochars[TOKnonidentity] = "!==";
    Token.tochars[TOKshiftleft] = "<<";
    Token.tochars[TOKshiftright] = ">>";
    Token.tochars[TOKushiftright] = ">>>";
    Token.tochars[TOKshiftleftass] = "<<=";
    Token.tochars[TOKshiftrightass] = ">>=";
    Token.tochars[TOKushiftrightass] = ">>>=";
    Token.tochars[TOKplus] = "+";
    Token.tochars[TOKplusass] = "+=";
    Token.tochars[TOKminus] = "-";
    Token.tochars[TOKminusass] = "-=";
    Token.tochars[TOKmultiply] = "*";
    Token.tochars[TOKmultiplyass] = "*=";
    Token.tochars[TOKdivide] = "/";
    Token.tochars[TOKdivideass] = "/=";
    Token.tochars[TOKpercent] = "%";
    Token.tochars[TOKpercentass] = "%=";
    Token.tochars[TOKand] = "&";
    Token.tochars[TOKandass] = "&=";
    Token.tochars[TOKdot] = ".";
    Token.tochars[TOKquestion] = "?";
    Token.tochars[TOKtilde] = "~";
    Token.tochars[TOKnot] = "!";
    Token.tochars[TOKandand] = "&&";
    Token.tochars[TOKoror] = "||";
    Token.tochars[TOKplusplus] = "++";
    Token.tochars[TOKminusminus] = "--";
    Token.tochars[TOKcall] = "CALL";

    Lexer.inited = true;
}