/* Digital Mars DMDScript source code.
 * Copyright (c) 2000-2002 by Chromium Communications
 * D version Copyright (c) 2004-2010 by Digital Mars
 * Distributed under the Boost Software License, Version 1.0.
 * (See accompanying file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
 * written by Walter Bright
 * http://www.digitalmars.com
 *
 * D2 port by Dmitry Olshansky
 *
 * DMDScript is implemented in the D Programming Language,
 * http://www.digitalmars.com/d/
 *
 * For a C++ implementation of DMDScript, including COM support, see
 * http://www.digitalmars.com/dscript/cppscript.html
 */


module dmdscript.dregexp;

private import undead.regexp;

import dmdscript.script;
import dmdscript.dobject;
import dmdscript.value;
import dmdscript.protoerror;
import dmdscript.text;
import dmdscript.darray;
import dmdscript.threadcontext;
import dmdscript.dfunction;
import dmdscript.property;
import dmdscript.errmsgs;
import dmdscript.dnative;

//alias script.tchar tchar;

// Values for Dregexp.exec.rettype
// They select the form of the value Dregexp.exec() stores into `ret`:
//   EXEC_STRING  - the matched text (null string on failure)
//   EXEC_ARRAY   - a Darray describing the match (null on failure)
//   EXEC_BOOLEAN - true / false
//   EXEC_INDEX   - start offset of the match (-1 on failure)
enum { EXEC_STRING, EXEC_ARRAY, EXEC_BOOLEAN, EXEC_INDEX };


/* ===================== Dregexp_constructor ==================== */

/**
 * The script-level "RegExp" constructor function object.
 *
 * Besides constructing Dregexp instances it owns the static match
 * properties (input, multiline, lastMatch, lastParen, leftContext,
 * rightContext, $1..$9, index, lastIndex).  Pointers to the stored Values
 * are cached in the fields below so that Dregexp.exec() can update them
 * directly after each successful match.
 */
class DregexpConstructor : Dfunction
{
    Value* input;
    Value* multiline;
    Value* lastMatch;
    Value* lastParen;
    Value* leftContext;
    Value* rightContext;
    Value*[10] dollar;          // dollar[0] aliases lastMatch, dollar[1..9] are $1..$9

    // Extensions
    Value* index;
    Value* lastIndex;

    /**
     * Creates the constructor object, installs the static properties with
     * their initial values and caches pointers to them.
     */
    this(CallContext* cc)
    {
        super(cc, 2, cc.tc.Dfunction_prototype);

        Value v;
        v.putVstring(null);

        Value vb;
        vb.putVboolean(false);

        Value vnm1;
        vnm1.putVnumber(-1);

        name = "RegExp";

        // Static properties
        Put(cc, TEXT_input, &v, DontDelete);
        Put(cc, TEXT_multiline, &vb, DontDelete);
        Put(cc, TEXT_lastMatch, &v, ReadOnly | DontDelete);
        Put(cc, TEXT_lastParen, &v, ReadOnly | DontDelete);
        Put(cc, TEXT_leftContext, &v, ReadOnly | DontDelete);
        Put(cc, TEXT_rightContext, &v, ReadOnly | DontDelete);
        Put(cc, TEXT_dollar1, &v, ReadOnly | DontDelete);
        Put(cc, TEXT_dollar2, &v, ReadOnly | DontDelete);
        Put(cc, TEXT_dollar3, &v, ReadOnly | DontDelete);
        Put(cc, TEXT_dollar4, &v, ReadOnly | DontDelete);
        Put(cc, TEXT_dollar5, &v, ReadOnly | DontDelete);
        Put(cc, TEXT_dollar6, &v, ReadOnly | DontDelete);
        Put(cc, TEXT_dollar7, &v, ReadOnly | DontDelete);
        Put(cc, TEXT_dollar8, &v, ReadOnly | DontDelete);
        Put(cc, TEXT_dollar9, &v, ReadOnly | DontDelete);

        Put(cc, TEXT_index, &vnm1, ReadOnly | DontDelete);
        Put(cc, TEXT_lastIndex, &vnm1, ReadOnly | DontDelete);

        // Cache pointers to the property values just installed
        input = Get(TEXT_input);
        multiline = Get(TEXT_multiline);
        lastMatch = Get(TEXT_lastMatch);
        lastParen = Get(TEXT_lastParen);
        leftContext = Get(TEXT_leftContext);
        rightContext = Get(TEXT_rightContext);
        dollar[0] = lastMatch;
        dollar[1] = Get(TEXT_dollar1);
        dollar[2] = Get(TEXT_dollar2);
        dollar[3] = Get(TEXT_dollar3);
        dollar[4] = Get(TEXT_dollar4);
        dollar[5] = Get(TEXT_dollar5);
        dollar[6] = Get(TEXT_dollar6);
        dollar[7] = Get(TEXT_dollar7);
        dollar[8] = Get(TEXT_dollar8);
        dollar[9] = Get(TEXT_dollar9);

        index = Get(TEXT_index);
        lastIndex = Get(TEXT_lastIndex);

        // Should lastMatch be an alias for dollar[nparens],
        // or should it be a separate property?
        // We implemented it the latter way.
        // Since both are ReadOnly, I can't see that it makes
        // any difference.
    }

    /**
     * Implements `new RegExp(pattern, flags)`.
     *
     * Params:
     *  cc      = call context
     *  ret     = receives the new Dregexp on success, undefined otherwise
     *  arglist = [pattern [, flags]]; missing arguments count as undefined
     *
     * Returns: null on success; otherwise an error result - the value of
     *          RuntimeError() when pattern is a RegExp and flags is not
     *          undefined, or a Value* wrapping a syntax error object when
     *          the pattern failed to compile.
     */
    override void* Construct(CallContext *cc, Value *ret, Value[] arglist)
    {
        // ECMA 262 v3 15.10.4.1

        Value* pattern;
        Value* flags;
        d_string P;
        d_string F;
        Dregexp r;
        Dregexp R;

        //writef("Dregexp_constructor.Construct()\n");
        ret.putVundefined();
        pattern = &vundefined;
        flags = &vundefined;
        switch(arglist.length)
        {
        case 0:
            break;

        default:
            flags = &arglist[1];
            goto case;
        case 1:
            pattern = &arglist[0];
            break;
        }
        R = Dregexp.isRegExp(pattern, cc);
        if(R)
        {
            if(flags.isUndefined())
            {
                // Clone pattern and flags of the existing RegExp
                P = R.re.pattern;
                F = R.re.flags;
            }
            else
            {
                ErrInfo errinfo;
                return RuntimeError(&errinfo, cc, ERR_TYPE_ERROR,
                                    "RegExp.prototype.constructor");
            }
        }
        else
        {
            P = pattern.isUndefined() ? "" : pattern.toString(cc);
            F = flags.isUndefined() ? "" : flags.toString(cc);
        }
        r = new Dregexp(cc, P, F);
        if(r.re.errors)
        {
            Dobject o;
            ErrInfo errinfo;

            version(none)
            {
                writef("P = '%s'\nF = '%s'\n", d_string_ptr(P), d_string_ptr(F));
                for(int i = 0; i < d_string_len(P); i++)
                    writef("x%02x\n", d_string_ptr(P)[i]);
            }
            errinfo.message = errmsgtbl[ERR_REGEXP_COMPILE];
            o = new syntaxerror.D0(cc, &errinfo);
            Value* v = new Value;
            v.putVobject(o);
            return v;
        }
        else
        {
            ret.putVobject(r);
            return null;
        }
    }

    /**
     * Implements `RegExp(pattern, flags)` called as a function.
     *
     * If pattern is already a RegExp object and flags is absent or
     * undefined, that same object is returned unchanged; every other case
     * is forwarded to Construct().
     */
    override void* Call(CallContext *cc, Dobject othis, Value* ret, Value[] arglist)
    {
        // ECMA 262 v3 15.10.3.1
        if(arglist.length >= 1)
        {
            Value* pattern;
            Dobject o;

            pattern = &arglist[0];
            if(!pattern.isPrimitive())
            {
                o = pattern.object;
                if(o.isDregexp() &&
                   (arglist.length == 1 || arglist[1].isUndefined())
                   )
                {
                    ret.putVobject(o);
                    return null;
                }
            }
        }
        return Construct(cc, ret, arglist);
    }


    // The property accessors below all route the property name through
    // perlAlias() first, so the Perl-style names ($_, $&, ...) address the
    // same properties as their long names.

    override Value* Get(d_string PropertyName)
    {
        return Dfunction.Get(perlAlias(PropertyName));
    }

    override Value* Put(CallContext* cc, d_string PropertyName, Value* value, uint attributes)
    {
        return Dfunction.Put(cc, perlAlias(PropertyName), value, attributes);
    }

    override Value* Put(CallContext* cc, d_string PropertyName, Dobject o, uint attributes)
    {
        return Dfunction.Put(cc, perlAlias(PropertyName), o, attributes);
    }

    override Value* Put(CallContext* cc, d_string PropertyName, d_number n, uint attributes)
    {
        return Dfunction.Put(cc, perlAlias(PropertyName), n, attributes);
    }

    override int CanPut(d_string PropertyName)
    {
        return Dfunction.CanPut(perlAlias(PropertyName));
    }

    override int HasProperty(d_string PropertyName)
    {
        return Dfunction.HasProperty(perlAlias(PropertyName));
    }

    override int Delete(d_string PropertyName)
    {
        return Dfunction.Delete(perlAlias(PropertyName));
    }

    // Translate Perl property names to script property names
    //   $_ -> input         $* -> multiline
    //   $& -> lastMatch     $+ -> lastParen
    //   $` -> leftContext   $' -> rightContext
    // Any other name is returned unchanged.
    static d_string perlAlias(d_string s)
    {
        import std.algorithm.searching : countUntil;

        d_string t;

        // from[k] is the character following '$'; to[k] is the long name
        static immutable tchar[] from = "_*&+`'";
        static enum d_string[] to =
        [
            TEXT_input,
            TEXT_multiline,
            TEXT_lastMatch,
            TEXT_lastParen,
            TEXT_leftContext,
            TEXT_rightContext,
        ];

        t = s;
        if(s.length == 2 && s[0] == '$')
        {
            ptrdiff_t i;

            i = countUntil(from, s[1]);
            if(i >= 0)
                t = to[i];
        }
        return t;
    }
}


/* ===================== Dregexp_prototype_toString =============== */

/**
 * RegExp.prototype.toString(): stores "/" ~ pattern ~ "/" ~ flags in ret.
 * If othis is not a Dregexp, ret is set to undefined and the result of
 * Dobject.RuntimeError() is returned; otherwise returns null.
 */
void* Dregexp_prototype_toString(Dobject pthis, CallContext *cc, Dobject othis, Value *ret, Value[] arglist)
{
    // othis must be a RegExp
    Dregexp r;

    if(!othis.isDregexp())
    {
        ret.putVundefined();
        ErrInfo errinfo;
        return Dobject.RuntimeError(&errinfo, cc, ERR_NOT_TRANSFERRABLE,
                                    "RegExp.prototype.toString()");
    }
    else
    {
        d_string s;

        r = cast(Dregexp)(othis);
        s = "/";
        s ~= r.re.pattern;
        s ~= "/";
        s ~= r.re.flags;
        ret.putVstring(s);
    }
    return null;
}

/* ===================== Dregexp_prototype_test =============== */

/**
 * RegExp.prototype.test(string): forwards to Dregexp.exec() asking for a
 * boolean result.
 */
void* Dregexp_prototype_test(Dobject pthis, CallContext *cc, Dobject othis, Value *ret, Value[] arglist)
{
    // ECMA v3 15.10.6.3 says this is equivalent to:
    //	RegExp.prototype.exec(string) != null
    return Dregexp.exec(othis, cc, ret, arglist, EXEC_BOOLEAN);
}

/* ===================== Dregexp_prototype_exec ============= */

/**
 * RegExp.prototype.exec(string): forwards to Dregexp.exec() asking for an
 * array result.
 */
void* Dregexp_prototype_exec(Dobject pthis, CallContext *cc, Dobject othis, Value *ret, Value[] arglist)
{
    return Dregexp.exec(othis, cc, ret, arglist, EXEC_ARRAY);
}


/* ===================== Dregexp_prototype_compile ============= */

/**
 * RegExp.prototype.compile(pattern, attributes): recompiles the RegExp
 * held by othis in place and refreshes its source, global and ignoreCase
 * properties from the engine object.
 *
 * Missing arguments are passed to the engine as null strings.  A
 * RegExpException thrown by the engine is swallowed.  ret is always set to
 * undefined.
 *
 * Returns: null, or the result of Dobject.RuntimeError() when othis is not
 *          of class RegExp.
 */
void* Dregexp_prototype_compile(Dobject pthis, CallContext *cc, Dobject othis, Value *ret, Value[] arglist)
{
    // RegExp.prototype.compile(pattern, attributes)

    // othis must be a RegExp
    if(!othis.isClass(TEXT_RegExp))
    {
        ErrInfo errinfo;
        ret.putVundefined();
        return Dobject.RuntimeError(&errinfo, cc, ERR_NOT_TRANSFERRABLE,
                                    "RegExp.prototype.compile()");
    }
    else
    {
        d_string pattern;
        d_string attributes;
        Dregexp dr;
        RegExp r;

        dr = cast(Dregexp)othis;
        switch(arglist.length)
        {
        case 0:
            break;

        default:
            attributes = arglist[1].toString(cc);
            goto case;
        case 1:
            pattern = arglist[0].toString(cc);
            break;
        }

        r = dr.re;
        try
        {
            r.compile(pattern, attributes);
        }
        catch(RegExpException e)
        {
            // Deliberately swallowed: compile() reports nothing to the
            // script.  The properties below then mirror whatever state
            // the engine object was left in.
        }

        // Affect source, global and ignoreCase properties.
        // This has to run after a successful compile as well; doing it
        // only in the catch handler left the properties stale whenever
        // the new pattern was valid.
        dr.source.putVstring(r.pattern);
        dr.global.putVboolean((r.attributes & RegExp.REA.global) != 0);
        dr.ignoreCase.putVboolean((r.attributes & RegExp.REA.ignoreCase) != 0);
        //writef("r.attributes = x%x\n", r.attributes);
    }
    // Documentation says nothing about a return value,
    // so let's use "undefined"
    ret.putVundefined();
    return null;
}

/* ===================== Dregexp_prototype ==================== */

/**
 * The RegExp.prototype object.  It is itself a Dregexp (built through the
 * prototype-taking constructor) whose classname is overridden to
 * TEXT_Object, and it carries the constructor link plus the native
 * toString / compile / exec / test methods.
 */
class DregexpPrototype : Dregexp
{
    this(CallContext* cc)
    {
        super(cc, cc.tc.Dobject_prototype);
        classname = TEXT_Object;
        uint attributes = ReadOnly | DontDelete | DontEnum;

        Put(cc, TEXT_constructor, cc.tc.Dregexp_constructor, attributes);

        // name, native implementation, declared argument count
        static enum NativeFunctionData[] nfd =
        [
            { TEXT_toString, &Dregexp_prototype_toString, 0 },
            { TEXT_compile, &Dregexp_prototype_compile, 2 },
            { TEXT_exec, &Dregexp_prototype_exec, 1 },
            { TEXT_test, &Dregexp_prototype_test, 1 },
        ];

        DnativeFunction.initialize(this, cc, nfd, attributes);
    }
}


/* ===================== Dregexp ==================== */


/**
 * A script RegExp instance.  Wraps a RegExp engine object (`re`) and caches
 * pointers to the Values of its own source / global / ignoreCase /
 * multiline / lastIndex properties.
 */
class Dregexp : Dobject
{
    Value *global;
    Value *ignoreCase;
    Value *multiline;
    Value *lastIndex;
    Value *source;

    RegExp re;

    /**
     * Creates a RegExp object for `pattern` with the given attribute
     * string.  When the engine reports errors (re.errors != 0) the
     * properties keep their initial values and the caller is expected to
     * raise the SyntaxError.
     */
    this(CallContext* cc, d_string pattern, d_string attributes)
    {
        super(cc, getPrototype(cc));

        Value v;
        v.putVstring(null);

        Value vb;
        vb.putVboolean(false);

        classname = TEXT_RegExp;

        //writef("Dregexp.Dregexp(pattern = '%ls', attributes = '%ls')\n", d_string_ptr(pattern), d_string_ptr(attributes));
        Put(cc, TEXT_source, &v, ReadOnly | DontDelete | DontEnum);
        Put(cc, TEXT_global, &vb, ReadOnly | DontDelete | DontEnum);
        Put(cc, TEXT_ignoreCase, &vb, ReadOnly | DontDelete | DontEnum);
        Put(cc, TEXT_multiline, &vb, ReadOnly | DontDelete | DontEnum);
        Put(cc, TEXT_lastIndex, 0.0, DontDelete | DontEnum);

        source = Get(TEXT_source);
        global = Get(TEXT_global);
        ignoreCase = Get(TEXT_ignoreCase);
        multiline = Get(TEXT_multiline);
        lastIndex = Get(TEXT_lastIndex);

        re = new RegExp(pattern, attributes);
        if(re.errors == 0)
        {
            source.putVstring(pattern);
            //writef("source = '%s'\n", source.x.string.toDchars());
            global.putVboolean((re.attributes & RegExp.REA.global) != 0);
            ignoreCase.putVboolean((re.attributes & RegExp.REA.ignoreCase) != 0);
            multiline.putVboolean((re.attributes & RegExp.REA.multiline) != 0);
        }
        else
        {
            // have caller throw SyntaxError
        }
    }

    /**
     * Creates a RegExp object with an explicit prototype and a
     * RegExp(null, null) engine object; used by DregexpPrototype.
     */
    this(CallContext* cc, Dobject prototype)
    {
        super(cc, prototype);

        Value v;
        v.putVstring(null);

        Value vb;
        vb.putVboolean(false);

        classname = TEXT_RegExp;

        Put(cc, TEXT_source, &v, ReadOnly | DontDelete | DontEnum);
        Put(cc, TEXT_global, &vb, ReadOnly | DontDelete | DontEnum);
        Put(cc, TEXT_ignoreCase, &vb, ReadOnly | DontDelete | DontEnum);
        Put(cc, TEXT_multiline, &vb, ReadOnly | DontDelete | DontEnum);
        Put(cc, TEXT_lastIndex, 0.0, DontDelete | DontEnum);

        source = Get(TEXT_source);
        global = Get(TEXT_global);
        ignoreCase = Get(TEXT_ignoreCase);
        multiline = Get(TEXT_multiline);
        lastIndex = Get(TEXT_lastIndex);

        re = new RegExp(null, null);
    }

    /**
     * Calling a RegExp object as a function: looks up its "exec" property
     * and calls it with this object as the receiver.
     */
    override void* Call(CallContext *cc, Dobject othis, Value* ret, Value[] arglist)
    {
        // This is the same as calling RegExp.prototype.exec(str)
        Value* v;

        v = Get(TEXT_exec);
        return v.toObject(cc).Call(cc, this, ret, arglist);
    }

    /**
     * Returns: the Dregexp held by v when v is a non-primitive whose
     *          object answers isDregexp(); null otherwise.
     */
    static Dregexp isRegExp(Value* v, CallContext* cc)
    {
        Dregexp r;

        if(!v.isPrimitive() && v.toObject(cc).isDregexp())
        {
            r = cast(Dregexp)(v.toObject(cc));
        }
        return r;
    }

    /**
     * Shared implementation behind exec()/test() and the other rettype
     * forms: matches othis's regular expression against a string.
     *
     * Params:
     *  othis   = receiver; must be of class RegExp
     *  cc      = call context
     *  ret     = receives the result, in the form selected by rettype
     *  arglist = arglist[0] is the subject string; when empty, the
     *            constructor's static `input` property is used instead
     *  rettype = one of EXEC_STRING, EXEC_ARRAY, EXEC_BOOLEAN, EXEC_INDEX
     *
     * On a successful match the constructor's static properties (input,
     * lastMatch, leftContext, rightContext, index, lastIndex, $1..$9,
     * lastParen) are updated.  For a global expression (and rettype other
     * than EXEC_INDEX) matching starts at this object's lastIndex, which
     * is then set to the end of the match.  On failure lastIndex is reset
     * to 0, except for EXEC_INDEX.
     *
     * Returns: null, or the result of RuntimeError() when othis is not of
     *          class RegExp (ret is then undefined).
     */
    static void* exec(Dobject othis, CallContext* cc, Value* ret, Value[] arglist, int rettype)
    {
        //writef("Dregexp.exec(arglist.length = %d, rettype = %d)\n", arglist.length, rettype);

        // othis must be a RegExp
        if(!othis.isClass(TEXT_RegExp))
        {
            ret.putVundefined();
            ErrInfo errinfo;
            return RuntimeError(&errinfo, cc, ERR_NOT_TRANSFERRABLE,
                                "RegExp.prototype.exec()");
        }
        else
        {
            d_string s;
            Dregexp dr;
            RegExp r;
            DregexpConstructor dc;
            uint i;
            d_int32 lasti;

            if(arglist.length)
                s = arglist[0].toString(cc);
            else
            {
                Dfunction df;

                // No argument: fall back to the static RegExp.input string
                df = Dregexp.getConstructor(cc);
                s = (cast(DregexpConstructor)df).input.string;
            }

            dr = cast(Dregexp)othis;
            r = dr.re;
            dc = cast(DregexpConstructor)Dregexp.getConstructor(cc);

            // Decide if we are multiline
            if(dr.multiline.dbool)
                r.attributes |= RegExp.REA.multiline;
            else
                r.attributes &= ~RegExp.REA.multiline;

            if(r.attributes & RegExp.REA.global && rettype != EXEC_INDEX)
                lasti = cast(int)dr.lastIndex.toInteger(cc);
            else
                lasti = 0;

            if(r.test(s, lasti))
            {   // Successful match
                Value* lastv;
                uint nmatches;

                if(r.attributes & RegExp.REA.global && rettype != EXEC_INDEX)
                {
                    dr.lastIndex.putVnumber(r.pmatch[0].rm_eo);
                }

                dc.input.putVstring(r.input);

                s = r.input[r.pmatch[0].rm_so .. r.pmatch[0].rm_eo];
                dc.lastMatch.putVstring(s);

                s = r.input[0 .. r.pmatch[0].rm_so];
                dc.leftContext.putVstring(s);

                s = r.input[r.pmatch[0].rm_eo .. $];
                dc.rightContext.putVstring(s);

                dc.index.putVnumber(r.pmatch[0].rm_so);
                dc.lastIndex.putVnumber(r.pmatch[0].rm_eo);

                // Fill in $1..$9
                lastv = &vundefined;
                nmatches = 0;
                for(i = 1; i <= 9; i++)
                {
                    if(i <= r.re_nsub)
                    {
                        int n;

                        // Use last 9 entries for $1..$9
                        n = i;
                        if(r.re_nsub > 9)
                            n += (r.re_nsub - 9);

                        if(r.pmatch[n].rm_so != -1)
                        {
                            s = r.input[r.pmatch[n].rm_so .. r.pmatch[n].rm_eo];
                            dc.dollar[i].putVstring(s);
                            nmatches = i;
                        }
                        else
                            dc.dollar[i].putVundefined();
                        lastv = dc.dollar[i];
                    }
                    else
                        dc.dollar[i].putVundefined();
                }
                // Last substring in $1..$9, or "" if none
                if(r.re_nsub)
                    Value.copy(dc.lastParen, lastv);
                else
                    dc.lastParen.putVstring(null);

                switch(rettype)
                {
                case EXEC_ARRAY:
                {
                    Darray a = new Darray(cc);

                    a.Put(cc, TEXT_input, r.input, 0);
                    a.Put(cc, TEXT_index, r.pmatch[0].rm_so, 0);
                    a.Put(cc, TEXT_lastIndex, r.pmatch[0].rm_eo, 0);

                    a.Put(cc, cast(d_uint32)0, dc.lastMatch, cast(uint)0);

                    // [1]..[nparens]
                    // NOTE(review): nmatches is an index into dollar[1..9],
                    // while i here is an index into pmatch[]; with more
                    // than 9 groups the two numberings differ, so the
                    // first test below may not mean what it appears to -
                    // confirm against the intended behaviour.
                    for(i = 1; i <= r.re_nsub; i++)
                    {
                        if(i > nmatches)
                            a.Put(cc, i, TEXT_, 0);

                        // Reuse values already put into dc.dollar[]
                        else if(r.re_nsub <= 9)
                            a.Put(cc, i, dc.dollar[i], 0);
                        else if(i > r.re_nsub - 9)
                            a.Put(cc, i, dc.dollar[i - (r.re_nsub - 9)], 0);
                        else if(r.pmatch[i].rm_so == -1)
                        {
                            a.Put(cc, i, &vundefined, 0);
                        }
                        else
                        {
                            s = r.input[r.pmatch[i].rm_so .. r.pmatch[i].rm_eo];
                            a.Put(cc, i, s, 0);
                        }
                    }
                    ret.putVobject(a);
                    break;
                }
                case EXEC_STRING:
                    Value.copy(ret, dc.lastMatch);
                    break;

                case EXEC_BOOLEAN:
                    ret.putVboolean(true);      // success
                    break;

                case EXEC_INDEX:
                    ret.putVnumber(r.pmatch[0].rm_so);
                    break;

                default:
                    assert(0);
                }
            }
            else        // failed to match
            {
                //writef("failed\n");
                switch(rettype)
                {
                case EXEC_ARRAY:
                    //writef("memcpy\n");
                    ret.putVnull();         // Return null
                    dr.lastIndex.putVnumber(0);
                    break;

                case EXEC_STRING:
                    ret.putVstring(null);
                    dr.lastIndex.putVnumber(0);
                    break;

                case EXEC_BOOLEAN:
                    ret.putVboolean(false);
                    dr.lastIndex.putVnumber(0);
                    break;

                case EXEC_INDEX:
                    ret.putVnumber(-1.0);
                    // Do not set lastIndex
                    break;

                default:
                    assert(0);
                }
            }
        }
        return null;
    }

    /// Returns the RegExp constructor stored in the thread context.
    static Dfunction getConstructor(CallContext* cc)
    {
        return cc.tc.Dregexp_constructor;
    }

    /// Returns the RegExp prototype stored in the thread context.
    static Dobject getPrototype(CallContext* cc)
    {
        return cc.tc.Dregexp_prototype;
    }

    /**
     * Creates the RegExp constructor and prototype objects, stores them in
     * the thread context and links constructor.prototype to the prototype.
     */
    static void initialize(CallContext* cc)
    {
        cc.tc.Dregexp_constructor = new DregexpConstructor(cc);
        cc.tc.Dregexp_prototype = new DregexpPrototype(cc);

        version(none)
        {
            writef("Dregexp_constructor = %x\n", cc.tc.Dregexp_constructor);
            uint *p;
            p = cast(uint *)cc.tc.Dregexp_constructor;
            writef("p = %x\n", p);
            if(p)
                writef("*p = %x, %x, %x, %x\n", p[0], p[1], p[2], p[3]);
        }

        cc.tc.Dregexp_constructor.Put(cc, TEXT_prototype, cc.tc.Dregexp_prototype, DontEnum | DontDelete | ReadOnly);
    }
}