/* Digital Mars DMDScript source code.
 * Copyright (c) 2000-2002 by Chromium Communications
 * D version Copyright (c) 2004-2010 by Digital Mars
 * Distributed under the Boost Software License, Version 1.0.
 * (See accompanying file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
 * written by Walter Bright
 * http://www.digitalmars.com
 *
 * D2 port by Dmitry Olshansky
 *
 * DMDScript is implemented in the D Programming Language,
 * http://www.digitalmars.com/d/
 *
 * For a C++ implementation of DMDScript, including COM support, see
 * http://www.digitalmars.com/dscript/cppscript.html
 */


module dmdscript.dregexp;

private import undead.regexp;

import dmdscript.script;
import dmdscript.dobject;
import dmdscript.value;
import dmdscript.protoerror;
import dmdscript.text;
import dmdscript.darray;
import dmdscript.threadcontext;
import dmdscript.dfunction;
import dmdscript.property;
import dmdscript.errmsgs;
import dmdscript.dnative;

//alias script.tchar tchar;

// Values for Dregexp.exec.rettype
//   EXEC_STRING  - return the matched text (empty string on failure)
//   EXEC_ARRAY   - return a match array (null on failure)
//   EXEC_BOOLEAN - return true/false
//   EXEC_INDEX   - return the match start offset (-1 on failure);
//                  this mode neither reads nor writes the object's lastIndex
enum { EXEC_STRING, EXEC_ARRAY, EXEC_BOOLEAN, EXEC_INDEX };


/* ===================== Dregexp_constructor ==================== */

/**
 * The script-visible `RegExp` constructor function object.
 *
 * Besides constructing Dregexp instances it owns the static "last match"
 * properties (input, lastMatch, $1..$9, ...), which Dregexp.exec() updates
 * after every successful match. Pointers to those property values are cached
 * in the fields below so exec() can write them without a property lookup.
 */
class DregexpConstructor : Dfunction
{
    Value* input;
    Value* multiline;
    Value* lastMatch;
    Value* lastParen;
    Value* leftContext;
    Value* rightContext;
    Value*[10] dollar;      // dollar[0] aliases lastMatch, dollar[1..9] are $1..$9

    // Extensions
    Value* index;
    Value* lastIndex;

    /// Creates the static properties and caches pointers to their values.
    this()
    {
        super(2, Dfunction_prototype);

        Value v;
        v.putVstring(null);

        Value vb;
        vb.putVboolean(false);

        Value vnm1;
        vnm1.putVnumber(-1);

        name = "RegExp";

        // Static properties
        Put(TEXT_input, &v, DontDelete);
        Put(TEXT_multiline, &vb, DontDelete);
        Put(TEXT_lastMatch, &v, ReadOnly | DontDelete);
        Put(TEXT_lastParen, &v, ReadOnly | DontDelete);
        Put(TEXT_leftContext, &v, ReadOnly | DontDelete);
        Put(TEXT_rightContext, &v, ReadOnly | DontDelete);
        Put(TEXT_dollar1, &v, ReadOnly | DontDelete);
        Put(TEXT_dollar2, &v, ReadOnly | DontDelete);
        Put(TEXT_dollar3, &v, ReadOnly | DontDelete);
        Put(TEXT_dollar4, &v, ReadOnly | DontDelete);
        Put(TEXT_dollar5, &v, ReadOnly | DontDelete);
        Put(TEXT_dollar6, &v, ReadOnly | DontDelete);
        Put(TEXT_dollar7, &v, ReadOnly | DontDelete);
        Put(TEXT_dollar8, &v, ReadOnly | DontDelete);
        Put(TEXT_dollar9, &v, ReadOnly | DontDelete);

        Put(TEXT_index, &vnm1, ReadOnly | DontDelete);
        Put(TEXT_lastIndex, &vnm1, ReadOnly | DontDelete);

        input = Get(TEXT_input);
        multiline = Get(TEXT_multiline);
        lastMatch = Get(TEXT_lastMatch);
        lastParen = Get(TEXT_lastParen);
        leftContext = Get(TEXT_leftContext);
        rightContext = Get(TEXT_rightContext);
        dollar[0] = lastMatch;
        dollar[1] = Get(TEXT_dollar1);
        dollar[2] = Get(TEXT_dollar2);
        dollar[3] = Get(TEXT_dollar3);
        dollar[4] = Get(TEXT_dollar4);
        dollar[5] = Get(TEXT_dollar5);
        dollar[6] = Get(TEXT_dollar6);
        dollar[7] = Get(TEXT_dollar7);
        dollar[8] = Get(TEXT_dollar8);
        dollar[9] = Get(TEXT_dollar9);

        index = Get(TEXT_index);
        lastIndex = Get(TEXT_lastIndex);

        // Should lastMatch be an alias for dollar[nparens],
        // or should it be a separate property?
        // We implemented it the latter way.
        // Since both are ReadOnly, I can't see that it makes
        // any difference.
    }

    /**
     * `new RegExp(pattern, flags)`.
     *
     * If pattern is already a RegExp object its pattern and flags are reused,
     * but only when flags is undefined; otherwise a TypeError is reported.
     * Otherwise both arguments are converted to strings (undefined => "").
     *
     * Returns: null on success with the new object stored in *ret; on failure
     *          the result of RuntimeError(), or a Value holding a SyntaxError
     *          object when the new RegExp reports compile errors.
     */
    override void* Construct(CallContext *cc, Value *ret, Value[] arglist)
    {
        // ECMA 262 v3 15.10.4.1

        Value* pattern;
        Value* flags;
        d_string P;
        d_string F;
        Dregexp r;
        Dregexp R;

        //writef("Dregexp_constructor.Construct()\n");
        ret.putVundefined();
        pattern = &vundefined;
        flags = &vundefined;
        switch(arglist.length)
        {
        case 0:
            break;

        default:
            // Two or more arguments: pick up flags, then fall into the
            // single-argument case for the pattern.
            flags = &arglist[1];
            goto case;
        case 1:
            pattern = &arglist[0];
            break;
        }
        R = Dregexp.isRegExp(pattern);
        if(R)
        {
            if(flags.isUndefined())
            {
                P = R.re.pattern;
                F = R.re.flags;
            }
            else
            {
                ErrInfo errinfo;
                return RuntimeError(&errinfo, ERR_TYPE_ERROR,
                                    "RegExp.prototype.constructor");
            }
        }
        else
        {
            P = pattern.isUndefined() ? "" : pattern.toString();
            F = flags.isUndefined() ? "" : flags.toString();
        }
        r = new Dregexp(P, F);
        if(r.re.errors)
        {
            Dobject o;
            ErrInfo errinfo;

            version(none)
            {
                writef("P = '%s'\nF = '%s'\n", d_string_ptr(P), d_string_ptr(F));
                for(int i = 0; i < d_string_len(P); i++)
                    writef("x%02x\n", d_string_ptr(P)[i]);
            }
            errinfo.message = errmsgtbl[ERR_REGEXP_COMPILE];
            o = new syntaxerror.D0(&errinfo);
            Value* v = new Value;
            v.putVobject(o);
            return v;
        }
        else
        {
            ret.putVobject(r);
            return null;
        }
    }

    /**
     * `RegExp(pattern, flags)` called as a plain function.
     *
     * When pattern is a RegExp object and flags is absent or undefined, the
     * very same object is returned; every other case defers to Construct().
     */
    override void* Call(CallContext *cc, Dobject othis, Value* ret, Value[] arglist)
    {
        // ECMA 262 v3 15.10.3.1
        if(arglist.length >= 1)
        {
            Value* pattern;
            Dobject o;

            pattern = &arglist[0];
            if(!pattern.isPrimitive())
            {
                o = pattern.object;
                if(o.isDregexp() &&
                   (arglist.length == 1 || arglist[1].isUndefined())
                   )
                {
                    ret.putVobject(o);
                    return null;
                }
            }
        }
        return Construct(cc, ret, arglist);
    }


    // Every property accessor below first maps Perl-style names ($_, $&, ...)
    // to the corresponding script property name, then defers to Dfunction.

    override Value* Get(d_string PropertyName) const
    {
        return Dfunction.Get(perlAlias(PropertyName));
    }

    override Value* Put(d_string PropertyName, Value* value, uint attributes)
    {
        return Dfunction.Put(perlAlias(PropertyName), value, attributes);
    }

    override Value* Put(d_string PropertyName, Dobject o, uint attributes)
    {
        return Dfunction.Put(perlAlias(PropertyName), o, attributes);
    }

    override Value* Put(d_string PropertyName, d_number n, uint attributes)
    {
        return Dfunction.Put(perlAlias(PropertyName), n, attributes);
    }

    override int CanPut(d_string PropertyName)
    {
        return Dfunction.CanPut(perlAlias(PropertyName));
    }

    override int HasProperty(d_string PropertyName)
    {
        return Dfunction.HasProperty(perlAlias(PropertyName));
    }

    override int Delete(d_string PropertyName)
    {
        return Dfunction.Delete(perlAlias(PropertyName));
    }

    /**
     * Translate Perl property names to script property names.
     *
     * A two-character name of the form `$c`, where c is one of `_*&+`` ` `` `'`,
     * is mapped to input, multiline, lastMatch, lastParen, leftContext or
     * rightContext respectively. Any other name is returned unchanged.
     */
    static d_string perlAlias(d_string s)
    {
        // Scoped import so the lookup does not depend on std.string being
        // visible through some other module's imports.
        import std.string : indexOf;

        d_string t;

        // from[k] is the Perl suffix character whose script name is to[k].
        static tchar[6] from = "_*&+`'";
        static enum d_string[] to =
        [
            TEXT_input,
            TEXT_multiline,
            TEXT_lastMatch,
            TEXT_lastParen,
            TEXT_leftContext,
            TEXT_rightContext,
        ];

        t = s;
        if(s.length == 2 && s[0] == '$')
        {
            ptrdiff_t i;

            i = indexOf(from[], s[1]);
            if(i >= 0)
                t = to[i];
        }
        return t;
    }
}


/* ===================== Dregexp_prototype_toString =============== */

/**
 * RegExp.prototype.toString(): yields "/" ~ pattern ~ "/" ~ flags.
 * Reports ERR_NOT_TRANSFERRABLE when othis is not a Dregexp.
 */
void* Dregexp_prototype_toString(Dobject pthis, CallContext *cc, Dobject othis, Value *ret, Value[] arglist)
{
    // othis must be a RegExp
    Dregexp r;

    if(!othis.isDregexp())
    {
        ret.putVundefined();
        ErrInfo errinfo;
        return Dobject.RuntimeError(&errinfo, ERR_NOT_TRANSFERRABLE,
                                    "RegExp.prototype.toString()");
    }
    else
    {
        d_string s;

        r = cast(Dregexp)(othis);
        s = "/";
        s ~= r.re.pattern;
        s ~= "/";
        s ~= r.re.flags;
        ret.putVstring(s);
    }
    return null;
}

/* ===================== Dregexp_prototype_test =============== */

/// RegExp.prototype.test(string): runs a match and returns a boolean.
void* Dregexp_prototype_test(Dobject pthis, CallContext *cc, Dobject othis, Value *ret, Value[] arglist)
{
    // ECMA v3 15.10.6.3 says this is equivalent to:
    //	RegExp.prototype.exec(string) != null
    return Dregexp.exec(othis, ret, arglist, EXEC_BOOLEAN);
}

/* ===================== Dregexp_prototype_exec ============= */

/// RegExp.prototype.exec(string): runs a match and returns a match array or null.
void* Dregexp_prototype_exec(Dobject pthis, CallContext *cc, Dobject othis, Value *ret, Value[] arglist)
{
    return Dregexp.exec(othis, ret, arglist, EXEC_ARRAY);
}


/* ===================== Dregexp_prototype_compile ============= */

/**
 * RegExp.prototype.compile(pattern, attributes): recompiles the regular
 * expression held by othis in place and refreshes the script-visible
 * source/global/ignoreCase/multiline properties from the engine state.
 *
 * Missing arguments are passed to the engine as null strings. A
 * RegExpException raised by the engine is swallowed (best effort); the
 * properties are then refreshed from whatever state the engine was left in.
 * The script-level result is always undefined.
 */
void* Dregexp_prototype_compile(Dobject pthis, CallContext *cc, Dobject othis, Value *ret, Value[] arglist)
{
    // RegExp.prototype.compile(pattern, attributes)

    // othis must be a RegExp
    if(!othis.isClass(TEXT_RegExp))
    {
        ErrInfo errinfo;
        ret.putVundefined();
        return Dobject.RuntimeError(&errinfo, ERR_NOT_TRANSFERRABLE,
                                    "RegExp.prototype.compile()");
    }
    else
    {
        d_string pattern;
        d_string attributes;
        Dregexp dr;
        RegExp r;

        dr = cast(Dregexp)othis;
        switch(arglist.length)
        {
        case 0:
            break;

        default:
            attributes = arglist[1].toString();
            goto case;
        case 1:
            pattern = arglist[0].toString();
            break;
        }

        r = dr.re;
        try
        {
            r.compile(pattern, attributes);
        }
        catch(RegExpException e)
        {
            // Deliberately ignored: compile() has no error channel to the
            // script here. The property refresh below still runs.
        }

        // Affect source, global, ignoreCase and multiline properties.
        // This must happen on success as well as on failure, otherwise the
        // object keeps advertising the previous pattern and flags.
        // multiline matters in particular because exec() re-derives the
        // engine's multiline attribute from this property on every call.
        dr.source.putVstring(r.pattern);
        dr.global.putVboolean((r.attributes & RegExp.REA.global) != 0);
        dr.ignoreCase.putVboolean((r.attributes & RegExp.REA.ignoreCase) != 0);
        dr.multiline.putVboolean((r.attributes & RegExp.REA.multiline) != 0);
        //writef("r.attributes = x%x\n", r.attributes);
    }
    // Documentation says nothing about a return value,
    // so let's use "undefined"
    ret.putVundefined();
    return null;
}

/* ===================== Dregexp_prototype ==================== */

/**
 * The RegExp.prototype object: a Dregexp whose classname is reset to
 * TEXT_Object and which carries `constructor` plus the native methods
 * toString, compile, exec and test.
 */
class DregexpPrototype : Dregexp
{
    this()
    {
        super(Dobject_prototype);
        classname = TEXT_Object;
        uint attributes = ReadOnly | DontDelete | DontEnum;

        Put(TEXT_constructor, Dregexp_constructor, attributes);

        // { property name, native implementation, declared argument count }
        static enum NativeFunctionData[] nfd =
        [
            { TEXT_toString, &Dregexp_prototype_toString, 0 },
            { TEXT_compile, &Dregexp_prototype_compile, 2 },
            { TEXT_exec, &Dregexp_prototype_exec, 1 },
            { TEXT_test, &Dregexp_prototype_test, 1 },
        ];

        DnativeFunction.initialize(this, nfd, attributes);
    }
}


/* ===================== Dregexp ==================== */


/**
 * A script RegExp instance: wraps a RegExp engine object (`re`) and caches
 * pointers to the values of its own source/global/ignoreCase/multiline/
 * lastIndex properties.
 */
class Dregexp : Dobject
{
    Value *global;
    Value *ignoreCase;
    Value *multiline;
    Value *lastIndex;
    Value *source;

    RegExp re;

    /**
     * Builds a RegExp object from pattern and attributes.
     *
     * When the engine reports no errors the source and flag properties are
     * filled in; otherwise they keep their defaults and the caller is
     * expected to inspect `re.errors` and raise a SyntaxError.
     */
    this(d_string pattern, d_string attributes)
    {
        super(getPrototype());

        //writef("Dregexp.Dregexp(pattern = '%ls', attributes = '%ls')\n", d_string_ptr(pattern), d_string_ptr(attributes));
        initRegExpProperties();

        re = new RegExp(pattern, attributes);
        if(re.errors == 0)
        {
            source.putVstring(pattern);
            //writef("source = '%s'\n", source.x.string.toDchars());
            global.putVboolean((re.attributes & RegExp.REA.global) != 0);
            ignoreCase.putVboolean((re.attributes & RegExp.REA.ignoreCase) != 0);
            multiline.putVboolean((re.attributes & RegExp.REA.multiline) != 0);
        }
        else
        {
            // have caller throw SyntaxError
        }
    }

    /// Builds a RegExp object with default properties on the given prototype.
    this(Dobject prototype)
    {
        super(prototype);

        initRegExpProperties();

        re = new RegExp(null, null);
    }

    /// Sets classname, creates the instance properties with their default
    /// values and caches pointers to them. Shared by both constructors.
    private void initRegExpProperties()
    {
        Value v;
        v.putVstring(null);

        Value vb;
        vb.putVboolean(false);

        classname = TEXT_RegExp;

        Put(TEXT_source, &v, ReadOnly | DontDelete | DontEnum);
        Put(TEXT_global, &vb, ReadOnly | DontDelete | DontEnum);
        Put(TEXT_ignoreCase, &vb, ReadOnly | DontDelete | DontEnum);
        Put(TEXT_multiline, &vb, ReadOnly | DontDelete | DontEnum);
        Put(TEXT_lastIndex, 0.0, DontDelete | DontEnum);

        source = Get(TEXT_source);
        global = Get(TEXT_global);
        ignoreCase = Get(TEXT_ignoreCase);
        multiline = Get(TEXT_multiline);
        lastIndex = Get(TEXT_lastIndex);
    }

    /// Calling a RegExp object as a function forwards to its `exec` property.
    override void* Call(CallContext *cc, Dobject othis, Value* ret, Value[] arglist)
    {
        // This is the same as calling RegExp.prototype.exec(str)
        Value* v;

        v = Get(TEXT_exec);
        return v.toObject().Call(cc, this, ret, arglist);
    }

    /// Returns v's object as a Dregexp if v holds one, otherwise null.
    static Dregexp isRegExp(Value* v)
    {
        Dregexp r;

        if(!v.isPrimitive() && v.toObject().isDregexp())
        {
            r = cast(Dregexp)(v.toObject());
        }
        return r;
    }

    /**
     * Core matcher shared by exec(), test() and other callers.
     *
     * Params:
     *   othis   = the RegExp object; must have class TEXT_RegExp
     *   ret     = receives the result, shaped according to rettype
     *   arglist = arglist[0] is the subject string; when absent the
     *             constructor's static `input` property is used
     *   rettype = one of EXEC_STRING, EXEC_ARRAY, EXEC_BOOLEAN, EXEC_INDEX
     *
     * On a successful match the constructor's static properties (input,
     * lastMatch, leftContext, rightContext, index, lastIndex, $1..$9,
     * lastParen) are updated. For global expressions (except EXEC_INDEX) the
     * search starts at the object's lastIndex, which is advanced on success;
     * on failure lastIndex is reset to 0 (again except for EXEC_INDEX).
     *
     * Returns: null, or the result of RuntimeError() when othis is not a RegExp.
     */
    static void* exec(Dobject othis, Value* ret, Value[] arglist, int rettype)
    {
        //writef("Dregexp.exec(arglist.length = %d, rettype = %d)\n", arglist.length, rettype);

        // othis must be a RegExp
        if(!othis.isClass(TEXT_RegExp))
        {
            ret.putVundefined();
            ErrInfo errinfo;
            return RuntimeError(&errinfo, ERR_NOT_TRANSFERRABLE,
                                "RegExp.prototype.exec()");
        }
        else
        {
            d_string s;
            Dregexp dr;
            RegExp r;
            DregexpConstructor dc;
            uint i;
            d_int32 lasti;

            dc = cast(DregexpConstructor)Dregexp.getConstructor();

            if(arglist.length)
                s = arglist[0].toString();
            else
                s = dc.input.string;    // fall back to RegExp.input

            dr = cast(Dregexp)othis;
            r = dr.re;

            // Decide if we are multiline
            if(dr.multiline.dbool)
                r.attributes |= RegExp.REA.multiline;
            else
                r.attributes &= ~RegExp.REA.multiline;

            // lastIndex only participates for global expressions, and never
            // for EXEC_INDEX.
            bool useLastIndex = (r.attributes & RegExp.REA.global) && rettype != EXEC_INDEX;

            if(useLastIndex)
                lasti = cast(int)dr.lastIndex.toInteger();
            else
                lasti = 0;

            if(r.test(s, lasti))
            {   // Successful match
                Value* lastv;
                uint nmatches;

                if(useLastIndex)
                {
                    dr.lastIndex.putVnumber(r.pmatch[0].rm_eo);
                }

                dc.input.putVstring(r.input);

                s = r.input[r.pmatch[0].rm_so .. r.pmatch[0].rm_eo];
                dc.lastMatch.putVstring(s);

                s = r.input[0 .. r.pmatch[0].rm_so];
                dc.leftContext.putVstring(s);

                s = r.input[r.pmatch[0].rm_eo .. $];
                dc.rightContext.putVstring(s);

                dc.index.putVnumber(r.pmatch[0].rm_so);
                dc.lastIndex.putVnumber(r.pmatch[0].rm_eo);

                // Fill in $1..$9
                lastv = &vundefined;
                nmatches = 0;
                for(i = 1; i <= 9; i++)
                {
                    if(i <= r.re_nsub)
                    {
                        int n;

                        // Use last 9 entries for $1..$9
                        n = i;
                        if(r.re_nsub > 9)
                            n += (r.re_nsub - 9);

                        if(r.pmatch[n].rm_so != -1)
                        {
                            s = r.input[r.pmatch[n].rm_so .. r.pmatch[n].rm_eo];
                            dc.dollar[i].putVstring(s);
                            nmatches = i;
                        }
                        else
                            dc.dollar[i].putVundefined();
                        lastv = dc.dollar[i];
                    }
                    else
                        dc.dollar[i].putVundefined();
                }
                // Last substring in $1..$9, or "" if none
                if(r.re_nsub)
                    Value.copy(dc.lastParen, lastv);
                else
                    dc.lastParen.putVstring(null);

                switch(rettype)
                {
                case EXEC_ARRAY:
                {
                    Darray a = new Darray();

                    a.Put(TEXT_input, r.input, 0);
                    a.Put(TEXT_index, r.pmatch[0].rm_so, 0);
                    a.Put(TEXT_lastIndex, r.pmatch[0].rm_eo, 0);

                    a.Put(cast(d_uint32)0, dc.lastMatch, cast(uint)0);

                    // [1]..[nparens]
                    for(i = 1; i <= r.re_nsub; i++)
                    {
                        if(i > nmatches)
                            a.Put(i, TEXT_, 0);

                        // Reuse values already put into dc.dollar[]
                        else if(r.re_nsub <= 9)
                            a.Put(i, dc.dollar[i], 0);
                        else if(i > r.re_nsub - 9)
                            a.Put(i, dc.dollar[i - (r.re_nsub - 9)], 0);
                        else if(r.pmatch[i].rm_so == -1)
                        {
                            a.Put(i, &vundefined, 0);
                        }
                        else
                        {
                            s = r.input[r.pmatch[i].rm_so .. r.pmatch[i].rm_eo];
                            a.Put(i, s, 0);
                        }
                    }
                    ret.putVobject(a);
                    break;
                }
                case EXEC_STRING:
                    Value.copy(ret, dc.lastMatch);
                    break;

                case EXEC_BOOLEAN:
                    ret.putVboolean(true);      // success
                    break;

                case EXEC_INDEX:
                    ret.putVnumber(r.pmatch[0].rm_so);
                    break;

                default:
                    assert(0);
                }
            }
            else        // failed to match
            {
                //writef("failed\n");
                switch(rettype)
                {
                case EXEC_ARRAY:
                    //writef("memcpy\n");
                    ret.putVnull();         // Return null
                    dr.lastIndex.putVnumber(0);
                    break;

                case EXEC_STRING:
                    ret.putVstring(null);
                    dr.lastIndex.putVnumber(0);
                    break;

                case EXEC_BOOLEAN:
                    ret.putVboolean(false);
                    dr.lastIndex.putVnumber(0);
                    break;

                case EXEC_INDEX:
                    ret.putVnumber(-1.0);
                    // Do not set lastIndex
                    break;

                default:
                    assert(0);
                }
            }
        }
        return null;
    }

    /// Returns the RegExp constructor function object.
    static Dfunction getConstructor()
    {
        return Dregexp_constructor;
    }

    /// Returns the RegExp.prototype object.
    static Dobject getPrototype()
    {
        return Dregexp_prototype;
    }

    /// Creates the constructor and prototype objects and links
    /// RegExp.prototype to the prototype object.
    static void initialize()
    {
        Dregexp_constructor = new DregexpConstructor();
        Dregexp_prototype = new DregexpPrototype();

        version(none)
        {
            writef("Dregexp_constructor = %x\n", Dregexp_constructor);
            uint *p;
            p = cast(uint *)Dregexp_constructor;
            writef("p = %x\n", p);
            if(p)
                writef("*p = %x, %x, %x, %x\n", p[0], p[1], p[2], p[3]);
        }

        Dregexp_constructor.Put(TEXT_prototype, Dregexp_prototype, DontEnum | DontDelete | ReadOnly);
    }
}