1 /* Digital Mars DMDScript source code.
2  * Copyright (c) 2000-2002 by Chromium Communications
3  * D version Copyright (c) 2004-2010 by Digital Mars
4  * Distributed under the Boost Software License, Version 1.0.
5  * (See accompanying file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
6  * written by Walter Bright
7  * http://www.digitalmars.com
8  *
9  * D2 port by Dmitry Olshansky 
10  *
11  * DMDScript is implemented in the D Programming Language,
12  * http://www.digitalmars.com/d/
13  *
14  * For a C++ implementation of DMDScript, including COM support, see
15  * http://www.digitalmars.com/dscript/cppscript.html
16  */
17 
18 
19 module dmdscript.dregexp;
20 
21 private import undead.regexp;
22 
23 import dmdscript.script;
24 import dmdscript.dobject;
25 import dmdscript.value;
26 import dmdscript.protoerror;
27 import dmdscript.text;
28 import dmdscript.darray;
29 import dmdscript.threadcontext;
30 import dmdscript.dfunction;
31 import dmdscript.property;
32 import dmdscript.errmsgs;
33 import dmdscript.dnative;
34 
35 //alias script.tchar tchar;
36 
// Values for Dregexp.exec.rettype
// They select the shape of the result that Dregexp.exec() stores in *ret:
//   EXEC_STRING  - the matched text; a null string when there is no match
//   EXEC_ARRAY   - a Darray describing the match; null when there is no match
//   EXEC_BOOLEAN - true on a match, false otherwise
//   EXEC_INDEX   - start offset of the match, or -1; this mode neither reads
//                  nor updates the regexp object's lastIndex property
enum { EXEC_STRING, EXEC_ARRAY, EXEC_BOOLEAN, EXEC_INDEX };
39 
40 
41 /* ===================== Dregexp_constructor ==================== */
42 
class DregexpConstructor : Dfunction
{
    // Cached slots of the static RegExp.* properties. They are looked up
    // once in the constructor so Dregexp.exec() can update them in place.
    Value* input;
    Value* multiline;
    Value* lastMatch;
    Value* lastParen;
    Value* leftContext;
    Value* rightContext;
    Value*[10] dollar;      // [0] shares lastMatch's slot, [1]..[9] are $1..$9

    // Extensions
    Value* index;
    Value* lastIndex;

    /* Creates the RegExp constructor function object and registers its
     * static properties with their initial values.
     */
    this()
    {
        super(2, Dfunction_prototype);

        // Property names of $1..$9; entry k is cached in dollar[k + 1].
        enum d_string[] dollarNames =
        [
            TEXT_dollar1,
            TEXT_dollar2,
            TEXT_dollar3,
            TEXT_dollar4,
            TEXT_dollar5,
            TEXT_dollar6,
            TEXT_dollar7,
            TEXT_dollar8,
            TEXT_dollar9,
        ];

        const uint fixedAttrs = ReadOnly | DontDelete;

        Value emptyString;
        emptyString.putVstring(null);

        Value falseValue;
        falseValue.putVboolean(false);

        Value minusOne;
        minusOne.putVnumber(-1);

        name = "RegExp";

        // Static properties: every Put happens before any Get below,
        // so the cached pointers are taken only once the table is complete.
        Put(TEXT_input, &emptyString, DontDelete);
        Put(TEXT_multiline, &falseValue, DontDelete);
        Put(TEXT_lastMatch, &emptyString, fixedAttrs);
        Put(TEXT_lastParen, &emptyString, fixedAttrs);
        Put(TEXT_leftContext, &emptyString, fixedAttrs);
        Put(TEXT_rightContext, &emptyString, fixedAttrs);
        foreach(propName; dollarNames)
            Put(propName, &emptyString, fixedAttrs);

        Put(TEXT_index, &minusOne, fixedAttrs);
        Put(TEXT_lastIndex, &minusOne, fixedAttrs);

        // Cache the slots.
        input = Get(TEXT_input);
        multiline = Get(TEXT_multiline);
        lastMatch = Get(TEXT_lastMatch);
        lastParen = Get(TEXT_lastParen);
        leftContext = Get(TEXT_leftContext);
        rightContext = Get(TEXT_rightContext);

        // lastMatch is kept as its own property rather than as an alias of
        // dollar[nparens]; with both being ReadOnly the difference should
        // not be observable. dollar[0] simply points at the lastMatch slot.
        dollar[0] = lastMatch;
        foreach(k, propName; dollarNames)
            dollar[k + 1] = Get(propName);

        index = Get(TEXT_index);
        lastIndex = Get(TEXT_lastIndex);
    }

    /* new RegExp(pattern, flags) -- ECMA 262 v3 15.10.4.1
     *
     * On success stores the new Dregexp in *ret and returns null.
     * Otherwise returns the error produced by RuntimeError() (a RegExp
     * pattern combined with explicit flags) or a Value holding a
     * SyntaxError object (the pattern did not compile).
     */
    override void* Construct(CallContext *cc, Value *ret, Value[] arglist)
    {
        //writef("Dregexp_constructor.Construct()\n");
        ret.putVundefined();

        // Missing arguments are treated as undefined.
        Value* pattern = arglist.length >= 1 ? &arglist[0] : &vundefined;
        Value* flags = arglist.length >= 2 ? &arglist[1] : &vundefined;

        d_string P;
        d_string F;

        Dregexp R = Dregexp.isRegExp(pattern);
        if(R is null)
        {
            P = pattern.isUndefined() ? "" : pattern.toString();
            F = flags.isUndefined() ? "" : flags.toString();
        }
        else if(flags.isUndefined())
        {
            // Clone the pattern and flags of the existing RegExp
            P = R.re.pattern;
            F = R.re.flags;
        }
        else
        {
            // A RegExp pattern may not be combined with explicit flags
            ErrInfo typeErr;
            return RuntimeError(&typeErr, ERR_TYPE_ERROR,
                                "RegExp.prototype.constructor");
        }

        Dregexp r = new Dregexp(P, F);
        if(!r.re.errors)
        {
            ret.putVobject(r);
            return null;
        }

        // The pattern failed to compile: hand back a SyntaxError
        ErrInfo compileErr;

        version(none)
        {
            writef("P = '%s'\nF = '%s'\n", d_string_ptr(P), d_string_ptr(F));
            for(int i = 0; i < d_string_len(P); i++)
                writef("x%02x\n", d_string_ptr(P)[i]);
        }
        compileErr.message = errmsgtbl[ERR_REGEXP_COMPILE];
        Dobject errObject = new syntaxerror.D0(&compileErr);
        Value* thrown = new Value;
        thrown.putVobject(errObject);
        return thrown;
    }

    /* RegExp(pattern, flags) called as a function -- ECMA 262 v3 15.10.3.1
     *
     * A RegExp object passed without flags is returned unchanged; every
     * other argument combination is forwarded to Construct().
     */
    override void* Call(CallContext *cc, Dobject othis, Value* ret, Value[] arglist)
    {
        if(arglist.length >= 1 && !arglist[0].isPrimitive())
        {
            Dobject candidate = arglist[0].object;

            if(candidate.isDregexp() &&
               (arglist.length == 1 || arglist[1].isUndefined()))
            {
                ret.putVobject(candidate);
                return null;
            }
        }
        return Construct(cc, ret, arglist);
    }

    // The property accessors below translate the Perl-style aliases
    // ($_, $&, ...) and then defer to the base class implementation.

    override Value* Get(d_string PropertyName)
    {
        return super.Get(perlAlias(PropertyName));
    }

    override Value* Put(d_string PropertyName, Value* value, uint attributes)
    {
        return super.Put(perlAlias(PropertyName), value, attributes);
    }

    override Value* Put(d_string PropertyName, Dobject o, uint attributes)
    {
        return super.Put(perlAlias(PropertyName), o, attributes);
    }

    override Value* Put(d_string PropertyName, d_number n, uint attributes)
    {
        return super.Put(perlAlias(PropertyName), n, attributes);
    }

    override int CanPut(d_string PropertyName)
    {
        return super.CanPut(perlAlias(PropertyName));
    }

    override int HasProperty(d_string PropertyName)
    {
        return super.HasProperty(perlAlias(PropertyName));
    }

    override int Delete(d_string PropertyName)
    {
        return super.Delete(perlAlias(PropertyName));
    }

    /* Translate Perl property names to script property names.
     *
     * A two-character name made of '$' followed by one of _ * & + ` '
     * maps to input, multiline, lastMatch, lastParen, leftContext and
     * rightContext respectively. Any other name is returned unchanged.
     */
    static d_string perlAlias(d_string s)
    {
        import std.algorithm.searching : countUntil;

        // perlChars[k] is the alias character for scriptNames[k]
        static immutable tchar[] perlChars = "_*&+`'";
        enum d_string[] scriptNames =
        [
            TEXT_input,
            TEXT_multiline,
            TEXT_lastMatch,
            TEXT_lastParen,
            TEXT_leftContext,
            TEXT_rightContext,
        ];

        if(s.length != 2 || s[0] != '$')
            return s;

        const ptrdiff_t pos = countUntil(perlChars, s[1]);
        if(pos < 0)
            return s;
        return scriptNames[pos];
    }
}
281 
282 
283 /* ===================== Dregexp_prototype_toString =============== */
284 
/* RegExp.prototype.toString()
 *
 * Stores "/" ~ pattern ~ "/" ~ flags in *ret and returns null. When othis
 * is not a RegExp, *ret is set to undefined and the error produced by
 * Dobject.RuntimeError() is returned instead.
 */
void* Dregexp_prototype_toString(Dobject pthis, CallContext *cc, Dobject othis, Value *ret, Value[] arglist)
{
    // Guard: the receiver must be a RegExp
    if(!othis.isDregexp())
    {
        ret.putVundefined();
        ErrInfo errinfo;
        return Dobject.RuntimeError(&errinfo, ERR_NOT_TRANSFERRABLE,
                                    "RegExp.prototype.toString()");
    }

    Dregexp regexp = cast(Dregexp)othis;
    d_string text = "/" ~ regexp.re.pattern ~ "/" ~ regexp.re.flags;
    ret.putVstring(text);
    return null;
}
310 
311 /* ===================== Dregexp_prototype_test =============== */
312 
/* RegExp.prototype.test(string)
 *
 * Runs the shared matcher in Dregexp.exec() and asks for a boolean result.
 */
void* Dregexp_prototype_test(Dobject pthis, CallContext *cc, Dobject othis, Value *ret, Value[] arglist)
{
    // Per ECMA v3 15.10.6.3 this is equivalent to:
    //	RegExp.prototype.exec(string) != null
    void* status = Dregexp.exec(othis, ret, arglist, EXEC_BOOLEAN);
    return status;
}
319 
320 /* ===================== Dregexp_prototype_exec ============= */
321 
/* RegExp.prototype.exec(string)
 *
 * Runs the shared matcher in Dregexp.exec() and asks for the array form
 * of the result.
 */
void* Dregexp_prototype_exec(Dobject pthis, CallContext *cc, Dobject othis, Value *ret, Value[] arglist)
{
    void* status = Dregexp.exec(othis, ret, arglist, EXEC_ARRAY);
    return status;
}
326 
327 
328 /* ===================== Dregexp_prototype_compile ============= */
329 
/* RegExp.prototype.compile(pattern, attributes)
 *
 * Recompiles the regular expression held by othis and then refreshes the
 * script-visible source, global, ignoreCase and multiline properties from
 * the engine state.
 *
 * Params:
 *   othis   = receiver; must have class RegExp
 *   ret     = receives undefined in every case
 *   arglist = [0] pattern, [1] attributes; both optional, a missing one is
 *             passed to the engine as a null string
 *
 * Returns: null on completion, or the error produced by
 * Dobject.RuntimeError() when othis is not a RegExp.
 */
void* Dregexp_prototype_compile(Dobject pthis, CallContext *cc, Dobject othis, Value *ret, Value[] arglist)
{
    // othis must be a RegExp
    if(!othis.isClass(TEXT_RegExp))
    {
        ErrInfo errinfo;
        ret.putVundefined();
        return Dobject.RuntimeError(&errinfo, ERR_NOT_TRANSFERRABLE,
                                    "RegExp.prototype.compile()");
    }

    d_string pattern;
    d_string attributes;
    Dregexp dr = cast(Dregexp)othis;

    // Attributes are converted before the pattern, matching the original
    // evaluation order of the toString() calls.
    if(arglist.length >= 2)
        attributes = arglist[1].toString();
    if(arglist.length >= 1)
        pattern = arglist[0].toString();

    RegExp r = dr.re;
    try
    {
        r.compile(pattern, attributes);
    }
    catch(RegExpException)
    {
        // A failed compile is deliberately not reported to the script.
        // The properties are still refreshed below, as they were before.
    }

    // Affect source, global, ignoreCase and multiline properties.
    // This used to run only inside the catch handler, so a successful
    // compile() left the properties describing the previous pattern.
    // multiline is included because Dregexp.exec() rewrites the engine's
    // multiline attribute from this property before every match, which
    // would otherwise discard an "m" attribute given to compile().
    dr.source.putVstring(r.pattern);
    dr.global.putVboolean((r.attributes & RegExp.REA.global) != 0);
    dr.ignoreCase.putVboolean((r.attributes & RegExp.REA.ignoreCase) != 0);
    dr.multiline.putVboolean((r.attributes & RegExp.REA.multiline) != 0);
    //writef("r.attributes = x%x\n", r.attributes);

    // Documentation says nothing about a return value,
    // so let's use "undefined"
    ret.putVundefined();
    return null;
}
382 
383 /* ===================== Dregexp_prototype ==================== */
384 
/* The object installed as RegExp.prototype. It is itself a Dregexp (built
 * through the prototype-taking constructor) whose class name is reset to
 * Object, and it carries the constructor link plus the native methods.
 */
class DregexpPrototype : Dregexp
{
    this()
    {
        super(Dobject_prototype);
        classname = TEXT_Object;

        // Shared by the constructor link and every native method
        uint fixedAttrs = ReadOnly | DontDelete | DontEnum;

        Put(TEXT_constructor, Dregexp_constructor, fixedAttrs);

        // name, implementation, third field (presumably the declared
        // argument count -- TODO confirm against NativeFunctionData)
        static enum NativeFunctionData[] methods =
        [
            { TEXT_toString, &Dregexp_prototype_toString, 0 },
            { TEXT_compile, &Dregexp_prototype_compile, 2 },
            { TEXT_exec, &Dregexp_prototype_exec, 1 },
            { TEXT_test, &Dregexp_prototype_test, 1 },
        ];

        DnativeFunction.initialize(this, methods, fixedAttrs);
    }
}
407 
408 
409 /* ===================== Dregexp ==================== */
410 
411 
/* Script-level RegExp instance: a Dobject that wraps a RegExp engine
 * object and mirrors its state in the source, global, ignoreCase,
 * multiline and lastIndex properties.
 */
class Dregexp : Dobject
{
    // Cached pointers to this object's own property slots
    Value *global;
    Value *ignoreCase;
    Value *multiline;
    Value *lastIndex;
    Value *source;

    // The underlying regular expression engine object
    RegExp re;

    /* Creates a RegExp instance for pattern/attributes using the shared
     * RegExp prototype.
     *
     * If the engine reports errors (re.errors != 0) the properties keep
     * their defaults and the caller is expected to raise the SyntaxError.
     * NOTE(review): Dregexp_prototype_compile() catches RegExpException
     * from RegExp.compile(); confirm whether `new RegExp` can throw the
     * same exception here instead of only setting re.errors.
     */
    this(d_string pattern, d_string attributes)
    {
        super(getPrototype());
        setupRegExpProperties();

        //writef("Dregexp.Dregexp(pattern = '%ls', attributes = '%ls')\n", d_string_ptr(pattern), d_string_ptr(attributes));
        re = new RegExp(pattern, attributes);
        if(re.errors == 0)
        {
            source.putVstring(pattern);
            //writef("source = '%s'\n", source.x.string.toDchars());
            global.putVboolean((re.attributes & RegExp.REA.global) != 0);
            ignoreCase.putVboolean((re.attributes & RegExp.REA.ignoreCase) != 0);
            multiline.putVboolean((re.attributes & RegExp.REA.multiline) != 0);
        }
        else
        {
            // have caller throw SyntaxError
        }
    }

    /* Creates a RegExp instance with an explicit prototype and a null
     * pattern and attributes; DregexpPrototype is built this way.
     */
    this(Dobject prototype)
    {
        super(prototype);
        setupRegExpProperties();

        re = new RegExp(null, null);
    }

    /* Shared constructor work: sets the class name, creates the five
     * instance properties with their default values and caches their slots.
     */
    private void setupRegExpProperties()
    {
        Value emptyString;
        emptyString.putVstring(null);

        Value falseValue;
        falseValue.putVboolean(false);

        classname = TEXT_RegExp;

        Put(TEXT_source, &emptyString, ReadOnly | DontDelete | DontEnum);
        Put(TEXT_global, &falseValue, ReadOnly | DontDelete | DontEnum);
        Put(TEXT_ignoreCase, &falseValue, ReadOnly | DontDelete | DontEnum);
        Put(TEXT_multiline, &falseValue, ReadOnly | DontDelete | DontEnum);
        Put(TEXT_lastIndex, 0.0, DontDelete | DontEnum);

        // All properties exist now; cache their slots
        source = Get(TEXT_source);
        global = Get(TEXT_global);
        ignoreCase = Get(TEXT_ignoreCase);
        multiline = Get(TEXT_multiline);
        lastIndex = Get(TEXT_lastIndex);
    }

    /* Calling a RegExp object as a function.
     * This is the same as calling RegExp.prototype.exec(str).
     */
    override void* Call(CallContext *cc, Dobject othis, Value* ret, Value[] arglist)
    {
        Value* execFn = Get(TEXT_exec);
        return execFn.toObject().Call(cc, this, ret, arglist);
    }

    /* Returns v's object as a Dregexp when v is a non-primitive value
     * holding a RegExp, otherwise null.
     */
    static Dregexp isRegExp(Value* v)
    {
        Dregexp r;

        if(!v.isPrimitive() && v.toObject().isDregexp())
        {
            r = cast(Dregexp)(v.toObject());
        }
        return r;
    }

    /* Shared matcher behind exec(), test() and the EXEC_STRING / EXEC_INDEX
     * callers.
     *
     * Params:
     *   othis   = receiver; must have class RegExp
     *   ret     = receives the result in the form selected by rettype
     *   arglist = [0] is the subject string; when absent the static
     *             RegExp.input property is used
     *   rettype = one of EXEC_STRING, EXEC_ARRAY, EXEC_BOOLEAN, EXEC_INDEX
     *
     * A successful match also updates the static RegExp.* properties on the
     * constructor (input, lastMatch, leftContext, rightContext, index,
     * lastIndex, $1..$9, lastParen). For global regexps the object's own
     * lastIndex is read and advanced, except with EXEC_INDEX. A failed
     * match resets lastIndex to 0, except with EXEC_INDEX.
     *
     * Returns: null, or the error produced by RuntimeError() when othis is
     * not a RegExp.
     */
    static void* exec(Dobject othis, Value* ret, Value[] arglist, int rettype)
    {
        //writef("Dregexp.exec(arglist.length = %d, rettype = %d)\n", arglist.length, rettype);

        // othis must be a RegExp
        if(!othis.isClass(TEXT_RegExp))
        {
            ret.putVundefined();
            ErrInfo errinfo;
            return RuntimeError(&errinfo, ERR_NOT_TRANSFERRABLE,
                                "RegExp.prototype.exec()");
        }

        d_string s;
        uint i;
        d_int32 lasti;

        Dregexp dr = cast(Dregexp)othis;
        RegExp r = dr.re;
        DregexpConstructor dc = cast(DregexpConstructor)Dregexp.getConstructor();

        if(arglist.length)
            s = arglist[0].toString();
        else
        {
            // RegExp.input is writable from script, so convert it through
            // toString() instead of reading the raw string member, which
            // is only meaningful while the value actually holds a string.
            s = dc.input.toString();
        }

        // Decide if we are multiline
        if(dr.multiline.dbool)
            r.attributes |= RegExp.REA.multiline;
        else
            r.attributes &= ~RegExp.REA.multiline;

        // Only global regexps resume from lastIndex
        if((r.attributes & RegExp.REA.global) && rettype != EXEC_INDEX)
            lasti = cast(int)dr.lastIndex.toInteger();
        else
            lasti = 0;

        if(r.test(s, lasti))
        {   // Successful match
            Value* lastv;
            uint nmatches;

            if((r.attributes & RegExp.REA.global) && rettype != EXEC_INDEX)
            {
                dr.lastIndex.putVnumber(r.pmatch[0].rm_eo);
            }

            // Publish the match through the static RegExp.* properties
            dc.input.putVstring(r.input);

            s = r.input[r.pmatch[0].rm_so .. r.pmatch[0].rm_eo];
            dc.lastMatch.putVstring(s);

            s = r.input[0 .. r.pmatch[0].rm_so];
            dc.leftContext.putVstring(s);

            s = r.input[r.pmatch[0].rm_eo .. $];
            dc.rightContext.putVstring(s);

            dc.index.putVnumber(r.pmatch[0].rm_so);
            dc.lastIndex.putVnumber(r.pmatch[0].rm_eo);

            // Fill in $1..$9
            lastv = &vundefined;
            nmatches = 0;
            for(i = 1; i <= 9; i++)
            {
                if(i <= r.re_nsub)
                {
                    int n;

                    // Use last 9 entries for $1..$9
                    n = i;
                    if(r.re_nsub > 9)
                        n += (r.re_nsub - 9);

                    if(r.pmatch[n].rm_so != -1)
                    {
                        s = r.input[r.pmatch[n].rm_so .. r.pmatch[n].rm_eo];
                        dc.dollar[i].putVstring(s);
                        nmatches = i;
                    }
                    else
                        dc.dollar[i].putVundefined();
                    lastv = dc.dollar[i];
                }
                else
                    dc.dollar[i].putVundefined();
            }
            // Last substring in $1..$9, or "" if none
            if(r.re_nsub)
                Value.copy(dc.lastParen, lastv);
            else
                dc.lastParen.putVstring(null);

            switch(rettype)
            {
            case EXEC_ARRAY:
            {
                Darray a = new Darray();

                a.Put(TEXT_input, r.input, 0);
                a.Put(TEXT_index, r.pmatch[0].rm_so, 0);
                a.Put(TEXT_lastIndex, r.pmatch[0].rm_eo, 0);

                a.Put(cast(d_uint32)0, dc.lastMatch, cast(uint)0);

                // [1]..[nparens]
                // NOTE(review): nmatches is an index into $1..$9 and so
                // never exceeds 9; with more than nine groups every
                // element past nmatches takes the TEXT_ branch (presumably
                // the empty string) even if that group matched -- confirm
                // whether this is intended.
                for(i = 1; i <= r.re_nsub; i++)
                {
                    if(i > nmatches)
                        a.Put(i, TEXT_, 0);

                    // Reuse values already put into dc.dollar[]
                    else if(r.re_nsub <= 9)
                        a.Put(i, dc.dollar[i], 0);
                    else if(i > r.re_nsub - 9)
                        a.Put(i, dc.dollar[i - (r.re_nsub - 9)], 0);
                    else if(r.pmatch[i].rm_so == -1)
                    {
                        a.Put(i, &vundefined, 0);
                    }
                    else
                    {
                        s = r.input[r.pmatch[i].rm_so .. r.pmatch[i].rm_eo];
                        a.Put(i, s, 0);
                    }
                }
                ret.putVobject(a);
                break;
            }
            case EXEC_STRING:
                Value.copy(ret, dc.lastMatch);
                break;

            case EXEC_BOOLEAN:
                ret.putVboolean(true);      // success
                break;

            case EXEC_INDEX:
                ret.putVnumber(r.pmatch[0].rm_so);
                break;

            default:
                assert(0);
            }
        }
        else        // failed to match
        {
            //writef("failed\n");
            switch(rettype)
            {
            case EXEC_ARRAY:
                //writef("memcpy\n");
                ret.putVnull();         // Return null
                dr.lastIndex.putVnumber(0);
                break;

            case EXEC_STRING:
                ret.putVstring(null);
                dr.lastIndex.putVnumber(0);
                break;

            case EXEC_BOOLEAN:
                ret.putVboolean(false);
                dr.lastIndex.putVnumber(0);
                break;

            case EXEC_INDEX:
                ret.putVnumber(-1.0);
                // Do not set lastIndex
                break;

            default:
                assert(0);
            }
        }
        return null;
    }

    // Returns the RegExp constructor function object.
    static Dfunction getConstructor()
    {
        return Dregexp_constructor;
    }

    // Returns the object used as RegExp.prototype.
    static Dobject getPrototype()
    {
        return Dregexp_prototype;
    }

    /* Creates the RegExp constructor and prototype objects and links the
     * prototype to the constructor's "prototype" property.
     */
    static void initialize()
    {
        Dregexp_constructor = new DregexpConstructor();
        Dregexp_prototype = new DregexpPrototype();

        version(none)
        {
            writef("Dregexp_constructor = %x\n", Dregexp_constructor);
            uint *p;
            p = cast(uint *)Dregexp_constructor;
            writef("p = %x\n", p);
            if(p)
                writef("*p = %x, %x, %x, %x\n", p[0], p[1], p[2], p[3]);
        }

        Dregexp_constructor.Put(TEXT_prototype, Dregexp_prototype, DontEnum | DontDelete | ReadOnly);
    }
}