1 /* Digital Mars DMDScript source code.
2  * Copyright (c) 2000-2002 by Chromium Communications
3  * D version Copyright (c) 2004-2010 by Digital Mars
4  * Distributed under the Boost Software License, Version 1.0.
5  * (See accompanying file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
6  * written by Walter Bright
7  * http://www.digitalmars.com
8  *
9  * D2 port by Dmitry Olshansky 
10  *
11  * DMDScript is implemented in the D Programming Language,
12  * http://www.digitalmars.com/d/
13  *
14  * For a C++ implementation of DMDScript, including COM support, see
15  * http://www.digitalmars.com/dscript/cppscript.html
16  */
17 
18 
19 module dmdscript.dregexp;
20 
21 private import undead.regexp;
22 
23 import dmdscript.script;
24 import dmdscript.dobject;
25 import dmdscript.value;
26 import dmdscript.protoerror;
27 import dmdscript.text;
28 import dmdscript.darray;
29 import dmdscript.threadcontext;
30 import dmdscript.dfunction;
31 import dmdscript.property;
32 import dmdscript.errmsgs;
33 import dmdscript.dnative;
34 
35 //alias script.tchar tchar;
36 
// Result modes accepted by the `rettype` argument of Dregexp.exec
enum
{
    EXEC_STRING,    // yields the matched text, or a null string when nothing matched
    EXEC_ARRAY,     // yields a match array, or null when nothing matched
    EXEC_BOOLEAN,   // yields true on a match, false otherwise
    EXEC_INDEX,     // yields the start offset of the match, or -1 when nothing matched
}
39 
40 
41 /* ===================== Dregexp_constructor ==================== */
42 
/**
 * The script-visible `RegExp` constructor function object.
 *
 * Besides creating Dregexp instances, this object owns the static
 * "last match" properties (input, multiline, lastMatch, lastParen,
 * leftContext, rightContext, $1..$9, index, lastIndex).  Pointers to the
 * stored property values are cached in the fields below so that they can be
 * updated in place without another property lookup.
 *
 * Every property accessor is overridden to translate the two-character Perl
 * spellings ($_, $*, $&, $+, $`, $') into the long property names first.
 */
class DregexpConstructor : Dfunction
{
    Value* input;
    Value* multiline;
    Value* lastMatch;
    Value* lastParen;
    Value* leftContext;
    Value* rightContext;
    Value*[10] dollar;      // dollar[0] aliases lastMatch, dollar[1..9] are $1..$9

    // Extensions
    Value* index;
    Value* lastIndex;

    /// Creates the constructor object and installs its static properties.
    this()
    {
        super(2, Dfunction_prototype);

        // Initial values for the static properties
        Value v;
        v.putVstring(null);

        Value vb;
        vb.putVboolean(false);

        Value vnm1;
        vnm1.putVnumber(-1);

        name = "RegExp";

        // Static properties
        Put(TEXT_input, &v, DontDelete);
        Put(TEXT_multiline, &vb, DontDelete);
        Put(TEXT_lastMatch, &v, ReadOnly | DontDelete);
        Put(TEXT_lastParen, &v, ReadOnly | DontDelete);
        Put(TEXT_leftContext, &v, ReadOnly | DontDelete);
        Put(TEXT_rightContext, &v, ReadOnly | DontDelete);
        Put(TEXT_dollar1, &v, ReadOnly | DontDelete);
        Put(TEXT_dollar2, &v, ReadOnly | DontDelete);
        Put(TEXT_dollar3, &v, ReadOnly | DontDelete);
        Put(TEXT_dollar4, &v, ReadOnly | DontDelete);
        Put(TEXT_dollar5, &v, ReadOnly | DontDelete);
        Put(TEXT_dollar6, &v, ReadOnly | DontDelete);
        Put(TEXT_dollar7, &v, ReadOnly | DontDelete);
        Put(TEXT_dollar8, &v, ReadOnly | DontDelete);
        Put(TEXT_dollar9, &v, ReadOnly | DontDelete);

        Put(TEXT_index, &vnm1, ReadOnly | DontDelete);
        Put(TEXT_lastIndex, &vnm1, ReadOnly | DontDelete);

        // Cache pointers to the stored values for direct in-place updates
        input = Get(TEXT_input);
        multiline = Get(TEXT_multiline);
        lastMatch = Get(TEXT_lastMatch);
        lastParen = Get(TEXT_lastParen);
        leftContext = Get(TEXT_leftContext);
        rightContext = Get(TEXT_rightContext);
        dollar[0] = lastMatch;
        dollar[1] = Get(TEXT_dollar1);
        dollar[2] = Get(TEXT_dollar2);
        dollar[3] = Get(TEXT_dollar3);
        dollar[4] = Get(TEXT_dollar4);
        dollar[5] = Get(TEXT_dollar5);
        dollar[6] = Get(TEXT_dollar6);
        dollar[7] = Get(TEXT_dollar7);
        dollar[8] = Get(TEXT_dollar8);
        dollar[9] = Get(TEXT_dollar9);

        index = Get(TEXT_index);
        lastIndex = Get(TEXT_lastIndex);

        // Should lastMatch be an alias for dollar[nparens],
        // or should it be a separate property?
        // We implemented it the latter way.
        // Since both are ReadOnly, I can't see that it makes
        // any difference.
    }

    /**
     * Implements `new RegExp(pattern, flags)` (ECMA 262 v3 15.10.4.1).
     *
     * Params:
     *  cc      = calling context (unused here)
     *  ret     = receives the new Dregexp on success, undefined otherwise
     *  arglist = [pattern, flags]; missing entries are treated as undefined
     *
     * Returns: null on success; otherwise the error value to be thrown
     *          (a TypeError when pattern is a RegExp and flags is given,
     *          a SyntaxError object when the pattern fails to compile).
     */
    override void* Construct(CallContext *cc, Value *ret, Value[] arglist)
    {
        //writef("Dregexp_constructor.Construct()\n");
        ret.putVundefined();

        Value* pattern = &vundefined;
        Value* flags = &vundefined;
        if(arglist.length >= 1)
            pattern = &arglist[0];
        if(arglist.length >= 2)
            flags = &arglist[1];

        d_string P;
        d_string F;
        Dregexp R = Dregexp.isRegExp(pattern);
        if(R)
        {
            // Copying an existing RegExp is only allowed without new flags
            if(!flags.isUndefined())
            {
                ErrInfo errinfo;
                return RuntimeError(&errinfo, ERR_TYPE_ERROR,
                                    "RegExp.prototype.constructor");
            }
            P = R.re.pattern;
            F = R.re.flags;
        }
        else
        {
            P = pattern.isUndefined() ? "" : pattern.toString();
            F = flags.isUndefined() ? "" : flags.toString();
        }

        Dregexp r = new Dregexp(P, F);
        if(r.re.errors)
        {
            Dobject o;
            ErrInfo errinfo;

            version(none)
            {
                writef("P = '%s'\nF = '%s'\n", d_string_ptr(P), d_string_ptr(F));
                for(int i = 0; i < d_string_len(P); i++)
                    writef("x%02x\n", d_string_ptr(P)[i]);
            }
            // Hand a SyntaxError object back to the caller as the thrown value
            errinfo.message = errmsgtbl[ERR_REGEXP_COMPILE];
            o = new syntaxerror.D0(&errinfo);
            Value* v = new Value;
            v.putVobject(o);
            return v;
        }

        ret.putVobject(r);
        return null;
    }

    /**
     * Implements `RegExp(pattern, flags)` called as a plain function
     * (ECMA 262 v3 15.10.3.1): a RegExp argument with undefined flags is
     * returned unchanged, everything else is forwarded to Construct().
     */
    override void* Call(CallContext *cc, Dobject othis, Value* ret, Value[] arglist)
    {
        if(arglist.length >= 1)
        {
            Value* pattern = &arglist[0];
            if(!pattern.isPrimitive())
            {
                Dobject o = pattern.object;
                if(o.isDregexp() &&
                   (arglist.length == 1 || arglist[1].isUndefined()))
                {
                    ret.putVobject(o);
                    return null;
                }
            }
        }
        return Construct(cc, ret, arglist);
    }

    // The accessors below only differ from Dfunction's by first mapping
    // Perl-style property names onto their long equivalents.

    override Value* Get(d_string PropertyName) const
    {
        return Dfunction.Get(perlAlias(PropertyName));
    }

    override Value* Put(d_string PropertyName, Value* value, uint attributes)
    {
        return Dfunction.Put(perlAlias(PropertyName), value, attributes);
    }

    override Value* Put(d_string PropertyName, Dobject o, uint attributes)
    {
        return Dfunction.Put(perlAlias(PropertyName), o, attributes);
    }

    override Value* Put(d_string PropertyName, d_number n, uint attributes)
    {
        return Dfunction.Put(perlAlias(PropertyName), n, attributes);
    }

    override int CanPut(d_string PropertyName)
    {
        return Dfunction.CanPut(perlAlias(PropertyName));
    }

    override int HasProperty(d_string PropertyName)
    {
        return Dfunction.HasProperty(perlAlias(PropertyName));
    }

    override int Delete(d_string PropertyName)
    {
        return Dfunction.Delete(perlAlias(PropertyName));
    }

    /**
     * Translate Perl property names to script property names.
     *
     * A two-character name of the form `$c` with c in _ * & + ` ' maps to
     * input, multiline, lastMatch, lastParen, leftContext and rightContext
     * respectively; any other name is returned unchanged.
     */
    static d_string perlAlias(d_string s)
    {
        if(s.length != 2 || s[0] != '$')
            return s;

        switch(s[1])
        {
        case '_':
            return TEXT_input;
        case '*':
            return TEXT_multiline;
        case '&':
            return TEXT_lastMatch;
        case '+':
            return TEXT_lastParen;
        case '`':
            return TEXT_leftContext;
        case '\'':
            return TEXT_rightContext;
        default:
            return s;
        }
    }
}
279 
280 
281 /* ===================== Dregexp_prototype_toString =============== */
282 
/**
 * RegExp.prototype.toString(): renders the receiver as "/pattern/flags".
 *
 * Stores the text in ret and returns null when othis is a RegExp; otherwise
 * stores undefined in ret and returns the runtime error produced for a
 * non-transferrable method.
 */
void* Dregexp_prototype_toString(Dobject pthis, CallContext *cc, Dobject othis, Value *ret, Value[] arglist)
{
    if(othis.isDregexp())
    {
        Dregexp target = cast(Dregexp)(othis);

        // Assemble "/" pattern "/" flags piece by piece
        d_string text = "/";
        text ~= target.re.pattern;
        text ~= "/";
        text ~= target.re.flags;

        ret.putVstring(text);
        return null;
    }

    // The receiver is not a RegExp object
    ret.putVundefined();
    ErrInfo errinfo;
    return Dobject.RuntimeError(&errinfo, ERR_NOT_TRANSFERRABLE,
                                "RegExp.prototype.toString()");
}
308 
309 /* ===================== Dregexp_prototype_test =============== */
310 
/**
 * RegExp.prototype.test(string).
 *
 * ECMA v3 15.10.6.3 defines this as RegExp.prototype.exec(string) != null,
 * so the shared matcher is run in boolean-result mode.
 */
void* Dregexp_prototype_test(Dobject pthis, CallContext *cc, Dobject othis, Value *ret, Value[] arglist)
{
    void* status = Dregexp.exec(othis, ret, arglist, EXEC_BOOLEAN);
    return status;
}
317 
318 /* ===================== Dregexp_prototype_exec ============= */
319 
/**
 * RegExp.prototype.exec(string): runs the shared matcher in array-result
 * mode and passes its status back unchanged.
 */
void* Dregexp_prototype_exec(Dobject pthis, CallContext *cc, Dobject othis, Value *ret, Value[] arglist)
{
    void* status = Dregexp.exec(othis, ret, arglist, EXEC_ARRAY);
    return status;
}
324 
325 
326 /* ===================== Dregexp_prototype_compile ============= */
327 
/**
 * RegExp.prototype.compile(pattern, attributes): recompiles the receiver's
 * regular expression in place.
 *
 * Params:
 *  pthis   = the function object being invoked (unused)
 *  cc      = calling context (unused)
 *  othis   = receiver; must have class "RegExp"
 *  ret     = always receives undefined
 *  arglist = [pattern, attributes]; missing entries compile as null strings
 *
 * Returns: null, or the runtime error value when othis is not a RegExp.
 */
void* Dregexp_prototype_compile(Dobject pthis, CallContext *cc, Dobject othis, Value *ret, Value[] arglist)
{
    // othis must be a RegExp
    if(!othis.isClass(TEXT_RegExp))
    {
        ErrInfo errinfo;
        ret.putVundefined();
        return Dobject.RuntimeError(&errinfo, ERR_NOT_TRANSFERRABLE,
                                    "RegExp.prototype.compile()");
    }

    d_string pattern;
    d_string attributes;

    switch(arglist.length)
    {
    case 0:
        break;

    default:
        attributes = arglist[1].toString();
        goto case;
    case 1:
        pattern = arglist[0].toString();
        break;
    }

    Dregexp dr = cast(Dregexp)othis;
    RegExp r = dr.re;
    try
    {
        r.compile(pattern, attributes);
    }
    catch(RegExpException e)
    {
        // A compile failure is deliberately not reported to the script
        // (this keeps the historical behaviour); the properties below are
        // still refreshed from whatever state the engine is left in.
    }

    // Keep the script-visible properties in step with the engine on every
    // path.  Previously they were refreshed only inside the catch block, so
    // a successful recompile left source/global/ignoreCase stale, and
    // multiline was never refreshed even though Dregexp.exec() copies it
    // back onto the engine's attributes before matching.
    dr.source.putVstring(r.pattern);
    dr.global.putVboolean((r.attributes & RegExp.REA.global) != 0);
    dr.ignoreCase.putVboolean((r.attributes & RegExp.REA.ignoreCase) != 0);
    dr.multiline.putVboolean((r.attributes & RegExp.REA.multiline) != 0);
    //writef("r.attributes = x%x\n", r.attributes);

    // Documentation says nothing about a return value,
    // so let's use "undefined"
    ret.putVundefined();
    return null;
}
380 
381 /* ===================== Dregexp_prototype ==================== */
382 
/**
 * The RegExp.prototype object: a Dregexp whose class name is reset to
 * "Object" and which carries the `constructor` link plus the native
 * toString/compile/exec/test methods.
 */
class DregexpPrototype : Dregexp
{
    /// Builds the prototype object and registers its native methods.
    this()
    {
        super(Dobject_prototype);
        classname = TEXT_Object;
        uint attributes = ReadOnly | DontDelete | DontEnum;

        Put(TEXT_constructor, Dregexp_constructor, attributes);

        // name, implementation, declared argument count
        static enum NativeFunctionData[] nfd =
        [
            { TEXT_toString, &Dregexp_prototype_toString, 0 },
            { TEXT_compile, &Dregexp_prototype_compile, 2 },
            { TEXT_exec, &Dregexp_prototype_exec, 1 },
            { TEXT_test, &Dregexp_prototype_test, 1 },
        ];

        DnativeFunction.initialize(this, nfd, attributes);
    }
}
405 
406 
407 /* ===================== Dregexp ==================== */
408 
409 
/**
 * A script RegExp object: wraps a RegExp engine instance and exposes the
 * source, global, ignoreCase, multiline and lastIndex properties.
 *
 * Pointers to the stored property values are cached in the fields below so
 * that matching code can read and update them without a property lookup.
 */
class Dregexp : Dobject
{
    Value *global;
    Value *ignoreCase;
    Value *multiline;
    Value *lastIndex;
    Value *source;

    RegExp re;      // the underlying regular expression engine

    /**
     * Creates a RegExp object from a pattern and an attribute string.
     *
     * When the engine reports errors the properties keep their defaults and
     * the caller is expected to check `re.errors` and throw a SyntaxError.
     */
    this(d_string pattern, d_string attributes)
    {
        super(getPrototype());
        initProperties();

        //writef("Dregexp.Dregexp(pattern = '%ls', attributes = '%ls')\n", d_string_ptr(pattern), d_string_ptr(attributes));
        re = new RegExp(pattern, attributes);
        if(re.errors == 0)
        {
            source.putVstring(pattern);
            //writef("source = '%s'\n", source.x.string.toDchars());
            global.putVboolean((re.attributes & RegExp.REA.global) != 0);
            ignoreCase.putVboolean((re.attributes & RegExp.REA.ignoreCase) != 0);
            multiline.putVboolean((re.attributes & RegExp.REA.multiline) != 0);
        }
        else
        {
            // have caller throw SyntaxError
        }
    }

    /// Creates a RegExp object with a null pattern on the given prototype.
    this(Dobject prototype)
    {
        super(prototype);
        initProperties();

        re = new RegExp(null, null);
    }

    // Installs the instance properties with default values and caches
    // pointers to them; shared by both constructors.
    private void initProperties()
    {
        Value v;
        v.putVstring(null);

        Value vb;
        vb.putVboolean(false);

        classname = TEXT_RegExp;

        Put(TEXT_source, &v, ReadOnly | DontDelete | DontEnum);
        Put(TEXT_global, &vb, ReadOnly | DontDelete | DontEnum);
        Put(TEXT_ignoreCase, &vb, ReadOnly | DontDelete | DontEnum);
        Put(TEXT_multiline, &vb, ReadOnly | DontDelete | DontEnum);
        Put(TEXT_lastIndex, 0.0, DontDelete | DontEnum);

        source = Get(TEXT_source);
        global = Get(TEXT_global);
        ignoreCase = Get(TEXT_ignoreCase);
        multiline = Get(TEXT_multiline);
        lastIndex = Get(TEXT_lastIndex);
    }

    /// Calling a RegExp object is the same as calling its `exec` property.
    override void* Call(CallContext *cc, Dobject othis, Value* ret, Value[] arglist)
    {
        // This is the same as calling RegExp.prototype.exec(str)
        Value* v;

        v = Get(TEXT_exec);
        return v.toObject().Call(cc, this, ret, arglist);
    }

    /// Returns v's object as a Dregexp when it is one, otherwise null.
    static Dregexp isRegExp(Value* v)
    {
        Dregexp r;

        if(!v.isPrimitive() && v.toObject().isDregexp())
        {
            r = cast(Dregexp)(v.toObject());
        }
        return r;
    }

    /**
     * Shared matcher behind exec(), test() and the other EXEC_* modes.
     *
     * Params:
     *  othis   = receiver; must have class "RegExp"
     *  ret     = receives the result in the shape selected by rettype
     *  arglist = arglist[0] is the subject string; when absent, the
     *            constructor's static `input` property is used
     *  rettype = one of EXEC_STRING, EXEC_ARRAY, EXEC_BOOLEAN, EXEC_INDEX
     *
     * On a successful match the static properties of the RegExp constructor
     * (input, lastMatch, leftContext, rightContext, index, lastIndex,
     * $1..$9, lastParen) are refreshed.  For global expressions the
     * receiver's lastIndex supplies the start offset and is advanced to the
     * end of the match, except in EXEC_INDEX mode.  On failure lastIndex is
     * reset to 0, again except in EXEC_INDEX mode.
     *
     * Returns: null, or the runtime error value when othis is not a RegExp.
     */
    static void* exec(Dobject othis, Value* ret, Value[] arglist, int rettype)
    {
        //writef("Dregexp.exec(arglist.length = %d, rettype = %d)\n", arglist.length, rettype);

        // othis must be a RegExp
        if(!othis.isClass(TEXT_RegExp))
        {
            ret.putVundefined();
            ErrInfo errinfo;
            return RuntimeError(&errinfo, ERR_NOT_TRANSFERRABLE,
                                "RegExp.prototype.exec()");
        }
        else
        {
            d_string s;
            Dregexp dr;
            RegExp r;
            DregexpConstructor dc;
            uint i;
            d_int32 lasti;

            if(arglist.length)
                s = arglist[0].toString();
            else
            {
                // No subject given: fall back to the static RegExp.input
                Dfunction df;

                df = Dregexp.getConstructor();
                s = (cast(DregexpConstructor)df).input.string;
            }

            dr = cast(Dregexp)othis;
            r = dr.re;
            dc = cast(DregexpConstructor)Dregexp.getConstructor();

            // Decide if we are multiline
            if(dr.multiline.dbool)
                r.attributes |= RegExp.REA.multiline;
            else
                r.attributes &= ~RegExp.REA.multiline;

            // Global expressions resume from lastIndex; others start at 0
            if(r.attributes & RegExp.REA.global && rettype != EXEC_INDEX)
                lasti = cast(int)dr.lastIndex.toInteger();
            else
                lasti = 0;

            if(r.test(s, lasti))
            {   // Successful match
                Value* lastv;
                uint nmatches;

                if(r.attributes & RegExp.REA.global && rettype != EXEC_INDEX)
                {
                    dr.lastIndex.putVnumber(r.pmatch[0].rm_eo);
                }

                // Refresh the constructor's static "last match" properties
                dc.input.putVstring(r.input);

                s = r.input[r.pmatch[0].rm_so .. r.pmatch[0].rm_eo];
                dc.lastMatch.putVstring(s);

                s = r.input[0 .. r.pmatch[0].rm_so];
                dc.leftContext.putVstring(s);

                s = r.input[r.pmatch[0].rm_eo .. $];
                dc.rightContext.putVstring(s);

                dc.index.putVnumber(r.pmatch[0].rm_so);
                dc.lastIndex.putVnumber(r.pmatch[0].rm_eo);

                // Fill in $1..$9
                lastv = &vundefined;
                nmatches = 0;
                for(i = 1; i <= 9; i++)
                {
                    if(i <= r.re_nsub)
                    {
                        int n;

                        // Use last 9 entries for $1..$9
                        n = i;
                        if(r.re_nsub > 9)
                            n += (r.re_nsub - 9);

                        if(r.pmatch[n].rm_so != -1)
                        {
                            s = r.input[r.pmatch[n].rm_so .. r.pmatch[n].rm_eo];
                            dc.dollar[i].putVstring(s);
                            nmatches = i;   // highest $-slot that captured text
                        }
                        else
                            dc.dollar[i].putVundefined();
                        lastv = dc.dollar[i];
                    }
                    else
                        dc.dollar[i].putVundefined();
                }
                // Last substring in $1..$9, or "" if none
                if(r.re_nsub)
                    Value.copy(dc.lastParen, lastv);
                else
                    dc.lastParen.putVstring(null);

                switch(rettype)
                {
                case EXEC_ARRAY:
                {
                    Darray a = new Darray();

                    a.Put(TEXT_input, r.input, 0);
                    a.Put(TEXT_index, r.pmatch[0].rm_so, 0);
                    a.Put(TEXT_lastIndex, r.pmatch[0].rm_eo, 0);

                    // [0] is the whole match
                    a.Put(cast(d_uint32)0, dc.lastMatch, cast(uint)0);

                    // [1]..[nparens]
                    for(i = 1; i <= r.re_nsub; i++)
                    {
                        if(i > nmatches)
                            a.Put(i, TEXT_, 0);

                        // Reuse values already put into dc.dollar[]
                        else if(r.re_nsub <= 9)
                            a.Put(i, dc.dollar[i], 0);
                        else if(i > r.re_nsub - 9)
                            a.Put(i, dc.dollar[i - (r.re_nsub - 9)], 0);
                        else if(r.pmatch[i].rm_so == -1)
                        {
                            a.Put(i, &vundefined, 0);
                        }
                        else
                        {
                            s = r.input[r.pmatch[i].rm_so .. r.pmatch[i].rm_eo];
                            a.Put(i, s, 0);
                        }
                    }
                    ret.putVobject(a);
                    break;
                }
                case EXEC_STRING:
                    Value.copy(ret, dc.lastMatch);
                    break;

                case EXEC_BOOLEAN:
                    ret.putVboolean(true);      // success
                    break;

                case EXEC_INDEX:
                    ret.putVnumber(r.pmatch[0].rm_so);
                    break;

                default:
                    assert(0);
                }
            }
            else        // failed to match
            {
                //writef("failed\n");
                switch(rettype)
                {
                case EXEC_ARRAY:
                    //writef("memcpy\n");
                    ret.putVnull();         // Return null
                    dr.lastIndex.putVnumber(0);
                    break;

                case EXEC_STRING:
                    ret.putVstring(null);
                    dr.lastIndex.putVnumber(0);
                    break;

                case EXEC_BOOLEAN:
                    ret.putVboolean(false);
                    dr.lastIndex.putVnumber(0);
                    break;

                case EXEC_INDEX:
                    ret.putVnumber(-1.0);
                    // Do not set lastIndex
                    break;

                default:
                    assert(0);
                }
            }
        }
        return null;
    }

    /// Returns the RegExp constructor function object.
    static Dfunction getConstructor()
    {
        return Dregexp_constructor;
    }

    /// Returns the RegExp.prototype object.
    static Dobject getPrototype()
    {
        return Dregexp_prototype;
    }

    /// Creates the RegExp constructor and prototype objects and links them.
    static void initialize()
    {
        Dregexp_constructor = new DregexpConstructor();
        Dregexp_prototype = new DregexpPrototype();

        version(none)
        {
            writef("Dregexp_constructor = %x\n", Dregexp_constructor);
            uint *p;
            p = cast(uint *)Dregexp_constructor;
            writef("p = %x\n", p);
            if(p)
                writef("*p = %x, %x, %x, %x\n", p[0], p[1], p[2], p[3]);
        }

        Dregexp_constructor.Put(TEXT_prototype, Dregexp_prototype, DontEnum | DontDelete | ReadOnly);
    }
}