1 module dmdscript.RandAA;
2 //This is a cruedly hacked version, to allow usage of precomputed hashes.
3 /**An associative array implementation that uses randomized linear congruential
4  * probing for collision resolution.  This has the advantage that, no matter
5  * how many collisions there are in the modulus hash space, O(1) expected
6  * lookup time is guaranteed as long as there are few collisions in full 32-
7  * or 64-bit hash space.
8  *
9  * By:  David Simcha
10  *
11  * License:
12  * Boost Software License - Version 1.0 - August 17th, 2003
13  *
14  * Permission is hereby granted, free of charge, to any person or organization
15  * obtaining a copy of the software and accompanying documentation covered by
16  * this license (the "Software") to use, reproduce, display, distribute,
17  * execute, and transmit the Software, and to prepare derivative works of the
18  * Software, and to permit third-parties to whom the Software is furnished to
19  * do so, all subject to the following:
20  *
21  * The copyright notices in the Software and this entire statement, including
22  * the above license grant, this restriction and the following disclaimer,
23  * must be included in all copies of the Software, in whole or in part, and
24  * all derivative works of the Software, unless such copies or derivative
25  * works are solely in the form of machine-executable object code generated by
26  * a source language processor.
27  *
28  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
29  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
30  * FITNESS FOR A PARTICULAR PURPOSE, TITLE AND NON-INFRINGEMENT. IN NO EVENT
31  * SHALL THE COPYRIGHT HOLDERS OR ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE
32  * FOR ANY DAMAGES OR OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE,
33  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
34  * DEALINGS IN THE SOFTWARE.
35  */
36 import std.traits, core.memory, core.exception, std.algorithm, std.conv,
37        std.exception, std.math;
38 
39 /**Exception thrown on missing keys.*/
40 class KeyError : Exception {
41     this(string msg) {
42         super(msg);
43     }
44 }
45 
46 private enum
47 {
48     EMPTY,
49     USED,
50     REMOVED
51 }
52 
53 // It's faster to store the hash if it's expensive to compute, but
54 // faster not to if it's cheap to compute.
55 private template shouldStoreHash(K)
56 {
57     enum bool shouldStoreHash = !isFloatingPoint !K && !isIntegral !K;
58 }
59 /**Forward range to iterate over keys or values of a RandAA.  Elements can
60  * safely be removed (but not added) while iterating over the keys.*/
61 
62 
63 
64 private void missing_key(K) (K key)
65 {
66     throw new KeyError(text("missing or invalid key ", key));
67 }
68 
69 struct aard (K, V, bool useRandom = false)
70 {
71     alias RandAA!(K, V, shouldStoreHash!(K), useRandom) HashMapClass;
72 
73     HashMapClass imp_;
74 
75     V opIndex(K key)
76     {
77         if(imp_ !is null)
78             return imp_.opIndex(key);
79         missing_key(key);
80         assert(0);
81     }
82 
83     V* opIn_r(K k)
84     {
85         if(imp_ is null)
86             return null;
87         return imp_.opIn_r(k);
88     }
89     void opIndexAssign(V value, K k)
90     {
91         if(imp_ is null)
92             imp_ = new HashMapClass();
93         imp_.assignNoRehashCheck(k, value);
94         imp_.rehash();
95     }
96 
97     void clear()
98     {
99         if(imp_ !is null)
100             imp_.free();
101     }
102     void detach()
103     {
104         imp_ = null;
105     }
106     bool remove(K k)
107     {
108         if(imp_ is null)
109             return false;
110         V val;
111 
112         return imp_.remove(k, val);
113     }
114 
115     bool remove(K k, ref V value)
116     {
117         if(imp_ is null)
118             return false;
119         return imp_.remove(k, value);
120     }
121     @property
122     K[] keys()
123     {
124         if(imp_ is null)
125             return null;
126         return imp_.keys();
127     }
128 
129     @property aard allocate()
130     {
131         aard newAA;
132 
133         newAA.imp_ = new HashMapClass();
134 
135         return newAA;
136     }
137     @property void loadRatio(double cap)
138     {
139         // dummy
140     }
141     @property void capacity(size_t cap)
142     {
143         // dummy
144     }
145 
146     @property
147     V[] values()
148     {
149         if(imp_ is null)
150             return null;
151         return imp_.values();
152     }
153 
154     V get(K k)
155     {
156         V* p = opIn_r(k);
157         if(p !is null)
158         {
159             return *p;
160         }
161         return V.init;
162     }
163 
164     bool get(K k, ref V val)
165     {
166         if(imp_ !is null)
167         {
168             V* p = opIn_r(k);
169             if(p !is null)
170             {
171                 val = *p;
172                 return true;
173             }
174         }
175         val = V.init;
176         return false;
177     }
178 
179     @property size_t length()
180     {
181         if(imp_ is null)
182             return 0;
183         return imp_._length;
184     }
185 
186 
187     public int opApply(int delegate(ref V value) dg)
188     {
189         return (imp_ !is null) ? imp_.opApply(dg) : 0;
190     }
191 
192     public int opApply(int delegate(ref K key, ref V value) dg)
193     {
194         return (imp_ !is null) ? imp_.opApply(dg) : 0;
195     }
196 }
197 
198 
199 
200 /**An associative array class that uses randomized probing and open
201  * addressing.  K is the key type, V is the value type, storeHash
202  * determines whether the hash of each key is stored in the array.  This
203  * increases space requirements, but allows for faster rehashing.  By
204  * default, the hash is stored unless the array is an array of floating point
205  * or integer types.
206  */
207 final class RandAA(K, V, bool storeHash = shouldStoreHash!(K), bool useRandom = false)
208 {
209 private:
210 
211     // Store keys, values in parallel arrays.  This prevents us from having
212     // alignment overhead and prevents the GC from scanning values if only
213     // keys have pointers, or vice-versa.
214     K* _keys;
215     V* vals;
216     ubyte* flags;
217 
218     static if(storeHash)
219     {
220         hash_t* hashes;  // For fast reindexing.
221     }
222 
223     size_t mask;    // easy modular 2
224     size_t clock;   // calculate with size
225     size_t _length; // Logical size
226     size_t space;   // Storage space
227     size_t nDead;   // Number of elements removed.
228     //TypeInfo  ti_;
229 
230     // Good values for a linear congruential random number gen.  The modulus
231     // is implicitly uint.max + 1, meaning that we take advantage of overflow
232     // to avoid a div instruction.
233     enum : size_t  { mul = 1103515245U, add = 12345U }
234     enum : size_t  { PERTURB_SHIFT = 32 }
235 
236     // Optimized for a few special cases to avoid the virtual function call
237     // to TypeInfo.getHash().
238     hash_t getHash(K key) const
239     {
240         static if(is (K : long) && K.sizeof <= hash_t.sizeof)
241         {
242             hash_t hash = cast(hash_t)key;
243         }
244         else
245             static if(is (typeof(key.toHash())))
246             {
247                 hash_t hash = key.toHash();
248             }
249             else
250             {
251                 hash_t hash = typeid(K).getHash(cast(const (void)*)&key);
252             }
253 
254         return hash;
255     }
256 
257 
258     static if(storeHash)
259     {
260         size_t findExisting(ref K key)  const
261         {
262             immutable hashFull = getHash(key);
263             size_t pos = hashFull & mask;
264             static if(useRandom)
265                 size_t rand = hashFull + 1;
266             else // static if (P == "perturb")
267             {
268                 size_t perturb = hashFull;
269                 size_t i = pos;
270             }
271 
272             uint flag = void;
273             while(true)
274             {
275                 flag = flags[pos];
276                 if(flag == EMPTY || (hashFull == hashes[pos] && key == _keys[pos] && flag != EMPTY))
277                 {
278                     break;
279                 }
280                 static if(useRandom)
281                 {
282                     rand = rand * mul + add;
283                     pos = (rand + hashFull) & mask;
284                 }
285                 else // static if (P == "perturb")
286                 {
287                     i = (i * 5 + perturb + 1);
288                     perturb /= PERTURB_SHIFT;
289                     pos = i & mask;
290                 }
291             }
292             return (flag == USED) ? pos : size_t.max;
293         }
294 
295         size_t findForInsert(ref K key, immutable hash_t hashFull)
296         {
297             size_t pos = hashFull & mask;
298             static if(useRandom)
299                 size_t rand = hashFull + 1;
300             else //static if (P == "perturb")
301             {
302                 size_t perturb = hashFull;
303                 size_t i = pos;
304             }
305 
306             while(true)
307             {
308                 if(flags[pos] != USED || (hashes[pos] == hashFull && _keys[pos] == key))
309                 {
310                     break;
311                 }
312                 static if(useRandom)
313                 {
314                     rand = rand * mul + add;
315                     pos = (rand + hashFull) & mask;
316                 }
317                 else //static if (P == "perturb")
318                 {
319                     i = (i * 5 + perturb + 1);
320                     perturb /= PERTURB_SHIFT;
321                     pos = i & mask;
322                 }
323             }
324 
325             hashes[pos] = hashFull;
326             return pos;
327         }
328     }
329     else
330     {
331         size_t findExisting(ref K key) const
332         {
333             immutable hashFull = getHash(key);
334             size_t pos = hashFull & mask;
335             static if(useRandom)
336                 size_t rand = hashFull + 1;
337             else //static if (P == "perturb")
338             {
339                 size_t perturb = hashFull;
340                 size_t i = pos;
341             }
342 
343             uint flag = void;
344             while(true)
345             {
346                 flag = flags[pos];
347                 if(flag == EMPTY || (_keys[pos] == key && flag != EMPTY))
348                 {
349                     break;
350                 }
351                 static if(useRandom)
352                 {
353                     rand = rand * mul + add;
354                     pos = (rand + hashFull) & mask;
355                 }
356                 else // static if (P == "perturb")
357                 {
358                     i = (i * 5 + perturb + 1);
359                     perturb /= PERTURB_SHIFT;
360                     pos = i & mask;
361                 }
362             }
363             return (flag == USED) ? pos : size_t.max;
364         }
365 
366         size_t findForInsert(ref K key, immutable hash_t hashFull) const
367         {
368             size_t pos = hashFull & mask;
369             static if(useRandom)
370             {
371                 size_t rand = hashFull + 1;
372             }
373             else
374             {
375                 size_t perturb = hashFull;
376                 size_t i = pos;
377             }
378 
379 
380 
381             while(flags[pos] == USED && _keys[pos] != key)
382             {
383                 static if(useRandom)
384                 {
385                     rand = rand * mul + add;
386                     pos = (rand + hashFull) & mask;
387                 }
388                 else
389                 {
390                     i = (i * 5 + perturb + 1);
391                     perturb /= PERTURB_SHIFT;
392                     pos = i & mask;
393                 }
394             }
395 
396             return pos;
397         }
398     }
399 
400     void assignNoRehashCheck(ref K key, ref V val, hash_t hashFull)
401     {
402         size_t i = findForInsert(key, hashFull);
403         vals[i] = val;
404         immutable uint flag = flags[i];
405         if(flag != USED)
406         {
407             if(flag == REMOVED)
408             {
409                 nDead--;
410             }
411             _length++;
412             flags[i] = USED;
413             _keys[i] = key;
414         }
415     }
416 
417     void assignNoRehashCheck(ref K key, ref V val)
418     {
419         hash_t hashFull = getHash(key);
420         size_t i = findForInsert(key, hashFull);
421         vals[i] = val;
422         immutable uint flag = flags[i];
423         if(flag != USED)
424         {
425             if(flag == REMOVED)
426             {
427                 nDead--;
428             }
429             _length++;
430             flags[i] = USED;
431             _keys[i] = key;
432         }
433     }
434 
435     // Dummy constructor only used internally.
436     this(bool dummy) {}
437 
438 public:
439 
440     static size_t getNextP2(size_t n)
441     {
442         // get the powerof 2 > n
443 
444         size_t result = 16;
445         while(n >= result)
446         {
447             result *= 2;
448         }
449         return result;
450     }
451     /**Construct an instance of RandAA with initial size initSize.
452      * initSize determines the amount of slots pre-allocated.*/
453     this(size_t initSize = 10) {
454         //initSize = nextSize(initSize);
455         space = getNextP2(initSize);
456         mask = space - 1;
457         _keys = (new K[space]).ptr;
458         vals = (new V[space]).ptr;
459 
460         static if(storeHash)
461         {
462             hashes = (new hash_t[space]).ptr;
463         }
464 
465         flags = (new ubyte[space]).ptr;
466     }
467 
468     ///
469     void rehash()
470     {
471         if(cast(float)(_length + nDead) / space < 0.7)
472         {
473             return;
474         }
475         reserve(space + 1);
476     }
477 
478     /**Reserve enough space for newSize elements.  Note that the rehashing
479      * heuristics do not guarantee that no new space will be allocated before
480      * newSize elements are added.
481      */
482     private void reserve(size_t newSize)
483     {
484         scope typeof(this)newTable = new typeof(this)(newSize);
485 
486         foreach(i; 0..space)
487         {
488             if(flags[i] == USED)
489             {
490                 static if(storeHash)
491                 {
492                     newTable.assignNoRehashCheck(_keys[i], vals[i], hashes[i]);
493                 }
494                 else
495                 {
496                     newTable.assignNoRehashCheck(_keys[i], vals[i]);
497                 }
498             }
499         }
500 
501         // Can't free vals b/c references to it could escape.  Let GC
502         // handle it.
503         GC.free(cast(void*)this._keys);
504         GC.free(cast(void*)this.flags);
505         GC.free(cast(void*)this.vals);
506 
507         static if(storeHash)
508         {
509             GC.free(cast(void*)this.hashes);
510         }
511 
512         foreach(ti, elem; newTable.tupleof)
513         {
514             this.tupleof[ti] = elem;
515         }
516     }
517 
518     /**Throws a KeyError on unsuccessful key search.*/
519     ref V opIndex(K index)
520     {
521         size_t i = findExisting(index);
522         if(i == size_t.max)
523         {
524             throw new KeyError("Could not find key " ~ to !string(index));
525         }
526         else
527         {
528             return vals[i];
529         }
530     }
531 /+ I have to insure there is no returns by value, rude hack
532     /**Non-ref return for const instances.*/
533     V opIndex(K index)
534     {
535         size_t i = findExisting(index);
536         if(i == size_t.max)
537         {
538             throw new KeyError("Could not find key " ~ to !string(index));
539         }
540         else
541         {
542             return vals[i];
543         }
544     }
545 +/
546 	///Hackery
547 	V* findExistingAlt(ref K key, hash_t hashFull){
548             size_t pos = hashFull & mask;
549             static if(useRandom)
550                 size_t rand = hashFull + 1;
551             else // static if (P == "perturb")
552             {
553                 size_t perturb = hashFull;
554                 size_t i = pos;
555             }
556 
557             uint flag = void;
558             while(true)
559             {
560                 flag = flags[pos];
561                 if(flag == EMPTY || (hashFull == hashes[pos] && key == _keys[pos] && flag != EMPTY))
562                 {
563                     break;
564                 }
565                 static if(useRandom)
566                 {
567                     rand = rand * mul + add;
568                     pos = (rand + hashFull) & mask;
569                 }
570                 else // static if (P == "perturb")
571                 {
572                     i = (i * 5 + perturb + 1);
573                     perturb /= PERTURB_SHIFT;
574                     pos = i & mask;
575                 }
576             }
577             return (flag == USED) ? &vals[pos] : null;
578 	}
579 	void insertAlt(ref K key, ref V val, hash_t hashFull){
580 		assignNoRehashCheck(key, val, hashFull);
581 		rehash();
582 	}
583 
584     ///
585     void opIndexAssign(V val, K index)
586     {
587         assignNoRehashCheck(index, val);
588         rehash();
589     }
590     struct KeyValRange (K, V, bool storeHash, bool vals) {
591 private:
592         static if(vals)
593         {
594             alias V T;
595         }
596         else
597         {
598             alias K T;
599         }
600         size_t index = 0;
601         RandAA aa;
602 public:
603         this(RandAA aa) {
604             this.aa = aa;
605             while(aa.flags[index] != USED && index < aa.space)
606             {
607                 index++;
608             }
609         }
610 
611         ///
612         T front()
613         {
614             static if(vals)
615             {
616                 return aa.vals[index];
617             }
618             else
619             {
620                 return aa._keys[index];
621             }
622         }
623 
624         ///
625         void popFront()
626         {
627             index++;
628             while(aa.flags[index] != USED && index < aa.space)
629             {
630                 index++;
631             }
632         }
633 
634         ///
635         bool empty()
636         {
637             return index == aa.space;
638         }
639 
640         string toString()
641         {
642             char[] ret = "[".dup;
643             auto copy = this;
644             foreach(elem; copy)
645             {
646                 ret ~= to !string(elem);
647                 ret ~= ", ";
648             }
649 
650             ret[$ - 2] = ']';
651             ret = ret[0..$ - 1];
652             auto retImmutable = assumeUnique(ret);
653             return retImmutable;
654         }
655     }
656     alias KeyValRange!(K, V, storeHash, false) key_range;
657     alias KeyValRange!(K, V, storeHash, true) value_range;
658 
659     /**Does not allocate.  Returns a simple forward range.*/
660     key_range keyRange()
661     {
662         return key_range(this);
663     }
664 
665     /**Does not allocate.  Returns a simple forward range.*/
666     value_range valueRange()
667     {
668         return value_range(this);
669     }
670 
671     /**Removes an element from this.  Elements *may* be removed while iterating
672      * via .keys.*/
673     V remove(K index)
674     {
675         size_t i = findExisting(index);
676         if(i == size_t.max)
677         {
678             throw new KeyError("Could not find key " ~ to !string(index));
679         }
680         else
681         {
682             _length--;
683             nDead++;
684             flags[i] = REMOVED;
685             return vals[i];
686         }
687     }
688 
689     V[] values()
690     {
691         size_t i = 0;
692         V[] result = new V[this._length];
693 
694         foreach(k, v; this)
695             result[i++] = v;
696         return result;
697     }
698     K[] keys()
699     {
700         size_t i = 0;
701         K[] result = new K[this._length];
702 
703         foreach(k, v; this)
704             result[i++] = k;
705         return result;
706     }
707     bool remove(K index, ref V value)
708     {
709         size_t i = findExisting(index);
710         if(i == size_t.max)
711         {
712             return false;
713         }
714         else
715         {
716             _length--;
717             nDead++;
718             flags[i] = REMOVED;
719             value = vals[i];
720             return true;
721         }
722     }
723     /**Returns null if index is not found.*/
724     V* opIn_r(K index)
725     {
726         size_t i = findExisting(index);
727         if(i == size_t.max)
728         {
729             return null;
730         }
731         else
732         {
733             return vals + i;
734         }
735     }
736 
737     /**Iterate over keys, values in lockstep.*/
738     int opApply(int delegate(ref K, ref V) dg)
739     {
740         int result;
741         foreach(i, k; _keys[0..space])
742             if(flags[i] == USED)
743             {
744                 result = dg(k, vals[i]);
745                 if(result)
746                 {
747                     break;
748                 }
749             }
750         return result;
751     }
752 
753     private template DeconstArrayType(T)
754     {
755         static if(isStaticArray!(T))
756         {
757             alias typeof(T.init[0])[] type;             //the equivalent dynamic array
758         }
759         else
760         {
761             alias T type;
762         }
763     }
764 
765     alias DeconstArrayType!(K).type K_;
766     alias DeconstArrayType!(V).type V_;
767 
768     public int opApply(int delegate(ref V_ value) dg)
769     {
770         return opApply((ref K_ k, ref V_ v) { return dg(v); });
771     }
772 
773     void clear()
774     {
775         free();
776     }
777     /**Allows for deleting the contents of the array manually, if supported
778      * by the GC.*/
779     void free()
780     {
781         GC.free(cast(void*)this._keys);
782         GC.free(cast(void*)this.vals);
783         GC.free(cast(void*)this.flags);
784 
785         static if(storeHash)
786         {
787             GC.free(cast(void*)this.hashes);
788         }
789     }
790 
791     ///
792     size_t length()
793     {
794         return _length;
795     }
796 }
797 
798 import std.random, std.exception, std.stdio;
799 /*
800    // Test it out.
801    void unit_tests() {
802     string[string] builtin;
803     auto myAA = new RandAA!(string, string)();
804 
805     foreach(i; 0..100_000) {
806         auto myKey = randString(20);
807         auto myVal = randString(20);
808         builtin[myKey] = myVal;
809         myAA[myKey] = myVal;
810     }
811 
812     enforce(myAA.length == builtin.length);
813     foreach(key; myAA.keys) {
814         enforce(myAA[key] == builtin[key]);
815     }
816 
817     auto keys = builtin.keys;
818     randomShuffle(keys);
819     foreach(toRemove; keys[0..1000]) {
820         builtin.remove(toRemove);
821         myAA.remove(toRemove);
822     }
823 
824 
825     myAA.rehash();
826     enforce(myAA.length == builtin.length);
827     foreach(k, v; builtin) {
828         enforce(k in myAA);
829         enforce( *(k in myAA) == v);
830     }
831 
832     string[] myValues;
833     foreach(val; myAA.values) {
834         myValues ~= val;
835     }
836 
837     string[] myKeys;
838     foreach(key; myAA.keys) {
839         myKeys ~= key;
840     }
841 
842     auto builtinKeys = builtin.keys;
843     auto builtinVals = builtin.values;
844     sort(builtinVals);
845     sort(builtinKeys);
846     sort(myKeys);
847     sort(myValues);
848     enforce(myKeys == builtinKeys);
849     enforce(myValues == builtinVals);
850 
851     writeln("Passed all tests.");
852    }
853 
854  */