1 module dmdscript.RandAA; 2 //This is a cruedly hacked version, to allow usage of precomputed hashes. 3 /**An associative array implementation that uses randomized linear congruential 4 * probing for collision resolution. This has the advantage that, no matter 5 * how many collisions there are in the modulus hash space, O(1) expected 6 * lookup time is guaranteed as long as there are few collisions in full 32- 7 * or 64-bit hash space. 8 * 9 * By: David Simcha 10 * 11 * License: 12 * Boost Software License - Version 1.0 - August 17th, 2003 13 * 14 * Permission is hereby granted, free of charge, to any person or organization 15 * obtaining a copy of the software and accompanying documentation covered by 16 * this license (the "Software") to use, reproduce, display, distribute, 17 * execute, and transmit the Software, and to prepare derivative works of the 18 * Software, and to permit third-parties to whom the Software is furnished to 19 * do so, all subject to the following: 20 * 21 * The copyright notices in the Software and this entire statement, including 22 * the above license grant, this restriction and the following disclaimer, 23 * must be included in all copies of the Software, in whole or in part, and 24 * all derivative works of the Software, unless such copies or derivative 25 * works are solely in the form of machine-executable object code generated by 26 * a source language processor. 27 * 28 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 29 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 30 * FITNESS FOR A PARTICULAR PURPOSE, TITLE AND NON-INFRINGEMENT. IN NO EVENT 31 * SHALL THE COPYRIGHT HOLDERS OR ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE 32 * FOR ANY DAMAGES OR OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE, 33 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 34 * DEALINGS IN THE SOFTWARE. 35 */ 36 import std.traits, core.memory, core.exception, std.algorithm, std.conv, 37 std.exception, std.math; 38 39 /**Exception thrown on missing keys.*/ 40 class KeyError : Exception { 41 this(string msg) { 42 super(msg); 43 } 44 } 45 46 private enum 47 { 48 EMPTY, 49 USED, 50 REMOVED 51 } 52 53 // It's faster to store the hash if it's expensive to compute, but 54 // faster not to if it's cheap to compute. 55 private template shouldStoreHash(K) 56 { 57 enum bool shouldStoreHash = !isFloatingPoint !K && !isIntegral !K; 58 } 59 /**Forward range to iterate over keys or values of a RandAA. Elements can 60 * safely be removed (but not added) while iterating over the keys.*/ 61 62 63 64 private void missing_key(K) (K key) 65 { 66 throw new KeyError(text("missing or invalid key ", key)); 67 } 68 69 struct aard (K, V, bool useRandom = false) 70 { 71 alias RandAA!(K, V, shouldStoreHash!(K), useRandom) HashMapClass; 72 73 HashMapClass imp_; 74 75 V opIndex(K key) 76 { 77 if(imp_ !is null) 78 return imp_.opIndex(key); 79 missing_key(key); 80 assert(0); 81 } 82 83 84 V* opBinaryRight(string op : "in")(K k) 85 { 86 if(imp_ is null) 87 return null; 88 return imp_.opIn_r(k); 89 } 90 void opIndexAssign(V value, K k) 91 { 92 if(imp_ is null) 93 imp_ = new HashMapClass(); 94 imp_.assignNoRehashCheck(k, value); 95 imp_.rehash(); 96 } 97 98 void clear() 99 { 100 if(imp_ !is null) 101 imp_.free(); 102 } 103 void detach() 104 { 105 imp_ = null; 106 } 107 bool remove(K k) 108 { 109 if(imp_ is null) 110 return false; 111 V val; 112 113 return imp_.remove(k, val); 114 } 115 116 bool remove(K k, ref V value) 117 { 118 if(imp_ is null) 119 return false; 120 return imp_.remove(k, value); 121 } 122 @property 123 K[] keys() 124 { 125 if(imp_ is null) 126 return null; 127 return imp_.keys(); 128 } 129 130 @property aard allocate() 131 { 132 aard newAA; 133 134 newAA.imp_ = new HashMapClass(); 135 136 return newAA; 137 } 138 @property void loadRatio(double cap) 139 { 140 // dummy 141 } 142 @property void capacity(size_t cap) 143 { 144 // dummy 145 } 146 147 @property 148 V[] values() 149 { 150 if(imp_ is null) 151 return null; 152 return imp_.values(); 153 } 154 155 V get(K k) 156 { 157 V* p = k in this; 158 if(p !is null) 159 { 160 return *p; 161 } 162 return V.init; 163 } 164 165 bool get(K k, ref V val) 166 { 167 if(imp_ !is null) 168 { 169 V* p = k in this; 170 if(p !is null) 171 { 172 val = *p; 173 return true; 174 } 175 } 176 val = V.init; 177 return false; 178 } 179 180 @property size_t length() 181 { 182 if(imp_ is null) 183 return 0; 184 return imp_._length; 185 } 186 187 188 public int opApply(int delegate(ref V value) dg) 189 { 190 return (imp_ !is null) ? imp_.opApply(dg) : 0; 191 } 192 193 public int opApply(int delegate(ref K key, ref V value) dg) 194 { 195 return (imp_ !is null) ? imp_.opApply(dg) : 0; 196 } 197 } 198 199 200 201 /**An associative array class that uses randomized probing and open 202 * addressing. K is the key type, V is the value type, storeHash 203 * determines whether the hash of each key is stored in the array. This 204 * increases space requirements, but allows for faster rehashing. By 205 * default, the hash is stored unless the array is an array of floating point 206 * or integer types. 207 */ 208 final class RandAA(K, V, bool storeHash = shouldStoreHash!(K), bool useRandom = false) 209 { 210 private: 211 import dmdscript.script : CallContext; 212 213 // Store keys, values in parallel arrays. This prevents us from having 214 // alignment overhead and prevents the GC from scanning values if only 215 // keys have pointers, or vice-versa. 216 K* _keys; 217 V* vals; 218 ubyte* flags; 219 CallContext* callcontext; 220 221 static if(storeHash) 222 { 223 hash_t* hashes; // For fast reindexing. 224 } 225 226 size_t mask; // easy modular 2 227 size_t clock; // calculate with size 228 size_t _length; // Logical size 229 size_t space; // Storage space 230 size_t nDead; // Number of elements removed. 231 //TypeInfo ti_; 232 233 // Good values for a linear congruential random number gen. The modulus 234 // is implicitly uint.max + 1, meaning that we take advantage of overflow 235 // to avoid a div instruction. 236 enum : size_t { mul = 1103515245U, add = 12345U } 237 enum : size_t { PERTURB_SHIFT = 32 } 238 239 // Optimized for a few special cases to avoid the virtual function call 240 // to TypeInfo.getHash(). 241 hash_t getHash(K key) const 242 { 243 static if(is (K : long) && K.sizeof <= hash_t.sizeof) 244 { 245 hash_t hash = cast(hash_t)key; 246 } 247 else 248 static if(is (typeof(key.toHash(cast(CallContext*)null)))) 249 { 250 hash_t hash = key.toHash(cast(CallContext*)callcontext); 251 } 252 else static if(is (typeof(key.toHash()))) 253 { 254 hash_t hash = key.toHash(); 255 } 256 else 257 { 258 hash_t hash = typeid(K).getHash(cast(const (void)*)&key); 259 } 260 261 return hash; 262 } 263 264 265 static if(storeHash) 266 { 267 size_t findExisting(ref K key) const 268 { 269 immutable hashFull = getHash(key); 270 size_t pos = hashFull & mask; 271 static if(useRandom) 272 size_t rand = hashFull + 1; 273 else // static if (P == "perturb") 274 { 275 size_t perturb = hashFull; 276 size_t i = pos; 277 } 278 279 uint flag = void; 280 while(true) 281 { 282 flag = flags[pos]; 283 if(flag == EMPTY || (hashFull == hashes[pos] && key.isEqual(cast(CallContext*)callcontext, _keys[pos]) && flag != EMPTY)) 284 { 285 break; 286 } 287 static if(useRandom) 288 { 289 rand = rand * mul + add; 290 pos = (rand + hashFull) & mask; 291 } 292 else // static if (P == "perturb") 293 { 294 i = (i * 5 + perturb + 1); 295 perturb /= PERTURB_SHIFT; 296 pos = i & mask; 297 } 298 } 299 return (flag == USED) ? pos : size_t.max; 300 } 301 302 size_t findForInsert(ref K key, immutable hash_t hashFull) 303 { 304 size_t pos = hashFull & mask; 305 static if(useRandom) 306 size_t rand = hashFull + 1; 307 else //static if (P == "perturb") 308 { 309 size_t perturb = hashFull; 310 size_t i = pos; 311 } 312 313 while(true) 314 { 315 if(flags[pos] != USED || (hashes[pos] == hashFull && _keys[pos].isEqual(callcontext, key))) 316 { 317 break; 318 } 319 static if(useRandom) 320 { 321 rand = rand * mul + add; 322 pos = (rand + hashFull) & mask; 323 } 324 else //static if (P == "perturb") 325 { 326 i = (i * 5 + perturb + 1); 327 perturb /= PERTURB_SHIFT; 328 pos = i & mask; 329 } 330 } 331 332 hashes[pos] = hashFull; 333 return pos; 334 } 335 } 336 else 337 { 338 size_t findExisting(ref K key) const 339 { 340 immutable hashFull = getHash(key); 341 size_t pos = hashFull & mask; 342 static if(useRandom) 343 size_t rand = hashFull + 1; 344 else //static if (P == "perturb") 345 { 346 size_t perturb = hashFull; 347 size_t i = pos; 348 } 349 350 uint flag = void; 351 while(true) 352 { 353 flag = flags[pos]; 354 if(flag == EMPTY || (_keys[pos] == key && flag != EMPTY)) 355 { 356 break; 357 } 358 static if(useRandom) 359 { 360 rand = rand * mul + add; 361 pos = (rand + hashFull) & mask; 362 } 363 else // static if (P == "perturb") 364 { 365 i = (i * 5 + perturb + 1); 366 perturb /= PERTURB_SHIFT; 367 pos = i & mask; 368 } 369 } 370 return (flag == USED) ? pos : size_t.max; 371 } 372 373 size_t findForInsert(ref K key, immutable hash_t hashFull) const 374 { 375 size_t pos = hashFull & mask; 376 static if(useRandom) 377 { 378 size_t rand = hashFull + 1; 379 } 380 else 381 { 382 size_t perturb = hashFull; 383 size_t i = pos; 384 } 385 386 387 388 while(flags[pos] == USED && _keys[pos] != key) 389 { 390 static if(useRandom) 391 { 392 rand = rand * mul + add; 393 pos = (rand + hashFull) & mask; 394 } 395 else 396 { 397 i = (i * 5 + perturb + 1); 398 perturb /= PERTURB_SHIFT; 399 pos = i & mask; 400 } 401 } 402 403 return pos; 404 } 405 } 406 407 void assignNoRehashCheck(ref K key, ref V val, hash_t hashFull) 408 { 409 size_t i = findForInsert(key, hashFull); 410 vals[i] = val; 411 immutable uint flag = flags[i]; 412 if(flag != USED) 413 { 414 if(flag == REMOVED) 415 { 416 nDead--; 417 } 418 _length++; 419 flags[i] = USED; 420 _keys[i] = key; 421 } 422 } 423 424 void assignNoRehashCheck(ref K key, ref V val) 425 { 426 hash_t hashFull = getHash(key); 427 size_t i = findForInsert(key, hashFull); 428 vals[i] = val; 429 immutable uint flag = flags[i]; 430 if(flag != USED) 431 { 432 if(flag == REMOVED) 433 { 434 nDead--; 435 } 436 _length++; 437 flags[i] = USED; 438 _keys[i] = key; 439 } 440 } 441 442 // Dummy constructor only used internally. 443 this(bool dummy) {} 444 445 public: 446 447 static size_t getNextP2(size_t n) 448 { 449 // get the powerof 2 > n 450 451 size_t result = 16; 452 while(n >= result) 453 { 454 result *= 2; 455 } 456 return result; 457 } 458 /**Construct an instance of RandAA with initial size initSize. 459 * initSize determines the amount of slots pre-allocated.*/ 460 this(CallContext* cc, size_t initSize = 10) { 461 //initSize = nextSize(initSize); 462 callcontext = cc; 463 space = getNextP2(initSize); 464 mask = space - 1; 465 _keys = (new K[space]).ptr; 466 vals = (new V[space]).ptr; 467 468 static if(storeHash) 469 { 470 hashes = (new hash_t[space]).ptr; 471 } 472 473 flags = (new ubyte[space]).ptr; 474 } 475 476 /// 477 void rehash() 478 { 479 if(cast(float)(_length + nDead) / space < 0.7) 480 { 481 return; 482 } 483 reserve(space + 1); 484 } 485 486 /**Reserve enough space for newSize elements. Note that the rehashing 487 * heuristics do not guarantee that no new space will be allocated before 488 * newSize elements are added. 489 */ 490 private void reserve(size_t newSize) 491 { 492 scope typeof(this)newTable = new typeof(this)(callcontext, newSize); 493 494 foreach(i; 0..space) 495 { 496 if(flags[i] == USED) 497 { 498 static if(storeHash) 499 { 500 newTable.assignNoRehashCheck(_keys[i], vals[i], hashes[i]); 501 } 502 else 503 { 504 newTable.assignNoRehashCheck(_keys[i], vals[i]); 505 } 506 } 507 } 508 509 // Can't free vals b/c references to it could escape. Let GC 510 // handle it. 511 GC.free(cast(void*)this._keys); 512 GC.free(cast(void*)this.flags); 513 GC.free(cast(void*)this.vals); 514 515 static if(storeHash) 516 { 517 GC.free(cast(void*)this.hashes); 518 } 519 520 foreach(ti, elem; newTable.tupleof) 521 { 522 this.tupleof[ti] = elem; 523 } 524 } 525 526 /**Throws a KeyError on unsuccessful key search.*/ 527 ref V opIndex(K index) 528 { 529 size_t i = findExisting(index); 530 if(i == size_t.max) 531 { 532 throw new KeyError("Could not find key " ~ to !string(index)); 533 } 534 else 535 { 536 return vals[i]; 537 } 538 } 539 /+ I have to insure there is no returns by value, rude hack 540 /**Non-ref return for const instances.*/ 541 V opIndex(K index) 542 { 543 size_t i = findExisting(index); 544 if(i == size_t.max) 545 { 546 throw new KeyError("Could not find key " ~ to !string(index)); 547 } 548 else 549 { 550 return vals[i]; 551 } 552 } 553 +/ 554 ///Hackery 555 V* findExistingAlt(ref K key, hash_t hashFull){ 556 size_t pos = hashFull & mask; 557 static if(useRandom) 558 size_t rand = hashFull + 1; 559 else // static if (P == "perturb") 560 { 561 size_t perturb = hashFull; 562 size_t i = pos; 563 } 564 565 uint flag = void; 566 while(true) 567 { 568 flag = flags[pos]; 569 if(flag == EMPTY || (hashFull == hashes[pos] && key.isEqual(callcontext, _keys[pos]) && flag != EMPTY)) 570 { 571 break; 572 } 573 static if(useRandom) 574 { 575 rand = rand * mul + add; 576 pos = (rand + hashFull) & mask; 577 } 578 else // static if (P == "perturb") 579 { 580 i = (i * 5 + perturb + 1); 581 perturb /= PERTURB_SHIFT; 582 pos = i & mask; 583 } 584 } 585 return (flag == USED) ? &vals[pos] : null; 586 } 587 void insertAlt(ref K key, ref V val, hash_t hashFull){ 588 assignNoRehashCheck(key, val, hashFull); 589 rehash(); 590 } 591 592 /// 593 void opIndexAssign(V val, K index) 594 { 595 assignNoRehashCheck(index, val); 596 rehash(); 597 } 598 struct KeyValRange (K, V, bool storeHash, bool vals) { 599 private: 600 static if(vals) 601 { 602 alias V T; 603 } 604 else 605 { 606 alias K T; 607 } 608 size_t index = 0; 609 RandAA aa; 610 public: 611 this(RandAA aa) { 612 this.aa = aa; 613 while(aa.flags[index] != USED && index < aa.space) 614 { 615 index++; 616 } 617 } 618 619 /// 620 T front() 621 { 622 static if(vals) 623 { 624 return aa.vals[index]; 625 } 626 else 627 { 628 return aa._keys[index]; 629 } 630 } 631 632 /// 633 void popFront() 634 { 635 index++; 636 while(aa.flags[index] != USED && index < aa.space) 637 { 638 index++; 639 } 640 } 641 642 /// 643 bool empty() 644 { 645 return index == aa.space; 646 } 647 648 string toString() 649 { 650 char[] ret = "[".dup; 651 auto copy = this; 652 foreach(elem; copy) 653 { 654 ret ~= to !string(elem); 655 ret ~= ", "; 656 } 657 658 ret[$ - 2] = ']'; 659 ret = ret[0..$ - 1]; 660 auto retImmutable = assumeUnique(ret); 661 return retImmutable; 662 } 663 } 664 alias KeyValRange!(K, V, storeHash, false) key_range; 665 alias KeyValRange!(K, V, storeHash, true) value_range; 666 667 /**Does not allocate. Returns a simple forward range.*/ 668 key_range keyRange() 669 { 670 return key_range(this); 671 } 672 673 /**Does not allocate. Returns a simple forward range.*/ 674 value_range valueRange() 675 { 676 return value_range(this); 677 } 678 679 /**Removes an element from this. Elements *may* be removed while iterating 680 * via .keys.*/ 681 V remove(K index) 682 { 683 size_t i = findExisting(index); 684 if(i == size_t.max) 685 { 686 throw new KeyError("Could not find key " ~ to !string(index)); 687 } 688 else 689 { 690 _length--; 691 nDead++; 692 flags[i] = REMOVED; 693 return vals[i]; 694 } 695 } 696 697 V[] values() 698 { 699 size_t i = 0; 700 V[] result = new V[this._length]; 701 702 foreach(k, v; this) 703 result[i++] = v; 704 return result; 705 } 706 K[] keys() 707 { 708 size_t i = 0; 709 K[] result = new K[this._length]; 710 711 foreach(k, v; this) 712 result[i++] = k; 713 return result; 714 } 715 bool remove(K index, ref V value) 716 { 717 size_t i = findExisting(index); 718 if(i == size_t.max) 719 { 720 return false; 721 } 722 else 723 { 724 _length--; 725 nDead++; 726 flags[i] = REMOVED; 727 value = vals[i]; 728 return true; 729 } 730 } 731 /**Returns null if index is not found.*/ 732 V* opBinaryRight(string op : "in")(K index) 733 { 734 size_t i = findExisting(index); 735 if(i == size_t.max) 736 { 737 return null; 738 } 739 else 740 { 741 return vals + i; 742 } 743 } 744 745 /**Iterate over keys, values in lockstep.*/ 746 int opApply(int delegate(ref K, ref V) dg) 747 { 748 int result; 749 foreach(i, k; _keys[0..space]) 750 if(flags[i] == USED) 751 { 752 result = dg(k, vals[i]); 753 if(result) 754 { 755 break; 756 } 757 } 758 return result; 759 } 760 761 private template DeconstArrayType(T) 762 { 763 static if(isStaticArray!(T)) 764 { 765 alias typeof(T.init[0])[] type; //the equivalent dynamic array 766 } 767 else 768 { 769 alias T type; 770 } 771 } 772 773 alias DeconstArrayType!(K).type K_; 774 alias DeconstArrayType!(V).type V_; 775 776 public int opApply(int delegate(ref V_ value) dg) 777 { 778 return opApply((ref K_ k, ref V_ v) { return dg(v); }); 779 } 780 781 void clear() 782 { 783 free(); 784 } 785 /**Allows for deleting the contents of the array manually, if supported 786 * by the GC.*/ 787 void free() 788 { 789 GC.free(cast(void*)this._keys); 790 GC.free(cast(void*)this.vals); 791 GC.free(cast(void*)this.flags); 792 793 static if(storeHash) 794 { 795 GC.free(cast(void*)this.hashes); 796 } 797 } 798 799 /// 800 size_t length() 801 { 802 return _length; 803 } 804 } 805 806 import std.random, std.exception, std.stdio; 807 /* 808 // Test it out. 809 void unit_tests() { 810 string[string] builtin; 811 auto myAA = new RandAA!(string, string)(); 812 813 foreach(i; 0..100_000) { 814 auto myKey = randString(20); 815 auto myVal = randString(20); 816 builtin[myKey] = myVal; 817 myAA[myKey] = myVal; 818 } 819 820 enforce(myAA.length == builtin.length); 821 foreach(key; myAA.keys) { 822 enforce(myAA[key] == builtin[key]); 823 } 824 825 auto keys = builtin.keys; 826 randomShuffle(keys); 827 foreach(toRemove; keys[0..1000]) { 828 builtin.remove(toRemove); 829 myAA.remove(toRemove); 830 } 831 832 833 myAA.rehash(); 834 enforce(myAA.length == builtin.length); 835 foreach(k, v; builtin) { 836 enforce(k in myAA); 837 enforce( *(k in myAA) == v); 838 } 839 840 string[] myValues; 841 foreach(val; myAA.values) { 842 myValues ~= val; 843 } 844 845 string[] myKeys; 846 foreach(key; myAA.keys) { 847 myKeys ~= key; 848 } 849 850 auto builtinKeys = builtin.keys; 851 auto builtinVals = builtin.values; 852 sort(builtinVals); 853 sort(builtinKeys); 854 sort(myKeys); 855 sort(myValues); 856 enforce(myKeys == builtinKeys); 857 enforce(myValues == builtinVals); 858 859 writeln("Passed all tests."); 860 } 861 862 */