1 module dmdscript.RandAA; 2 //This is a cruedly hacked version, to allow usage of precomputed hashes. 3 /**An associative array implementation that uses randomized linear congruential 4 * probing for collision resolution. This has the advantage that, no matter 5 * how many collisions there are in the modulus hash space, O(1) expected 6 * lookup time is guaranteed as long as there are few collisions in full 32- 7 * or 64-bit hash space. 8 * 9 * By: David Simcha 10 * 11 * License: 12 * Boost Software License - Version 1.0 - August 17th, 2003 13 * 14 * Permission is hereby granted, free of charge, to any person or organization 15 * obtaining a copy of the software and accompanying documentation covered by 16 * this license (the "Software") to use, reproduce, display, distribute, 17 * execute, and transmit the Software, and to prepare derivative works of the 18 * Software, and to permit third-parties to whom the Software is furnished to 19 * do so, all subject to the following: 20 * 21 * The copyright notices in the Software and this entire statement, including 22 * the above license grant, this restriction and the following disclaimer, 23 * must be included in all copies of the Software, in whole or in part, and 24 * all derivative works of the Software, unless such copies or derivative 25 * works are solely in the form of machine-executable object code generated by 26 * a source language processor. 27 * 28 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 29 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 30 * FITNESS FOR A PARTICULAR PURPOSE, TITLE AND NON-INFRINGEMENT. IN NO EVENT 31 * SHALL THE COPYRIGHT HOLDERS OR ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE 32 * FOR ANY DAMAGES OR OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE, 33 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 34 * DEALINGS IN THE SOFTWARE. 35 */ 36 import std.traits, core.memory, core.exception, std.algorithm, std.conv, 37 std.exception, std.math; 38 39 /**Exception thrown on missing keys.*/ 40 class KeyError : Exception { 41 this(string msg) { 42 super(msg); 43 } 44 } 45 46 private enum 47 { 48 EMPTY, 49 USED, 50 REMOVED 51 } 52 53 // It's faster to store the hash if it's expensive to compute, but 54 // faster not to if it's cheap to compute. 55 private template shouldStoreHash(K) 56 { 57 enum bool shouldStoreHash = !isFloatingPoint !K && !isIntegral !K; 58 } 59 /**Forward range to iterate over keys or values of a RandAA. Elements can 60 * safely be removed (but not added) while iterating over the keys.*/ 61 62 63 64 private void missing_key(K) (K key) 65 { 66 throw new KeyError(text("missing or invalid key ", key)); 67 } 68 69 struct aard (K, V, bool useRandom = false) 70 { 71 alias RandAA!(K, V, shouldStoreHash!(K), useRandom) HashMapClass; 72 73 HashMapClass imp_; 74 75 V opIndex(K key) 76 { 77 if(imp_ !is null) 78 return imp_.opIndex(key); 79 missing_key(key); 80 assert(0); 81 } 82 83 V* opIn_r(K k) 84 { 85 if(imp_ is null) 86 return null; 87 return imp_.opIn_r(k); 88 } 89 void opIndexAssign(V value, K k) 90 { 91 if(imp_ is null) 92 imp_ = new HashMapClass(); 93 imp_.assignNoRehashCheck(k, value); 94 imp_.rehash(); 95 } 96 97 void clear() 98 { 99 if(imp_ !is null) 100 imp_.free(); 101 } 102 void detach() 103 { 104 imp_ = null; 105 } 106 bool remove(K k) 107 { 108 if(imp_ is null) 109 return false; 110 V val; 111 112 return imp_.remove(k, val); 113 } 114 115 bool remove(K k, ref V value) 116 { 117 if(imp_ is null) 118 return false; 119 return imp_.remove(k, value); 120 } 121 @property 122 K[] keys() 123 { 124 if(imp_ is null) 125 return null; 126 return imp_.keys(); 127 } 128 129 @property aard allocate() 130 { 131 aard newAA; 132 133 newAA.imp_ = new HashMapClass(); 134 135 return newAA; 136 } 137 @property void loadRatio(double cap) 138 { 139 // dummy 140 } 141 @property void capacity(size_t cap) 142 { 143 // dummy 144 } 145 146 @property 147 V[] values() 148 { 149 if(imp_ is null) 150 return null; 151 return imp_.values(); 152 } 153 154 V get(K k) 155 { 156 V* p = opIn_r(k); 157 if(p !is null) 158 { 159 return *p; 160 } 161 return V.init; 162 } 163 164 bool get(K k, ref V val) 165 { 166 if(imp_ !is null) 167 { 168 V* p = opIn_r(k); 169 if(p !is null) 170 { 171 val = *p; 172 return true; 173 } 174 } 175 val = V.init; 176 return false; 177 } 178 179 @property size_t length() 180 { 181 if(imp_ is null) 182 return 0; 183 return imp_._length; 184 } 185 186 187 public int opApply(int delegate(ref V value) dg) 188 { 189 return (imp_ !is null) ? imp_.opApply(dg) : 0; 190 } 191 192 public int opApply(int delegate(ref K key, ref V value) dg) 193 { 194 return (imp_ !is null) ? imp_.opApply(dg) : 0; 195 } 196 } 197 198 199 200 /**An associative array class that uses randomized probing and open 201 * addressing. K is the key type, V is the value type, storeHash 202 * determines whether the hash of each key is stored in the array. This 203 * increases space requirements, but allows for faster rehashing. By 204 * default, the hash is stored unless the array is an array of floating point 205 * or integer types. 206 */ 207 final class RandAA(K, V, bool storeHash = shouldStoreHash!(K), bool useRandom = false) 208 { 209 private: 210 211 // Store keys, values in parallel arrays. This prevents us from having 212 // alignment overhead and prevents the GC from scanning values if only 213 // keys have pointers, or vice-versa. 214 K* _keys; 215 V* vals; 216 ubyte* flags; 217 218 static if(storeHash) 219 { 220 hash_t* hashes; // For fast reindexing. 221 } 222 223 size_t mask; // easy modular 2 224 size_t clock; // calculate with size 225 size_t _length; // Logical size 226 size_t space; // Storage space 227 size_t nDead; // Number of elements removed. 228 //TypeInfo ti_; 229 230 // Good values for a linear congruential random number gen. The modulus 231 // is implicitly uint.max + 1, meaning that we take advantage of overflow 232 // to avoid a div instruction. 233 enum : size_t { mul = 1103515245U, add = 12345U } 234 enum : size_t { PERTURB_SHIFT = 32 } 235 236 // Optimized for a few special cases to avoid the virtual function call 237 // to TypeInfo.getHash(). 238 hash_t getHash(K key) const 239 { 240 static if(is (K : long) && K.sizeof <= hash_t.sizeof) 241 { 242 hash_t hash = cast(hash_t)key; 243 } 244 else 245 static if(is (typeof(key.toHash()))) 246 { 247 hash_t hash = key.toHash(); 248 } 249 else 250 { 251 hash_t hash = typeid(K).getHash(cast(const (void)*)&key); 252 } 253 254 return hash; 255 } 256 257 258 static if(storeHash) 259 { 260 size_t findExisting(ref K key) const 261 { 262 immutable hashFull = getHash(key); 263 size_t pos = hashFull & mask; 264 static if(useRandom) 265 size_t rand = hashFull + 1; 266 else // static if (P == "perturb") 267 { 268 size_t perturb = hashFull; 269 size_t i = pos; 270 } 271 272 uint flag = void; 273 while(true) 274 { 275 flag = flags[pos]; 276 if(flag == EMPTY || (hashFull == hashes[pos] && key == _keys[pos] && flag != EMPTY)) 277 { 278 break; 279 } 280 static if(useRandom) 281 { 282 rand = rand * mul + add; 283 pos = (rand + hashFull) & mask; 284 } 285 else // static if (P == "perturb") 286 { 287 i = (i * 5 + perturb + 1); 288 perturb /= PERTURB_SHIFT; 289 pos = i & mask; 290 } 291 } 292 return (flag == USED) ? pos : size_t.max; 293 } 294 295 size_t findForInsert(ref K key, immutable hash_t hashFull) 296 { 297 size_t pos = hashFull & mask; 298 static if(useRandom) 299 size_t rand = hashFull + 1; 300 else //static if (P == "perturb") 301 { 302 size_t perturb = hashFull; 303 size_t i = pos; 304 } 305 306 while(true) 307 { 308 if(flags[pos] != USED || (hashes[pos] == hashFull && _keys[pos] == key)) 309 { 310 break; 311 } 312 static if(useRandom) 313 { 314 rand = rand * mul + add; 315 pos = (rand + hashFull) & mask; 316 } 317 else //static if (P == "perturb") 318 { 319 i = (i * 5 + perturb + 1); 320 perturb /= PERTURB_SHIFT; 321 pos = i & mask; 322 } 323 } 324 325 hashes[pos] = hashFull; 326 return pos; 327 } 328 } 329 else 330 { 331 size_t findExisting(ref K key) const 332 { 333 immutable hashFull = getHash(key); 334 size_t pos = hashFull & mask; 335 static if(useRandom) 336 size_t rand = hashFull + 1; 337 else //static if (P == "perturb") 338 { 339 size_t perturb = hashFull; 340 size_t i = pos; 341 } 342 343 uint flag = void; 344 while(true) 345 { 346 flag = flags[pos]; 347 if(flag == EMPTY || (_keys[pos] == key && flag != EMPTY)) 348 { 349 break; 350 } 351 static if(useRandom) 352 { 353 rand = rand * mul + add; 354 pos = (rand + hashFull) & mask; 355 } 356 else // static if (P == "perturb") 357 { 358 i = (i * 5 + perturb + 1); 359 perturb /= PERTURB_SHIFT; 360 pos = i & mask; 361 } 362 } 363 return (flag == USED) ? pos : size_t.max; 364 } 365 366 size_t findForInsert(ref K key, immutable hash_t hashFull) const 367 { 368 size_t pos = hashFull & mask; 369 static if(useRandom) 370 { 371 size_t rand = hashFull + 1; 372 } 373 else 374 { 375 size_t perturb = hashFull; 376 size_t i = pos; 377 } 378 379 380 381 while(flags[pos] == USED && _keys[pos] != key) 382 { 383 static if(useRandom) 384 { 385 rand = rand * mul + add; 386 pos = (rand + hashFull) & mask; 387 } 388 else 389 { 390 i = (i * 5 + perturb + 1); 391 perturb /= PERTURB_SHIFT; 392 pos = i & mask; 393 } 394 } 395 396 return pos; 397 } 398 } 399 400 void assignNoRehashCheck(ref K key, ref V val, hash_t hashFull) 401 { 402 size_t i = findForInsert(key, hashFull); 403 vals[i] = val; 404 immutable uint flag = flags[i]; 405 if(flag != USED) 406 { 407 if(flag == REMOVED) 408 { 409 nDead--; 410 } 411 _length++; 412 flags[i] = USED; 413 _keys[i] = key; 414 } 415 } 416 417 void assignNoRehashCheck(ref K key, ref V val) 418 { 419 hash_t hashFull = getHash(key); 420 size_t i = findForInsert(key, hashFull); 421 vals[i] = val; 422 immutable uint flag = flags[i]; 423 if(flag != USED) 424 { 425 if(flag == REMOVED) 426 { 427 nDead--; 428 } 429 _length++; 430 flags[i] = USED; 431 _keys[i] = key; 432 } 433 } 434 435 // Dummy constructor only used internally. 436 this(bool dummy) {} 437 438 public: 439 440 static size_t getNextP2(size_t n) 441 { 442 // get the powerof 2 > n 443 444 size_t result = 16; 445 while(n >= result) 446 { 447 result *= 2; 448 } 449 return result; 450 } 451 /**Construct an instance of RandAA with initial size initSize. 452 * initSize determines the amount of slots pre-allocated.*/ 453 this(size_t initSize = 10) { 454 //initSize = nextSize(initSize); 455 space = getNextP2(initSize); 456 mask = space - 1; 457 _keys = (new K[space]).ptr; 458 vals = (new V[space]).ptr; 459 460 static if(storeHash) 461 { 462 hashes = (new hash_t[space]).ptr; 463 } 464 465 flags = (new ubyte[space]).ptr; 466 } 467 468 /// 469 void rehash() 470 { 471 if(cast(float)(_length + nDead) / space < 0.7) 472 { 473 return; 474 } 475 reserve(space + 1); 476 } 477 478 /**Reserve enough space for newSize elements. Note that the rehashing 479 * heuristics do not guarantee that no new space will be allocated before 480 * newSize elements are added. 481 */ 482 private void reserve(size_t newSize) 483 { 484 scope typeof(this)newTable = new typeof(this)(newSize); 485 486 foreach(i; 0..space) 487 { 488 if(flags[i] == USED) 489 { 490 static if(storeHash) 491 { 492 newTable.assignNoRehashCheck(_keys[i], vals[i], hashes[i]); 493 } 494 else 495 { 496 newTable.assignNoRehashCheck(_keys[i], vals[i]); 497 } 498 } 499 } 500 501 // Can't free vals b/c references to it could escape. Let GC 502 // handle it. 503 GC.free(cast(void*)this._keys); 504 GC.free(cast(void*)this.flags); 505 GC.free(cast(void*)this.vals); 506 507 static if(storeHash) 508 { 509 GC.free(cast(void*)this.hashes); 510 } 511 512 foreach(ti, elem; newTable.tupleof) 513 { 514 this.tupleof[ti] = elem; 515 } 516 } 517 518 /**Throws a KeyError on unsuccessful key search.*/ 519 ref V opIndex(K index) 520 { 521 size_t i = findExisting(index); 522 if(i == size_t.max) 523 { 524 throw new KeyError("Could not find key " ~ to !string(index)); 525 } 526 else 527 { 528 return vals[i]; 529 } 530 } 531 /+ I have to insure there is no returns by value, rude hack 532 /**Non-ref return for const instances.*/ 533 V opIndex(K index) 534 { 535 size_t i = findExisting(index); 536 if(i == size_t.max) 537 { 538 throw new KeyError("Could not find key " ~ to !string(index)); 539 } 540 else 541 { 542 return vals[i]; 543 } 544 } 545 +/ 546 ///Hackery 547 V* findExistingAlt(ref K key, hash_t hashFull){ 548 size_t pos = hashFull & mask; 549 static if(useRandom) 550 size_t rand = hashFull + 1; 551 else // static if (P == "perturb") 552 { 553 size_t perturb = hashFull; 554 size_t i = pos; 555 } 556 557 uint flag = void; 558 while(true) 559 { 560 flag = flags[pos]; 561 if(flag == EMPTY || (hashFull == hashes[pos] && key == _keys[pos] && flag != EMPTY)) 562 { 563 break; 564 } 565 static if(useRandom) 566 { 567 rand = rand * mul + add; 568 pos = (rand + hashFull) & mask; 569 } 570 else // static if (P == "perturb") 571 { 572 i = (i * 5 + perturb + 1); 573 perturb /= PERTURB_SHIFT; 574 pos = i & mask; 575 } 576 } 577 return (flag == USED) ? &vals[pos] : null; 578 } 579 void insertAlt(ref K key, ref V val, hash_t hashFull){ 580 assignNoRehashCheck(key, val, hashFull); 581 rehash(); 582 } 583 584 /// 585 void opIndexAssign(V val, K index) 586 { 587 assignNoRehashCheck(index, val); 588 rehash(); 589 } 590 struct KeyValRange (K, V, bool storeHash, bool vals) { 591 private: 592 static if(vals) 593 { 594 alias V T; 595 } 596 else 597 { 598 alias K T; 599 } 600 size_t index = 0; 601 RandAA aa; 602 public: 603 this(RandAA aa) { 604 this.aa = aa; 605 while(aa.flags[index] != USED && index < aa.space) 606 { 607 index++; 608 } 609 } 610 611 /// 612 T front() 613 { 614 static if(vals) 615 { 616 return aa.vals[index]; 617 } 618 else 619 { 620 return aa._keys[index]; 621 } 622 } 623 624 /// 625 void popFront() 626 { 627 index++; 628 while(aa.flags[index] != USED && index < aa.space) 629 { 630 index++; 631 } 632 } 633 634 /// 635 bool empty() 636 { 637 return index == aa.space; 638 } 639 640 string toString() 641 { 642 char[] ret = "[".dup; 643 auto copy = this; 644 foreach(elem; copy) 645 { 646 ret ~= to !string(elem); 647 ret ~= ", "; 648 } 649 650 ret[$ - 2] = ']'; 651 ret = ret[0..$ - 1]; 652 auto retImmutable = assumeUnique(ret); 653 return retImmutable; 654 } 655 } 656 alias KeyValRange!(K, V, storeHash, false) key_range; 657 alias KeyValRange!(K, V, storeHash, true) value_range; 658 659 /**Does not allocate. Returns a simple forward range.*/ 660 key_range keyRange() 661 { 662 return key_range(this); 663 } 664 665 /**Does not allocate. Returns a simple forward range.*/ 666 value_range valueRange() 667 { 668 return value_range(this); 669 } 670 671 /**Removes an element from this. Elements *may* be removed while iterating 672 * via .keys.*/ 673 V remove(K index) 674 { 675 size_t i = findExisting(index); 676 if(i == size_t.max) 677 { 678 throw new KeyError("Could not find key " ~ to !string(index)); 679 } 680 else 681 { 682 _length--; 683 nDead++; 684 flags[i] = REMOVED; 685 return vals[i]; 686 } 687 } 688 689 V[] values() 690 { 691 size_t i = 0; 692 V[] result = new V[this._length]; 693 694 foreach(k, v; this) 695 result[i++] = v; 696 return result; 697 } 698 K[] keys() 699 { 700 size_t i = 0; 701 K[] result = new K[this._length]; 702 703 foreach(k, v; this) 704 result[i++] = k; 705 return result; 706 } 707 bool remove(K index, ref V value) 708 { 709 size_t i = findExisting(index); 710 if(i == size_t.max) 711 { 712 return false; 713 } 714 else 715 { 716 _length--; 717 nDead++; 718 flags[i] = REMOVED; 719 value = vals[i]; 720 return true; 721 } 722 } 723 /**Returns null if index is not found.*/ 724 V* opIn_r(K index) 725 { 726 size_t i = findExisting(index); 727 if(i == size_t.max) 728 { 729 return null; 730 } 731 else 732 { 733 return vals + i; 734 } 735 } 736 737 /**Iterate over keys, values in lockstep.*/ 738 int opApply(int delegate(ref K, ref V) dg) 739 { 740 int result; 741 foreach(i, k; _keys[0..space]) 742 if(flags[i] == USED) 743 { 744 result = dg(k, vals[i]); 745 if(result) 746 { 747 break; 748 } 749 } 750 return result; 751 } 752 753 private template DeconstArrayType(T) 754 { 755 static if(isStaticArray!(T)) 756 { 757 alias typeof(T.init[0])[] type; //the equivalent dynamic array 758 } 759 else 760 { 761 alias T type; 762 } 763 } 764 765 alias DeconstArrayType!(K).type K_; 766 alias DeconstArrayType!(V).type V_; 767 768 public int opApply(int delegate(ref V_ value) dg) 769 { 770 return opApply((ref K_ k, ref V_ v) { return dg(v); }); 771 } 772 773 void clear() 774 { 775 free(); 776 } 777 /**Allows for deleting the contents of the array manually, if supported 778 * by the GC.*/ 779 void free() 780 { 781 GC.free(cast(void*)this._keys); 782 GC.free(cast(void*)this.vals); 783 GC.free(cast(void*)this.flags); 784 785 static if(storeHash) 786 { 787 GC.free(cast(void*)this.hashes); 788 } 789 } 790 791 /// 792 size_t length() 793 { 794 return _length; 795 } 796 } 797 798 import std.random, std.exception, std.stdio; 799 /* 800 // Test it out. 801 void unit_tests() { 802 string[string] builtin; 803 auto myAA = new RandAA!(string, string)(); 804 805 foreach(i; 0..100_000) { 806 auto myKey = randString(20); 807 auto myVal = randString(20); 808 builtin[myKey] = myVal; 809 myAA[myKey] = myVal; 810 } 811 812 enforce(myAA.length == builtin.length); 813 foreach(key; myAA.keys) { 814 enforce(myAA[key] == builtin[key]); 815 } 816 817 auto keys = builtin.keys; 818 randomShuffle(keys); 819 foreach(toRemove; keys[0..1000]) { 820 builtin.remove(toRemove); 821 myAA.remove(toRemove); 822 } 823 824 825 myAA.rehash(); 826 enforce(myAA.length == builtin.length); 827 foreach(k, v; builtin) { 828 enforce(k in myAA); 829 enforce( *(k in myAA) == v); 830 } 831 832 string[] myValues; 833 foreach(val; myAA.values) { 834 myValues ~= val; 835 } 836 837 string[] myKeys; 838 foreach(key; myAA.keys) { 839 myKeys ~= key; 840 } 841 842 auto builtinKeys = builtin.keys; 843 auto builtinVals = builtin.values; 844 sort(builtinVals); 845 sort(builtinKeys); 846 sort(myKeys); 847 sort(myValues); 848 enforce(myKeys == builtinKeys); 849 enforce(myValues == builtinVals); 850 851 writeln("Passed all tests."); 852 } 853 854 */