1 // Compiler implementation of the D programming language 2 // Copyright (c) 1999-2015 by Digital Mars 3 // All Rights Reserved 4 // written by Walter Bright 5 // http://www.digitalmars.com 6 // Distributed under the Boost Software License, Version 1.0. 7 // http://www.boost.org/LICENSE_1_0.txt 8 9 module ddmd.root.stringtable; 10 11 import core.stdc..string; 12 import ddmd.root.rmem; 13 14 enum POOL_BITS = 12; 15 enum POOL_SIZE = (1U << POOL_BITS); 16 17 // TODO: Merge with root.String 18 // MurmurHash2 was written by Austin Appleby, and is placed in the public 19 // domain. The author hereby disclaims copyright to this source code. 20 // https://sites.google.com/site/murmurhash/ 21 private uint calcHash(const(char)* key, size_t len) pure nothrow @nogc 22 { 23 // 'm' and 'r' are mixing constants generated offline. 24 // They're not really 'magic', they just happen to work well. 25 enum uint m = 0x5bd1e995; 26 enum int r = 24; 27 // Initialize the hash to a 'random' value 28 uint h = cast(uint)len; 29 // Mix 4 bytes at a time into the hash 30 const(ubyte)* data = cast(const(ubyte)*)key; 31 while (len >= 4) 32 { 33 uint k = data[3] << 24 | data[2] << 16 | data[1] << 8 | data[0]; 34 k *= m; 35 k ^= k >> r; 36 h = (h * m) ^ (k * m); 37 data += 4; 38 len -= 4; 39 } 40 // Handle the last few bytes of the input array 41 switch (len & 3) 42 { 43 case 3: 44 h ^= data[2] << 16; 45 goto case; 46 case 2: 47 h ^= data[1] << 8; 48 goto case; 49 case 1: 50 h ^= data[0]; 51 h *= m; 52 goto default; 53 default: 54 break; 55 } 56 // Do a few final mixes of the hash to ensure the last few 57 // bytes are well-incorporated. 58 h ^= h >> 13; 59 h *= m; 60 h ^= h >> 15; 61 return h; 62 } 63 64 private size_t nextpow2(size_t val) pure nothrow @nogc @safe 65 { 66 size_t res = 1; 67 while (res < val) 68 res <<= 1; 69 return res; 70 } 71 72 enum loadFactor = 0.8; 73 74 struct StringEntry 75 { 76 uint hash; 77 uint vptr; 78 } 79 80 // StringValue is a variable-length structure. It has neither proper c'tors nor a 81 // factory method because the only thing which should be creating these is StringTable. 82 struct StringValue 83 { 84 void* ptrvalue; 85 size_t length; 86 87 extern (C++) char* lstring() 88 { 89 return cast(char*)(&this + 1); 90 } 91 92 extern (C++) size_t len() const 93 { 94 return length; 95 } 96 97 extern (C++) const(char)* toDchars() const 98 { 99 return cast(const(char)*)(&this + 1); 100 } 101 } 102 103 struct StringTable 104 { 105 private: 106 StringEntry* table; 107 size_t tabledim; 108 ubyte** pools; 109 size_t npools; 110 size_t nfill; 111 size_t count; 112 113 public: 114 extern (C++) void _init(size_t size = 0) 115 { 116 size = nextpow2(cast(size_t)(size / loadFactor)); 117 if (size < 32) 118 size = 32; 119 table = cast(StringEntry*)mem.xcalloc(size, (table[0]).sizeof); 120 tabledim = size; 121 pools = null; 122 npools = nfill = 0; 123 count = 0; 124 } 125 126 extern (C++) void reset(size_t size = 0) 127 { 128 for (size_t i = 0; i < npools; ++i) 129 mem.xfree(pools[i]); 130 mem.xfree(table); 131 mem.xfree(pools); 132 table = null; 133 pools = null; 134 _init(size); 135 } 136 137 extern (C++) ~this() 138 { 139 for (size_t i = 0; i < npools; ++i) 140 mem.xfree(pools[i]); 141 mem.xfree(table); 142 mem.xfree(pools); 143 table = null; 144 pools = null; 145 } 146 147 extern (C++) StringValue* lookup(const(char)* s, size_t length) 148 { 149 const(hash_t) hash = calcHash(s, length); 150 const(size_t) i = findSlot(hash, s, length); 151 // printf("lookup %.*s %p\n", (int)length, s, table[i].value ?: NULL); 152 return getValue(table[i].vptr); 153 } 154 155 extern (C++) StringValue* insert(const(char)* s, size_t length) 156 { 157 const(hash_t) hash = calcHash(s, length); 158 size_t i = findSlot(hash, s, length); 159 if (table[i].vptr) 160 return null; // already in table 161 if (++count > tabledim * loadFactor) 162 { 163 grow(); 164 i = findSlot(hash, s, length); 165 } 166 table[i].hash = hash; 167 table[i].vptr = allocValue(s, length); 168 // printf("insert %.*s %p\n", (int)length, s, table[i].value ?: NULL); 169 return getValue(table[i].vptr); 170 } 171 172 extern (C++) StringValue* update(const(char)* s, size_t length) 173 { 174 const(hash_t) hash = calcHash(s, length); 175 size_t i = findSlot(hash, s, length); 176 if (!table[i].vptr) 177 { 178 if (++count > tabledim * loadFactor) 179 { 180 grow(); 181 i = findSlot(hash, s, length); 182 } 183 table[i].hash = hash; 184 table[i].vptr = allocValue(s, length); 185 } 186 // printf("update %.*s %p\n", (int)length, s, table[i].value ?: NULL); 187 return getValue(table[i].vptr); 188 } 189 190 /******************************** 191 * Walk the contents of the string table, 192 * calling fp for each entry. 193 * Params: 194 * fp = function to call. Returns !=0 to stop 195 * Returns: 196 * last return value of fp call 197 */ 198 extern (C++) int apply(int function(StringValue*) fp) 199 { 200 for (size_t i = 0; i < tabledim; ++i) 201 { 202 StringEntry* se = &table[i]; 203 if (!se.vptr) 204 continue; 205 StringValue* sv = getValue(se.vptr); 206 int result = (*fp)(sv); 207 if (result) 208 return result; 209 } 210 return 0; 211 } 212 213 private: 214 extern (C++) uint allocValue(const(char)* s, size_t length) 215 { 216 const(size_t) nbytes = StringValue.sizeof + length + 1; 217 if (!npools || nfill + nbytes > POOL_SIZE) 218 { 219 pools = cast(ubyte**)mem.xrealloc(pools, ++npools * (pools[0]).sizeof); 220 pools[npools - 1] = cast(ubyte*)mem.xmalloc(nbytes > POOL_SIZE ? nbytes : POOL_SIZE); 221 nfill = 0; 222 } 223 StringValue* sv = cast(StringValue*)&pools[npools - 1][nfill]; 224 sv.ptrvalue = null; 225 sv.length = length; 226 .memcpy(sv.lstring(), s, length); 227 sv.lstring()[length] = 0; 228 const(uint) vptr = cast(uint)(npools << POOL_BITS | nfill); 229 nfill += nbytes + (-nbytes & 7); // align to 8 bytes 230 return vptr; 231 } 232 233 extern (C++) StringValue* getValue(uint vptr) 234 { 235 if (!vptr) 236 return null; 237 const(size_t) idx = (vptr >> POOL_BITS) - 1; 238 const(size_t) off = vptr & POOL_SIZE - 1; 239 return cast(StringValue*)&pools[idx][off]; 240 } 241 242 extern (C++) size_t findSlot(hash_t hash, const(char)* s, size_t length) 243 { 244 // quadratic probing using triangular numbers 245 // http://stackoverflow.com/questions/2348187/moving-from-linear-probing-to-quadratic-probing-hash-collisons/2349774#2349774 246 for (size_t i = hash & (tabledim - 1), j = 1;; ++j) 247 { 248 StringValue* sv; 249 if (!table[i].vptr || table[i].hash == hash && (sv = getValue(table[i].vptr)).length == length && .memcmp(s, sv.lstring(), length) == 0) 250 return i; 251 i = (i + j) & (tabledim - 1); 252 } 253 } 254 255 extern (C++) void grow() 256 { 257 const(size_t) odim = tabledim; 258 StringEntry* otab = table; 259 tabledim *= 2; 260 table = cast(StringEntry*)mem.xcalloc(tabledim, (table[0]).sizeof); 261 for (size_t i = 0; i < odim; ++i) 262 { 263 StringEntry* se = &otab[i]; 264 if (!se.vptr) 265 continue; 266 StringValue* sv = getValue(se.vptr); 267 table[findSlot(se.hash, sv.lstring(), sv.length)] = *se; 268 } 269 mem.xfree(otab); 270 } 271 }