1 // Compiler implementation of the D programming language 2 // Copyright (c) 1999-2015 by Digital Mars 3 // All Rights Reserved 4 // written by Walter Bright 5 // http://www.digitalmars.com 6 // Distributed under the Boost Software License, Version 1.0. 7 // http://www.boost.org/LICENSE_1_0.txt 8 9 module ddmd.root.stringtable; 10 11 import core.stdc.stdint, core.stdc..string; 12 import ddmd.root.rmem; 13 14 enum POOL_BITS = 12; 15 enum POOL_SIZE = (1U << POOL_BITS); 16 17 // TODO: Merge with root.String 18 // MurmurHash2 was written by Austin Appleby, and is placed in the public 19 // domain. The author hereby disclaims copyright to this source code. 20 // https://sites.google.com/site/murmurhash/ 21 extern (C++) static uint32_t calcHash(const(char)* key, size_t len) 22 { 23 // 'm' and 'r' are mixing constants generated offline. 24 // They're not really 'magic', they just happen to work well. 25 const(uint32_t) m = 0x5bd1e995; 26 const(int) r = 24; 27 // Initialize the hash to a 'random' value 28 uint32_t h = cast(uint32_t)len; 29 // Mix 4 bytes at a time into the hash 30 const(uint8_t)* data = cast(const(uint8_t)*)key; 31 while (len >= 4) 32 { 33 uint32_t k = data[3] << 24 | data[2] << 16 | data[1] << 8 | data[0]; 34 k *= m; 35 k ^= k >> r; 36 k *= m; 37 h *= m; 38 h ^= k; 39 data += 4; 40 len -= 4; 41 } 42 // Handle the last few bytes of the input array 43 switch (len & 3) 44 { 45 case 3: 46 h ^= data[2] << 16; 47 case 2: 48 h ^= data[1] << 8; 49 case 1: 50 h ^= data[0]; 51 h *= m; 52 default: 53 break; 54 } 55 // Do a few final mixes of the hash to ensure the last few 56 // bytes are well-incorporated. 57 h ^= h >> 13; 58 h *= m; 59 h ^= h >> 15; 60 return h; 61 } 62 63 extern (C++) static size_t nextpow2(size_t val) 64 { 65 size_t res = 1; 66 while (res < val) 67 res <<= 1; 68 return res; 69 } 70 71 extern (C++) __gshared const(double) loadFactor = 0.8; 72 73 struct StringEntry 74 { 75 uint32_t hash; 76 uint32_t vptr; 77 } 78 79 // StringValue is a variable-length structure. It has neither proper c'tors nor a 80 // factory method because the only thing which should be creating these is StringTable. 81 struct StringValue 82 { 83 void* ptrvalue; 84 size_t length; 85 86 extern (C++) char* lstring() 87 { 88 return cast(char*)(&this + 1); 89 } 90 91 extern (C++) const(size_t) len() 92 { 93 return length; 94 } 95 96 extern (C++) const(const(char)*) toDchars() 97 { 98 return cast(const(char)*)(&this + 1); 99 } 100 } 101 102 struct StringTable 103 { 104 private: 105 StringEntry* table; 106 size_t tabledim; 107 uint8_t** pools; 108 size_t npools; 109 size_t nfill; 110 size_t count; 111 112 public: 113 extern (C++) void _init(size_t size = 0) 114 { 115 size = nextpow2(cast(size_t)(size / loadFactor)); 116 if (size < 32) 117 size = 32; 118 table = cast(StringEntry*)mem.xcalloc(size, (table[0]).sizeof); 119 tabledim = size; 120 pools = null; 121 npools = nfill = 0; 122 count = 0; 123 } 124 125 extern (C++) void reset(size_t size = 0) 126 { 127 for (size_t i = 0; i < npools; ++i) 128 mem.xfree(pools[i]); 129 mem.xfree(table); 130 mem.xfree(pools); 131 table = null; 132 pools = null; 133 _init(size); 134 } 135 136 extern (C++) ~this() 137 { 138 for (size_t i = 0; i < npools; ++i) 139 mem.xfree(pools[i]); 140 mem.xfree(table); 141 mem.xfree(pools); 142 table = null; 143 pools = null; 144 } 145 146 extern (C++) StringValue* lookup(const(char)* s, size_t length) 147 { 148 const(hash_t) hash = calcHash(s, length); 149 const(size_t) i = findSlot(hash, s, length); 150 // printf("lookup %.*s %p\n", (int)length, s, table[i].value ?: NULL); 151 return getValue(table[i].vptr); 152 } 153 154 extern (C++) StringValue* insert(const(char)* s, size_t length) 155 { 156 const(hash_t) hash = calcHash(s, length); 157 size_t i = findSlot(hash, s, length); 158 if (table[i].vptr) 159 return null; // already in table 160 if (++count > tabledim * loadFactor) 161 { 162 grow(); 163 i = findSlot(hash, s, length); 164 } 165 table[i].hash = hash; 166 table[i].vptr = allocValue(s, length); 167 // printf("insert %.*s %p\n", (int)length, s, table[i].value ?: NULL); 168 return getValue(table[i].vptr); 169 } 170 171 extern (C++) StringValue* update(const(char)* s, size_t length) 172 { 173 const(hash_t) hash = calcHash(s, length); 174 size_t i = findSlot(hash, s, length); 175 if (!table[i].vptr) 176 { 177 if (++count > tabledim * loadFactor) 178 { 179 grow(); 180 i = findSlot(hash, s, length); 181 } 182 table[i].hash = hash; 183 table[i].vptr = allocValue(s, length); 184 } 185 // printf("update %.*s %p\n", (int)length, s, table[i].value ?: NULL); 186 return getValue(table[i].vptr); 187 } 188 189 /******************************** 190 * Walk the contents of the string table, 191 * calling fp for each entry. 192 * Params: 193 * fp = function to call. Returns !=0 to stop 194 * Returns: 195 * last return value of fp call 196 */ 197 extern (C++) int apply(int function(StringValue*) fp) 198 { 199 for (size_t i = 0; i < tabledim; ++i) 200 { 201 StringEntry* se = &table[i]; 202 if (!se.vptr) 203 continue; 204 StringValue* sv = getValue(se.vptr); 205 int result = (*fp)(sv); 206 if (result) 207 return result; 208 } 209 return 0; 210 } 211 212 private: 213 extern (C++) uint32_t allocValue(const(char)* s, size_t length) 214 { 215 const(size_t) nbytes = StringValue.sizeof + length + 1; 216 if (!npools || nfill + nbytes > POOL_SIZE) 217 { 218 pools = cast(uint8_t**)mem.xrealloc(pools, ++npools * (pools[0]).sizeof); 219 pools[npools - 1] = cast(uint8_t*)mem.xmalloc(nbytes > POOL_SIZE ? nbytes : POOL_SIZE); 220 nfill = 0; 221 } 222 StringValue* sv = cast(StringValue*)&pools[npools - 1][nfill]; 223 sv.ptrvalue = null; 224 sv.length = length; 225 .memcpy(sv.lstring(), s, length); 226 sv.lstring()[length] = 0; 227 const(uint32_t) vptr = cast(uint32_t)(npools << POOL_BITS | nfill); 228 nfill += nbytes + (-nbytes & 7); // align to 8 bytes 229 return vptr; 230 } 231 232 extern (C++) StringValue* getValue(uint32_t vptr) 233 { 234 if (!vptr) 235 return null; 236 const(size_t) idx = (vptr >> POOL_BITS) - 1; 237 const(size_t) off = vptr & POOL_SIZE - 1; 238 return cast(StringValue*)&pools[idx][off]; 239 } 240 241 extern (C++) size_t findSlot(hash_t hash, const(char)* s, size_t length) 242 { 243 // quadratic probing using triangular numbers 244 // http://stackoverflow.com/questions/2348187/moving-from-linear-probing-to-quadratic-probing-hash-collisons/2349774#2349774 245 for (size_t i = hash & (tabledim - 1), j = 1;; ++j) 246 { 247 StringValue* sv; 248 if (!table[i].vptr || table[i].hash == hash && (sv = getValue(table[i].vptr)).length == length && .memcmp(s, sv.lstring(), length) == 0) 249 return i; 250 i = (i + j) & (tabledim - 1); 251 } 252 } 253 254 extern (C++) void grow() 255 { 256 const(size_t) odim = tabledim; 257 StringEntry* otab = table; 258 tabledim *= 2; 259 table = cast(StringEntry*)mem.xcalloc(tabledim, (table[0]).sizeof); 260 for (size_t i = 0; i < odim; ++i) 261 { 262 StringEntry* se = &otab[i]; 263 if (!se.vptr) 264 continue; 265 StringValue* sv = getValue(se.vptr); 266 table[findSlot(se.hash, sv.lstring(), sv.length)] = *se; 267 } 268 mem.xfree(otab); 269 } 270 }