1 // Compiler implementation of the D programming language
2 // Copyright (c) 1999-2015 by Digital Mars
3 // All Rights Reserved
4 // written by Walter Bright
5 // http://www.digitalmars.com
6 // Distributed under the Boost Software License, Version 1.0.
7 // http://www.boost.org/LICENSE_1_0.txt
8 
9 module ddmd.root.stringtable;
10 
11 import core.stdc..string;
12 import ddmd.root.rmem;
13 
14 enum POOL_BITS = 12;
15 enum POOL_SIZE = (1U << POOL_BITS);
16 
17 // TODO: Merge with root.String
18 // MurmurHash2 was written by Austin Appleby, and is placed in the public
19 // domain. The author hereby disclaims copyright to this source code.
20 // https://sites.google.com/site/murmurhash/
21 private uint calcHash(const(char)* key, size_t len) pure nothrow @nogc
22 {
23     // 'm' and 'r' are mixing constants generated offline.
24     // They're not really 'magic', they just happen to work well.
25     enum uint m = 0x5bd1e995;
26     enum int r = 24;
27     // Initialize the hash to a 'random' value
28     uint h = cast(uint)len;
29     // Mix 4 bytes at a time into the hash
30     const(ubyte)* data = cast(const(ubyte)*)key;
31     while (len >= 4)
32     {
33         uint k = data[3] << 24 | data[2] << 16 | data[1] << 8 | data[0];
34         k *= m;
35         k ^= k >> r;
36         h = (h * m) ^ (k * m);
37         data += 4;
38         len -= 4;
39     }
40     // Handle the last few bytes of the input array
41     switch (len & 3)
42     {
43     case 3:
44         h ^= data[2] << 16;
45         goto case;
46     case 2:
47         h ^= data[1] << 8;
48         goto case;
49     case 1:
50         h ^= data[0];
51         h *= m;
52         goto default;
53     default:
54         break;
55     }
56     // Do a few final mixes of the hash to ensure the last few
57     // bytes are well-incorporated.
58     h ^= h >> 13;
59     h *= m;
60     h ^= h >> 15;
61     return h;
62 }
63 
64 private size_t nextpow2(size_t val) pure nothrow @nogc @safe
65 {
66     size_t res = 1;
67     while (res < val)
68         res <<= 1;
69     return res;
70 }
71 
72 enum loadFactor = 0.8;
73 
74 struct StringEntry
75 {
76     uint hash;
77     uint vptr;
78 }
79 
80 // StringValue is a variable-length structure. It has neither proper c'tors nor a
81 // factory method because the only thing which should be creating these is StringTable.
82 struct StringValue
83 {
84     void* ptrvalue;
85     size_t length;
86 
87     extern (C++) char* lstring()
88     {
89         return cast(char*)(&this + 1);
90     }
91 
92     extern (C++) size_t len() const
93     {
94         return length;
95     }
96 
97     extern (C++) const(char)* toDchars() const
98     {
99         return cast(const(char)*)(&this + 1);
100     }
101 }
102 
103 struct StringTable
104 {
105 private:
106     StringEntry* table;
107     size_t tabledim;
108     ubyte** pools;
109     size_t npools;
110     size_t nfill;
111     size_t count;
112 
113 public:
114     extern (C++) void _init(size_t size = 0)
115     {
116         size = nextpow2(cast(size_t)(size / loadFactor));
117         if (size < 32)
118             size = 32;
119         table = cast(StringEntry*)mem.xcalloc(size, (table[0]).sizeof);
120         tabledim = size;
121         pools = null;
122         npools = nfill = 0;
123         count = 0;
124     }
125 
126     extern (C++) void reset(size_t size = 0)
127     {
128         for (size_t i = 0; i < npools; ++i)
129             mem.xfree(pools[i]);
130         mem.xfree(table);
131         mem.xfree(pools);
132         table = null;
133         pools = null;
134         _init(size);
135     }
136 
137     extern (C++) ~this()
138     {
139         for (size_t i = 0; i < npools; ++i)
140             mem.xfree(pools[i]);
141         mem.xfree(table);
142         mem.xfree(pools);
143         table = null;
144         pools = null;
145     }
146 
147     extern (C++) StringValue* lookup(const(char)* s, size_t length)
148     {
149         const(hash_t) hash = calcHash(s, length);
150         const(size_t) i = findSlot(hash, s, length);
151         // printf("lookup %.*s %p\n", (int)length, s, table[i].value ?: NULL);
152         return getValue(table[i].vptr);
153     }
154 
155     extern (C++) StringValue* insert(const(char)* s, size_t length)
156     {
157         const(hash_t) hash = calcHash(s, length);
158         size_t i = findSlot(hash, s, length);
159         if (table[i].vptr)
160             return null; // already in table
161         if (++count > tabledim * loadFactor)
162         {
163             grow();
164             i = findSlot(hash, s, length);
165         }
166         table[i].hash = hash;
167         table[i].vptr = allocValue(s, length);
168         // printf("insert %.*s %p\n", (int)length, s, table[i].value ?: NULL);
169         return getValue(table[i].vptr);
170     }
171 
172     extern (C++) StringValue* update(const(char)* s, size_t length)
173     {
174         const(hash_t) hash = calcHash(s, length);
175         size_t i = findSlot(hash, s, length);
176         if (!table[i].vptr)
177         {
178             if (++count > tabledim * loadFactor)
179             {
180                 grow();
181                 i = findSlot(hash, s, length);
182             }
183             table[i].hash = hash;
184             table[i].vptr = allocValue(s, length);
185         }
186         // printf("update %.*s %p\n", (int)length, s, table[i].value ?: NULL);
187         return getValue(table[i].vptr);
188     }
189 
190     /********************************
191      * Walk the contents of the string table,
192      * calling fp for each entry.
193      * Params:
194      *      fp = function to call. Returns !=0 to stop
195      * Returns:
196      *      last return value of fp call
197      */
198     extern (C++) int apply(int function(StringValue*) fp)
199     {
200         for (size_t i = 0; i < tabledim; ++i)
201         {
202             StringEntry* se = &table[i];
203             if (!se.vptr)
204                 continue;
205             StringValue* sv = getValue(se.vptr);
206             int result = (*fp)(sv);
207             if (result)
208                 return result;
209         }
210         return 0;
211     }
212 
213 private:
214     extern (C++) uint allocValue(const(char)* s, size_t length)
215     {
216         const(size_t) nbytes = StringValue.sizeof + length + 1;
217         if (!npools || nfill + nbytes > POOL_SIZE)
218         {
219             pools = cast(ubyte**)mem.xrealloc(pools, ++npools * (pools[0]).sizeof);
220             pools[npools - 1] = cast(ubyte*)mem.xmalloc(nbytes > POOL_SIZE ? nbytes : POOL_SIZE);
221             nfill = 0;
222         }
223         StringValue* sv = cast(StringValue*)&pools[npools - 1][nfill];
224         sv.ptrvalue = null;
225         sv.length = length;
226         .memcpy(sv.lstring(), s, length);
227         sv.lstring()[length] = 0;
228         const(uint) vptr = cast(uint)(npools << POOL_BITS | nfill);
229         nfill += nbytes + (-nbytes & 7); // align to 8 bytes
230         return vptr;
231     }
232 
233     extern (C++) StringValue* getValue(uint vptr)
234     {
235         if (!vptr)
236             return null;
237         const(size_t) idx = (vptr >> POOL_BITS) - 1;
238         const(size_t) off = vptr & POOL_SIZE - 1;
239         return cast(StringValue*)&pools[idx][off];
240     }
241 
242     extern (C++) size_t findSlot(hash_t hash, const(char)* s, size_t length)
243     {
244         // quadratic probing using triangular numbers
245         // http://stackoverflow.com/questions/2348187/moving-from-linear-probing-to-quadratic-probing-hash-collisons/2349774#2349774
246         for (size_t i = hash & (tabledim - 1), j = 1;; ++j)
247         {
248             StringValue* sv;
249             if (!table[i].vptr || table[i].hash == hash && (sv = getValue(table[i].vptr)).length == length && .memcmp(s, sv.lstring(), length) == 0)
250                 return i;
251             i = (i + j) & (tabledim - 1);
252         }
253     }
254 
255     extern (C++) void grow()
256     {
257         const(size_t) odim = tabledim;
258         StringEntry* otab = table;
259         tabledim *= 2;
260         table = cast(StringEntry*)mem.xcalloc(tabledim, (table[0]).sizeof);
261         for (size_t i = 0; i < odim; ++i)
262         {
263             StringEntry* se = &otab[i];
264             if (!se.vptr)
265                 continue;
266             StringValue* sv = getValue(se.vptr);
267             table[findSlot(se.hash, sv.lstring(), sv.length)] = *se;
268         }
269         mem.xfree(otab);
270     }
271 }