1 // Compiler implementation of the D programming language
2 // Copyright (c) 1999-2015 by Digital Mars
3 // All Rights Reserved
4 // written by Walter Bright
5 // http://www.digitalmars.com
6 // Distributed under the Boost Software License, Version 1.0.
7 // http://www.boost.org/LICENSE_1_0.txt
8 
9 module ddmd.root.stringtable;
10 
11 import core.stdc.stdint, core.stdc..string;
12 import ddmd.root.rmem;
13 
14 enum POOL_BITS = 12;
15 enum POOL_SIZE = (1U << POOL_BITS);
16 
17 // TODO: Merge with root.String
18 // MurmurHash2 was written by Austin Appleby, and is placed in the public
19 // domain. The author hereby disclaims copyright to this source code.
20 // https://sites.google.com/site/murmurhash/
21 extern (C++) static uint32_t calcHash(const(char)* key, size_t len)
22 {
23     // 'm' and 'r' are mixing constants generated offline.
24     // They're not really 'magic', they just happen to work well.
25     const(uint32_t) m = 0x5bd1e995;
26     const(int) r = 24;
27     // Initialize the hash to a 'random' value
28     uint32_t h = cast(uint32_t)len;
29     // Mix 4 bytes at a time into the hash
30     const(uint8_t)* data = cast(const(uint8_t)*)key;
31     while (len >= 4)
32     {
33         uint32_t k = data[3] << 24 | data[2] << 16 | data[1] << 8 | data[0];
34         k *= m;
35         k ^= k >> r;
36         k *= m;
37         h *= m;
38         h ^= k;
39         data += 4;
40         len -= 4;
41     }
42     // Handle the last few bytes of the input array
43     switch (len & 3)
44     {
45     case 3:
46         h ^= data[2] << 16;
47     case 2:
48         h ^= data[1] << 8;
49     case 1:
50         h ^= data[0];
51         h *= m;
52     default:
53         break;
54     }
55     // Do a few final mixes of the hash to ensure the last few
56     // bytes are well-incorporated.
57     h ^= h >> 13;
58     h *= m;
59     h ^= h >> 15;
60     return h;
61 }
62 
63 extern (C++) static size_t nextpow2(size_t val)
64 {
65     size_t res = 1;
66     while (res < val)
67         res <<= 1;
68     return res;
69 }
70 
71 extern (C++) __gshared const(double) loadFactor = 0.8;
72 
73 struct StringEntry
74 {
75     uint32_t hash;
76     uint32_t vptr;
77 }
78 
79 // StringValue is a variable-length structure. It has neither proper c'tors nor a
80 // factory method because the only thing which should be creating these is StringTable.
81 struct StringValue
82 {
83     void* ptrvalue;
84     size_t length;
85 
86     extern (C++) char* lstring()
87     {
88         return cast(char*)(&this + 1);
89     }
90 
91     extern (C++) const(size_t) len()
92     {
93         return length;
94     }
95 
96     extern (C++) const(const(char)*) toDchars()
97     {
98         return cast(const(char)*)(&this + 1);
99     }
100 }
101 
102 struct StringTable
103 {
104 private:
105     StringEntry* table;
106     size_t tabledim;
107     uint8_t** pools;
108     size_t npools;
109     size_t nfill;
110     size_t count;
111 
112 public:
113     extern (C++) void _init(size_t size = 0)
114     {
115         size = nextpow2(cast(size_t)(size / loadFactor));
116         if (size < 32)
117             size = 32;
118         table = cast(StringEntry*)mem.xcalloc(size, (table[0]).sizeof);
119         tabledim = size;
120         pools = null;
121         npools = nfill = 0;
122         count = 0;
123     }
124 
125     extern (C++) void reset(size_t size = 0)
126     {
127         for (size_t i = 0; i < npools; ++i)
128             mem.xfree(pools[i]);
129         mem.xfree(table);
130         mem.xfree(pools);
131         table = null;
132         pools = null;
133         _init(size);
134     }
135 
136     extern (C++) ~this()
137     {
138         for (size_t i = 0; i < npools; ++i)
139             mem.xfree(pools[i]);
140         mem.xfree(table);
141         mem.xfree(pools);
142         table = null;
143         pools = null;
144     }
145 
146     extern (C++) StringValue* lookup(const(char)* s, size_t length)
147     {
148         const(hash_t) hash = calcHash(s, length);
149         const(size_t) i = findSlot(hash, s, length);
150         // printf("lookup %.*s %p\n", (int)length, s, table[i].value ?: NULL);
151         return getValue(table[i].vptr);
152     }
153 
154     extern (C++) StringValue* insert(const(char)* s, size_t length)
155     {
156         const(hash_t) hash = calcHash(s, length);
157         size_t i = findSlot(hash, s, length);
158         if (table[i].vptr)
159             return null; // already in table
160         if (++count > tabledim * loadFactor)
161         {
162             grow();
163             i = findSlot(hash, s, length);
164         }
165         table[i].hash = hash;
166         table[i].vptr = allocValue(s, length);
167         // printf("insert %.*s %p\n", (int)length, s, table[i].value ?: NULL);
168         return getValue(table[i].vptr);
169     }
170 
171     extern (C++) StringValue* update(const(char)* s, size_t length)
172     {
173         const(hash_t) hash = calcHash(s, length);
174         size_t i = findSlot(hash, s, length);
175         if (!table[i].vptr)
176         {
177             if (++count > tabledim * loadFactor)
178             {
179                 grow();
180                 i = findSlot(hash, s, length);
181             }
182             table[i].hash = hash;
183             table[i].vptr = allocValue(s, length);
184         }
185         // printf("update %.*s %p\n", (int)length, s, table[i].value ?: NULL);
186         return getValue(table[i].vptr);
187     }
188 
189     /********************************
190      * Walk the contents of the string table,
191      * calling fp for each entry.
192      * Params:
193      *      fp = function to call. Returns !=0 to stop
194      * Returns:
195      *      last return value of fp call
196      */
197     extern (C++) int apply(int function(StringValue*) fp)
198     {
199         for (size_t i = 0; i < tabledim; ++i)
200         {
201             StringEntry* se = &table[i];
202             if (!se.vptr)
203                 continue;
204             StringValue* sv = getValue(se.vptr);
205             int result = (*fp)(sv);
206             if (result)
207                 return result;
208         }
209         return 0;
210     }
211 
212 private:
213     extern (C++) uint32_t allocValue(const(char)* s, size_t length)
214     {
215         const(size_t) nbytes = StringValue.sizeof + length + 1;
216         if (!npools || nfill + nbytes > POOL_SIZE)
217         {
218             pools = cast(uint8_t**)mem.xrealloc(pools, ++npools * (pools[0]).sizeof);
219             pools[npools - 1] = cast(uint8_t*)mem.xmalloc(nbytes > POOL_SIZE ? nbytes : POOL_SIZE);
220             nfill = 0;
221         }
222         StringValue* sv = cast(StringValue*)&pools[npools - 1][nfill];
223         sv.ptrvalue = null;
224         sv.length = length;
225         .memcpy(sv.lstring(), s, length);
226         sv.lstring()[length] = 0;
227         const(uint32_t) vptr = cast(uint32_t)(npools << POOL_BITS | nfill);
228         nfill += nbytes + (-nbytes & 7); // align to 8 bytes
229         return vptr;
230     }
231 
232     extern (C++) StringValue* getValue(uint32_t vptr)
233     {
234         if (!vptr)
235             return null;
236         const(size_t) idx = (vptr >> POOL_BITS) - 1;
237         const(size_t) off = vptr & POOL_SIZE - 1;
238         return cast(StringValue*)&pools[idx][off];
239     }
240 
241     extern (C++) size_t findSlot(hash_t hash, const(char)* s, size_t length)
242     {
243         // quadratic probing using triangular numbers
244         // http://stackoverflow.com/questions/2348187/moving-from-linear-probing-to-quadratic-probing-hash-collisons/2349774#2349774
245         for (size_t i = hash & (tabledim - 1), j = 1;; ++j)
246         {
247             StringValue* sv;
248             if (!table[i].vptr || table[i].hash == hash && (sv = getValue(table[i].vptr)).length == length && .memcmp(s, sv.lstring(), length) == 0)
249                 return i;
250             i = (i + j) & (tabledim - 1);
251         }
252     }
253 
254     extern (C++) void grow()
255     {
256         const(size_t) odim = tabledim;
257         StringEntry* otab = table;
258         tabledim *= 2;
259         table = cast(StringEntry*)mem.xcalloc(tabledim, (table[0]).sizeof);
260         for (size_t i = 0; i < odim; ++i)
261         {
262             StringEntry* se = &otab[i];
263             if (!se.vptr)
264                 continue;
265             StringValue* sv = getValue(se.vptr);
266             table[findSlot(se.hash, sv.lstring(), sv.length)] = *se;
267         }
268         mem.xfree(otab);
269     }
270 }