|
/*
 * Copyright (c) 2004 Tim J. Robbins.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */
|
/*
 * "Set of characters" ADT implemented as a splay tree of extents, with
 * a lookup table cache to simplify looking up the first bunch of
 * characters (which are presumably more common than others).
 */
|
31 |
|
32 #include <assert.h> |
|
33 #include <stdbool.h> |
|
34 #include <stdlib.h> |
|
35 #include <wchar.h> |
|
36 #include <wctype.h> |
|
37 #include "cset.h" |
|
38 |
|
/* File-local splay tree helpers; definitions follow the public routines. */
static struct csnode	*cset_delete(struct csnode *t, wchar_t ch);
static int		 cset_rangecmp(struct csnode *t, wchar_t ch);
static struct csnode	*cset_splay(struct csnode *t, wchar_t ch);
|
42 |
|
43 /* |
|
44 * cset_alloc -- |
|
45 * Allocate a set of characters. |
|
46 */ |
|
47 struct cset * |
|
48 cset_alloc(void) |
|
49 { |
|
50 struct cset *cs; |
|
51 |
|
52 if ((cs = malloc(sizeof (*cs))) == NULL) |
|
53 return (NULL); |
|
54 cs->cs_root = NULL; |
|
55 cs->cs_classes = NULL; |
|
56 cs->cs_havecache = false; |
|
57 cs->cs_invert = false; |
|
58 return (cs); |
|
59 } |
|
60 |
|
61 /* |
|
62 * cset_add -- |
|
63 * Add a character to the set. |
|
64 */ |
|
65 bool |
|
66 cset_add(struct cset *cs, wchar_t ch) |
|
67 { |
|
68 struct csnode *csn, *ncsn; |
|
69 wchar_t oval; |
|
70 |
|
71 cs->cs_havecache = false; |
|
72 |
|
73 /* |
|
74 * Inserting into empty tree; new item becomes the root. |
|
75 */ |
|
76 if (cs->cs_root == NULL) { |
|
77 csn = malloc(sizeof (*cs->cs_root)); |
|
78 if (csn == NULL) |
|
79 return (false); |
|
80 csn->csn_left = csn->csn_right = NULL; |
|
81 csn->csn_min = csn->csn_max = ch; |
|
82 cs->cs_root = csn; |
|
83 return (true); |
|
84 } |
|
85 |
|
86 /* |
|
87 * Splay to check whether the item already exists, and otherwise, |
|
88 * where we should put it. |
|
89 */ |
|
90 csn = cs->cs_root = cset_splay(cs->cs_root, ch); |
|
91 |
|
92 /* |
|
93 * Avoid adding duplicate nodes. |
|
94 */ |
|
95 if (cset_rangecmp(csn, ch) == 0) |
|
96 return (true); |
|
97 |
|
98 /* |
|
99 * Allocate a new node and make it the new root. |
|
100 */ |
|
101 ncsn = malloc(sizeof (*ncsn)); |
|
102 if (ncsn == NULL) |
|
103 return (false); |
|
104 ncsn->csn_min = ncsn->csn_max = ch; |
|
105 if (cset_rangecmp(csn, ch) < 0) { |
|
106 ncsn->csn_left = csn->csn_left; |
|
107 ncsn->csn_right = csn; |
|
108 csn->csn_left = NULL; |
|
109 } else { |
|
110 ncsn->csn_right = csn->csn_right; |
|
111 ncsn->csn_left = csn; |
|
112 csn->csn_right = NULL; |
|
113 } |
|
114 cs->cs_root = ncsn; |
|
115 |
|
116 /* |
|
117 * Coalesce with left and right neighbours if possible. |
|
118 */ |
|
119 if (ncsn->csn_left != NULL) { |
|
120 ncsn->csn_left = cset_splay(ncsn->csn_left, ncsn->csn_min - 1); |
|
121 if (ncsn->csn_left->csn_max == ncsn->csn_min - 1) { |
|
122 oval = ncsn->csn_left->csn_min; |
|
123 ncsn->csn_left = cset_delete(ncsn->csn_left, |
|
124 ncsn->csn_left->csn_min); |
|
125 ncsn->csn_min = oval; |
|
126 } |
|
127 } |
|
128 if (ncsn->csn_right != NULL) { |
|
129 ncsn->csn_right = cset_splay(ncsn->csn_right, |
|
130 ncsn->csn_max + 1); |
|
131 if (ncsn->csn_right->csn_min == ncsn->csn_max + 1) { |
|
132 oval = ncsn->csn_right->csn_max; |
|
133 ncsn->csn_right = cset_delete(ncsn->csn_right, |
|
134 ncsn->csn_right->csn_min); |
|
135 ncsn->csn_max = oval; |
|
136 } |
|
137 } |
|
138 |
|
139 return (true); |
|
140 } |
|
141 |
|
142 /* |
|
143 * cset_in_hard -- |
|
144 * Determine whether a character is in the set without using |
|
145 * the cache. |
|
146 */ |
|
147 bool |
|
148 cset_in_hard(struct cset *cs, wchar_t ch) |
|
149 { |
|
150 struct csclass *csc; |
|
151 |
|
152 for (csc = cs->cs_classes; csc != NULL; csc = csc->csc_next) |
|
153 if ((csc->csc_invert ^ iswctype(ch, csc->csc_type)) != 0) |
|
154 return (cs->cs_invert ^ true); |
|
155 if (cs->cs_root != NULL) { |
|
156 cs->cs_root = cset_splay(cs->cs_root, ch); |
|
157 return ((cs->cs_invert ^ cset_rangecmp(cs->cs_root, ch)) == 0); |
|
158 } |
|
159 return (cs->cs_invert ^ false); |
|
160 } |
|
161 |
|
162 /* |
|
163 * cset_cache -- |
|
164 * Update the cache. |
|
165 */ |
|
166 void |
|
167 cset_cache(struct cset *cs) |
|
168 { |
|
169 wchar_t i; |
|
170 |
|
171 for (i = 0; i < CS_CACHE_SIZE; i++) |
|
172 cs->cs_cache[i] = cset_in_hard(cs, i); |
|
173 |
|
174 cs->cs_havecache = true; |
|
175 } |
|
176 |
|
177 /* |
|
178 * cset_invert -- |
|
179 * Invert the character set. |
|
180 */ |
|
181 void |
|
182 cset_invert(struct cset *cs) |
|
183 { |
|
184 |
|
185 cs->cs_invert ^= true; |
|
186 cs->cs_havecache = false; |
|
187 } |
|
188 |
|
189 /* |
|
190 * cset_addclass -- |
|
191 * Add a wctype()-style character class to the set, optionally |
|
192 * inverting it. |
|
193 */ |
|
194 bool |
|
195 cset_addclass(struct cset *cs, wctype_t type, bool invert) |
|
196 { |
|
197 struct csclass *csc; |
|
198 |
|
199 csc = malloc(sizeof (*csc)); |
|
200 if (csc == NULL) |
|
201 return (false); |
|
202 csc->csc_type = type; |
|
203 csc->csc_invert = invert; |
|
204 csc->csc_next = cs->cs_classes; |
|
205 cs->cs_classes = csc; |
|
206 cs->cs_havecache = false; |
|
207 return (true); |
|
208 } |
|
209 |
|
210 static int |
|
211 cset_rangecmp(struct csnode *t, wchar_t ch) |
|
212 { |
|
213 |
|
214 if (ch < t->csn_min) |
|
215 return (-1); |
|
216 if (ch > t->csn_max) |
|
217 return (1); |
|
218 return (0); |
|
219 } |
|
220 |
|
221 static struct csnode * |
|
222 cset_splay(struct csnode *t, wchar_t ch) |
|
223 { |
|
224 struct csnode N, *l, *r, *y; |
|
225 |
|
226 /* |
|
227 * Based on public domain code from Sleator. |
|
228 */ |
|
229 |
|
230 assert(t != NULL); |
|
231 |
|
232 N.csn_left = N.csn_right = NULL; |
|
233 l = r = &N; |
|
234 for (;;) { |
|
235 if (cset_rangecmp(t, ch) < 0) { |
|
236 if (t->csn_left != NULL && |
|
237 cset_rangecmp(t->csn_left, ch) < 0) { |
|
238 y = t->csn_left; |
|
239 t->csn_left = y->csn_right; |
|
240 y->csn_right = t; |
|
241 t = y; |
|
242 } |
|
243 if (t->csn_left == NULL) |
|
244 break; |
|
245 r->csn_left = t; |
|
246 r = t; |
|
247 t = t->csn_left; |
|
248 } else if (cset_rangecmp(t, ch) > 0) { |
|
249 if (t->csn_right != NULL && |
|
250 cset_rangecmp(t->csn_right, ch) > 0) { |
|
251 y = t->csn_right; |
|
252 t->csn_right = y->csn_left; |
|
253 y->csn_left = t; |
|
254 t = y; |
|
255 } |
|
256 if (t->csn_right == NULL) |
|
257 break; |
|
258 l->csn_right = t; |
|
259 l = t; |
|
260 t = t->csn_right; |
|
261 } else |
|
262 break; |
|
263 } |
|
264 l->csn_right = t->csn_left; |
|
265 r->csn_left = t->csn_right; |
|
266 t->csn_left = N.csn_right; |
|
267 t->csn_right = N.csn_left; |
|
268 return (t); |
|
269 } |
|
270 |
|
271 static struct csnode * |
|
272 cset_delete(struct csnode *t, wchar_t ch) |
|
273 { |
|
274 struct csnode *x; |
|
275 |
|
276 assert(t != NULL); |
|
277 t = cset_splay(t, ch); |
|
278 assert(cset_rangecmp(t, ch) == 0); |
|
279 if (t->csn_left == NULL) |
|
280 x = t->csn_right; |
|
281 else { |
|
282 x = cset_splay(t->csn_left, ch); |
|
283 x->csn_right = t->csn_right; |
|
284 } |
|
285 free(t); |
|
286 return (x); |
|
287 } |