Mercurial > hg > early-roguelike
comparison urogue/dict.c @ 256:c495a4f288c6
Import UltraRogue from the Roguelike Restoration Project (r1490)
author | John "Elwin" Edwards |
---|---|
date | Tue, 31 Jan 2017 19:56:04 -0500 |
parents | |
children | 13b482bd9e66 |
comparison
equal
deleted
inserted
replaced
253:d9badb9c0179 | 256:c495a4f288c6 |
---|---|
1 /* | |
2 dict.c | |
3 | |
4 UltraRogue: The Ultimate Adventure in the Dungeons of Doom | |
5 Copyright (C) 1995 Herb Chong | |
6 All rights reserved. | |
7 | |
8 See the file LICENSE.TXT for full copyright and licensing information. | |
9 */ | |
10 | |
11 /****************** | |
12 ** Change history: | |
13 ** | |
14 ** (AK:04/03/95) - In dict_create, initialize hook for extensions to dictionary structure. | |
15 ** (AK:04/04/95) - In dict_insert, only set any_ptr when a new string entry is created. | |
16 ** (AK:04/17/95) - Added dict_union and dict_merge. | |
17 ** (AK:04/18/95) - In dict_create, added code to create signature, | |
18 ** table of contents, and parameter array. | |
19 ** (AK:04/18/95) - Revised dict_load, dict_save | |
20 ** (AK:04/18/95) - Added dict_import | |
21 ** | |
22 ******************/ | |
23 | |
24 static char sccsid[] = "%W% %G%"; | |
25 | |
26 #include <stdlib.h> | |
27 #include <string.h> | |
28 #if !defined(OS2) && !defined(_WIN32) | |
29 #include <unistd.h> | |
30 #endif | |
31 | |
32 #include "dict.h" | |
33 #include "dictutil.h" | |
34 | |
35 #define BUFLEN 300 | |
36 | |
37 /************************************************************************* | |
38 * Generic dictionary and hash table functions for word and | |
39 * string storage. | |
40 * | |
41 * Implements generic dictionary management by using a | |
42 * hash table with direct chaining. All terms are stored in a | |
43 * separate, unsorted, string table. When inserting into the | |
44 * hash table exceeds the current size, allocate a new hash table | |
45 * and rehash the old contents into the new table. When the string | |
46 * table fills, append more empty entries onto the old table and | |
47 * continue adding. Inserting a string that already exists increments | |
48 * the count for the string. Deleting a string only decrements the | |
49 * count to no less than 0. This will allow a 0 occurence string to be | |
50 * retrieved. To remove the 0 occurence string table entries, you must | |
51 * rebuild the dictionary. Merging from one dictionary to a new one will | |
52 * do this because it only copies nonzero occurences. | |
53 * | |
54 * Assumptions: | |
55 * unsigned long >= 32 bits | |
56 * int >= 16 bits | |
57 * char == 8 bits | |
58 * number of entries < 2^28 | |
59 *************************************************************************/ | |
60 | |
61 /************************************************************************* | |
62 * hash_string: calculate hash value for string, modified | |
63 * version of hashpjw() from the new Dragon book. | |
64 * | |
65 * s - string to calculate hash for | |
66 * | |
67 * Returns: hash value | |
68 *************************************************************************/ | |
69 static unsigned long hash_string(const char *s) | |
70 { | |
71 unsigned long h = 0; | |
72 unsigned long g = 0; | |
73 | |
74 while (*s) { | |
75 h <<= 4; | |
76 h += *s++; | |
77 if ((g = h & 0xf0000000) != 0) | |
78 h ^= g >> 24; | |
79 } | |
80 return h ^ g; | |
81 } | |
82 | |
83 /************************************************************************* | |
84 * dict_create: create a dictionary and initialize its structures | |
85 * | |
86 * toc_size - number of entries in table of contents ( >= 4 ) | |
87 * initial_string_count - number of strings to hold as initial allocation | |
88 * initial_hash_chains - size of hash table, must be power of 4 | |
89 * max_chain_length - max number of elements in hash chain before signalling growth | |
90 * | |
91 * Returns: pointer to dictionary | |
92 * NULL - error creating dictionary | |
93 * non-NULL - sucessful creation | |
94 *************************************************************************/ | |
95 | |
96 DICTIONARY *dict_create( | |
97 const long toc_size, | |
98 const long initial_string_count, | |
99 const long initial_hash_chains, | |
100 const long max_chain_length ) | |
101 { | |
102 DICTIONARY *dtemp = NULL; | |
103 STRING_ENTRY *stemp = NULL; | |
104 long *ltemp = NULL; | |
105 char *sltemp; | |
106 long i, j, tocsize; | |
107 | |
108 /* check for a power of 4 in number of initial hash entries */ | |
109 switch(initial_hash_chains) { | |
110 case 0x00000004: | |
111 case 0x00000010: | |
112 case 0x00000040: | |
113 case 0x00000100: | |
114 case 0x00000400: | |
115 case 0x00001000: | |
116 case 0x00004000: | |
117 case 0x00010000: | |
118 case 0x00040000: | |
119 case 0x00100000: | |
120 case 0x00400000: | |
121 case 0x01000000: | |
122 case 0x04000000: | |
123 break; | |
124 default: | |
125 return NULL; | |
126 } | |
127 | |
128 /* Allocate the dictionary structure */ | |
129 if ((dtemp = (DICTIONARY *)malloc(sizeof(DICTIONARY))) == NULL) | |
130 goto err_exit; | |
131 | |
132 /* string count has to be within range */ | |
133 j = initial_string_count; | |
134 if (j > 0x04000000) | |
135 return NULL; | |
136 | |
137 /* force a reasonable value */ | |
138 if (j < 100) | |
139 j = 100; | |
140 | |
141 /* Allocate the string table and string array */ | |
142 if ((stemp = (STRING_ENTRY *)malloc(sizeof(STRING_ENTRY) * j)) == NULL) | |
143 goto err_exit; | |
144 if ((sltemp = (char *)malloc(8 * j)) == NULL) | |
145 goto err_exit; | |
146 | |
147 /* Allocate the hash table */ | |
148 if ((ltemp = (long *)malloc(sizeof(long) * initial_hash_chains)) == NULL) | |
149 goto err_exit; | |
150 | |
151 /* Allocate the parameter array */ | |
152 if ( (dtemp->parm = (DICT_PARM_ENTRY *)malloc(14*sizeof(DICT_PARM_ENTRY))) == NULL) | |
153 goto err_exit; | |
154 | |
155 /* Allocate the signature structure */ | |
156 if ( (dtemp->sig=(DICT_SIG*)malloc(sizeof(DICT_SIG))) == NULL ) | |
157 goto err_exit; | |
158 | |
159 /* Allocate the Table of Contents */ | |
160 tocsize = toc_size; | |
161 if ( tocsize < 4 ) | |
162 tocsize = 4; | |
163 dtemp->sig->toc_size = tocsize; | |
164 if ( (dtemp->toc=(DICT_TOC_ENTRY*)malloc(dtemp->sig->toc_size*sizeof(DICT_TOC_ENTRY))) == NULL ) | |
165 goto err_exit; | |
166 | |
167 dtemp->check_value = DICT_VALIDATE; /* validation value */ | |
168 dtemp->flags = DICT_NONE; /* no flags set */ | |
169 dtemp->entry_count = 0; /* nothing in either table */ | |
170 | |
171 dtemp->string_table = stemp; /* connect string table */ | |
172 dtemp->string_max = j; /* size of string table */ | |
173 dtemp->scan_string_index = DICT_ENTRY_NONE; /* not pointing to anything in scan */ | |
174 dtemp->string_growth_count = 0; /* haven't grown any */ | |
175 | |
176 dtemp->string_array = sltemp; /* character array for strings */ | |
177 dtemp->array_size = j * 8; /* how many bytes available */ | |
178 dtemp->array_used = 0; /* nothing used yet */ | |
179 dtemp->array_growth_count = 0; /* haven't grown any */ | |
180 | |
181 /* force maximum hash chain length to a reasonable value */ | |
182 i = max_chain_length; | |
183 if (i < 1 || i > 200) | |
184 i = 200; | |
185 dtemp->chains = ltemp; /* connect hash table */ | |
186 dtemp->longest_chain_length = 0; /* nothing in table yet */ | |
187 dtemp->allowable_chain_length = i; /* chain length limit */ | |
188 dtemp->table_size = initial_hash_chains; /* size of hash table */ | |
189 dtemp->hash_mask = initial_hash_chains - 1; /* mask for mod() function */ | |
190 dtemp->hash_growth_count = 0; /* haven't grown any */ | |
191 | |
192 /* string table is empty */ | |
193 for (i = 0 ; i < j ; i++) { | |
194 dtemp->string_table[i].string_offset = DICT_ENTRY_NONE; | |
195 dtemp->string_table[i].count = 0; | |
196 dtemp->string_table[i].next = DICT_ENTRY_NONE; | |
197 dtemp->string_table[i].flags = 0; | |
198 dtemp->string_table[i].hash_value = 0; | |
199 } | |
200 | |
201 /* no entries chained */ | |
202 for (i = 0; i < initial_hash_chains; i++) | |
203 dtemp->chains[i] = DICT_ENTRY_NONE; | |
204 | |
205 /* Initialize hook to extended dictionary structure */ | |
206 dtemp->ext = NULL; | |
207 | |
208 /* Set up the parameter array */ | |
209 if ( dict_set_parm_ids(dtemp) == FALSE ) | |
210 goto err_exit; | |
211 if ( dict_set_parm_values(dtemp) == FALSE ) | |
212 goto err_exit; | |
213 | |
214 /* Set up the Table of Contents */ | |
215 strcpy( dtemp->toc[0].id , "PARM" ); | |
216 dtemp->toc[0].size = dtemp->sig->nparms * sizeof(DICT_PARM_ENTRY); | |
217 dtemp->toc[0].ptr = dtemp->parm; | |
218 dtemp->toc[0].type = 0; | |
219 | |
220 strcpy( dtemp->toc[1].id , "HASH" ); | |
221 dtemp->toc[1].size = dtemp->table_size * sizeof(long); | |
222 dtemp->toc[1].ptr = dtemp->chains; | |
223 dtemp->toc[1].type = 0; | |
224 | |
225 strcpy( dtemp->toc[2].id , "STTB" ); | |
226 dtemp->toc[2].size = dtemp->string_max * sizeof(char); | |
227 dtemp->toc[2].ptr = dtemp->string_table; | |
228 dtemp->toc[2].type = 0; | |
229 | |
230 strcpy( dtemp->toc[3].id , "STAR" ); | |
231 dtemp->toc[3].size = dtemp->array_size * sizeof(STRING_ENTRY); | |
232 dtemp->toc[3].ptr = dtemp->string_array; | |
233 dtemp->toc[3].type = 0; | |
234 | |
235 /* Set up the signature */ | |
236 dtemp->sig->check_value = DICT_VALIDATE; | |
237 dtemp->sig->toc_size = tocsize; | |
238 dtemp->sig->nparms = 14; | |
239 | |
240 return dtemp; | |
241 | |
242 /* error exit - saves duplicate code */ | |
243 err_exit: | |
244 | |
245 dict_destroy( dtemp ); | |
246 return NULL; | |
247 } | |
248 | |
249 /************************************************************************* | |
250 * dict_destroy: discard a dictionary and its contents | |
251 * multiple calls to destroy the same dictionary is safe | |
252 * | |
253 * dict - pointer to a dictionary to discard | |
254 * | |
255 * Returns: status code | |
256 * TRUE - dictionary destroyed | |
257 * FALSE - error when destroying dictionary | |
258 * Note: Does free the dictionary structure itself. | |
259 *************************************************************************/ | |
260 BOOLEANC dict_destroy(DICTIONARY *dict) | |
261 { | |
262 int i; | |
263 | |
264 /* have to point to something */ | |
265 if (dict == NULL) | |
266 return FALSE; | |
267 | |
268 /* check value has to be OK */ | |
269 if (dict->check_value != DICT_VALIDATE) | |
270 return FALSE; | |
271 | |
272 if ( (dict->sig==NULL) || (dict->toc==NULL) || (dict->parm==NULL) ) | |
273 return FALSE; | |
274 | |
275 /* Free the type=0 tables */ | |
276 for ( i = 0 ; i < dict->sig->toc_size ; i++ ) { | |
277 if ( dict->toc[i].ptr != NULL && dict->toc[i].type == 0 ) | |
278 free( dict->toc[i].ptr ); | |
279 } /* endfor */ | |
280 | |
281 /* Free the Table of Contents and signature */ | |
282 free( dict->toc ); | |
283 free( dict->sig ); | |
284 | |
285 /* Free the dictionary structure itself */ | |
286 free( dict ); | |
287 | |
288 return TRUE; | |
289 } | |
290 | |
291 /************************************************************************* | |
292 * dict_insert: add entries into the dictionary, growing as necessary | |
293 * | |
294 * dict - pointer to dictionary to insert into | |
295 * s - string to insert | |
296 * count - count of occurences of string | |
297 * flags - flag bits associated with the string | |
298 * number - pointer to long to return word number | |
299 * | |
300 * Returns: pointer to new entry in dictionary | |
301 * NULL - error during insert | |
302 * non-NULL - pointer to inserted or updated entry | |
303 * | |
304 * Notes: if the entry already exists, increment the count. | |
305 * If NULL is returned, the dictionary state can no longer | |
306 * be trusted. Terminating with an error code would be | |
307 * safest. | |
308 *************************************************************************/ | |
309 STRING_ENTRY *dict_insert(DICTIONARY *dict, char *s, | |
310 const long occurences, const unsigned long flags, | |
311 void *any_ptr, long *number) | |
312 { | |
313 unsigned long hash_value; | |
314 long hash_index; | |
315 long string_index = -1; | |
316 long he2; | |
317 int chain_len; | |
318 | |
319 /* have to point to something */ | |
320 if (dict == NULL) | |
321 return FALSE; | |
322 | |
323 /* check value has to be OK */ | |
324 if (dict->check_value != DICT_VALIDATE) | |
325 return FALSE; | |
326 | |
327 /* must have a string */ | |
328 if (s == NULL) { | |
329 *number = -1; | |
330 return FALSE; | |
331 } | |
332 | |
333 /* must be non-NULL */ | |
334 if (s[0] == '\0') { | |
335 *number = -1; | |
336 return FALSE; | |
337 } | |
338 | |
339 /* figure out which chain it should go into */ | |
340 hash_value = hash_string(s); | |
341 hash_index = hash_value & dict->hash_mask; | |
342 | |
343 /* look for the entry in the chain */ | |
344 for (he2 = dict->chains[hash_index], chain_len = 0; he2 != DICT_ENTRY_NONE; | |
345 he2 = dict->string_table[he2].next, chain_len++) { | |
346 char *sa = &dict->string_array[dict->string_table[he2].string_offset]; | |
347 | |
348 /* compare hash_value and then string, in that order, for speed */ | |
349 if (dict->string_table[he2].hash_value == hash_value && !strcmp(s, sa)) { | |
350 string_index = he2; | |
351 break; | |
352 } | |
353 } | |
354 | |
355 /* string wasn't found */ | |
356 if (string_index < 0) { | |
357 /* make sure there is room in string entry table */ | |
358 if (dict->entry_count + 1 > dict->string_max) { | |
359 /* make new table 20% bigger */ | |
360 dict->string_max = (dict->string_max * 6) / 5; | |
361 dict->string_table = (STRING_ENTRY *)realloc(dict->string_table, | |
362 sizeof(STRING_ENTRY) * dict->string_max); | |
363 if (dict->string_table == NULL) | |
364 return NULL; | |
365 dict->string_growth_count++; | |
366 } | |
367 string_index = dict->entry_count; | |
368 dict->entry_count++; | |
369 | |
370 /* make sure there is room in the string array */ | |
371 if (dict->array_used + (long)strlen(s) + 1 > dict->array_size) { | |
372 dict->array_size = (dict->array_size * 6) / 5; | |
373 dict->string_array = (char *) realloc(dict->string_array, | |
374 dict->array_size); | |
375 if (dict->string_array == NULL) | |
376 return NULL; | |
377 } | |
378 | |
379 /* fill in starting values */ | |
380 strcpy(&dict->string_array[dict->array_used], s); | |
381 dict->string_table[string_index].string_offset = dict->array_used; | |
382 dict->array_used += (long) strlen(s) + 1; | |
383 dict->string_table[string_index].count = 0 ; | |
384 dict->string_table[string_index].flags = DICT_NONE; | |
385 dict->string_table[string_index].any_ptr = any_ptr; /* (AK:04/04/95) */ | |
386 dict->string_table[string_index].hash_value = hash_value; | |
387 | |
388 /* hook entry at beginning of chain */ | |
389 dict->string_table[string_index].next = dict->chains[hash_index]; | |
390 dict->chains[hash_index] = string_index; | |
391 | |
392 /* record chain lengths */ | |
393 chain_len++; | |
394 if (chain_len > dict->longest_chain_length) | |
395 dict->longest_chain_length = chain_len; | |
396 | |
397 /* if a chain is too long */ | |
398 if (chain_len > dict->allowable_chain_length) { | |
399 long new_size = dict->table_size * 4; | |
400 long new_mask = new_size - 1; | |
401 long *hetemp; | |
402 long i; | |
403 int longest_chain; | |
404 | |
405 if ((hetemp = (long *)malloc(sizeof(long) * new_size)) == NULL) | |
406 return NULL; | |
407 | |
408 /* hash table chains are empty */ | |
409 for (i = 0; i < new_size; i++) | |
410 hetemp[i] = DICT_ENTRY_NONE; | |
411 | |
412 /* reset all chains */ | |
413 for (i = 0; i < dict->entry_count; i++) | |
414 dict->string_table[i].next = DICT_ENTRY_NONE; | |
415 | |
416 /* recreate hash table entries by reinserting all strings */ | |
417 for (i = 0; i < dict->entry_count; i++) { | |
418 long he = dict->string_table[i].hash_value & new_mask; | |
419 | |
420 dict->string_table[i].next = hetemp[he]; | |
421 hetemp[he] = i; | |
422 } | |
423 | |
424 /* find longest chain length */ | |
425 for (i = 0, longest_chain = 0; i < new_size; i++) { | |
426 int len; | |
427 long cur; | |
428 | |
429 for (cur = hetemp[i], len = 0; cur != DICT_ENTRY_NONE; | |
430 cur = dict->string_table[cur].next) | |
431 len++; | |
432 ; | |
433 if (len > longest_chain) | |
434 longest_chain = len; | |
435 } | |
436 | |
437 /* delete old table and attach new one */ | |
438 free(dict->chains); | |
439 dict->chains = hetemp; | |
440 dict->longest_chain_length = longest_chain; | |
441 dict->table_size = new_size; | |
442 dict->hash_mask = new_mask; | |
443 | |
444 /* keep track of growth */ | |
445 dict->hash_growth_count++; | |
446 } | |
447 } | |
448 | |
449 dict->string_table[string_index].count += occurences; | |
450 dict->string_table[string_index].flags |= flags; | |
451 *number = string_index; | |
452 | |
453 return &dict->string_table[string_index]; | |
454 } | |
455 | |
456 /************************************************************************* | |
457 * dict_delete: deletes an entry from the dictionary | |
458 * (Actually, only decrements the entry's count) | |
459 * | |
460 * dict - pointer to dictionary to delete | |
461 * s - string to find and delete | |
462 * count - count to decrement entry by | |
463 * | |
464 * Returns: status code | |
465 * TRUE - entry has been deleted | |
466 * FALSE - entry wasn't found or error occured | |
467 *************************************************************************/ | |
468 BOOLEANC dict_delete(const DICTIONARY *dict, const char *s, const long count) | |
469 { | |
470 STRING_ENTRY *se; | |
471 long n; | |
472 | |
473 /* find the string */ | |
474 if ((se = dict_search(dict, s, &n)) == NULL) | |
475 return FALSE; | |
476 | |
477 /* decrement count and make sure it stays valid */ | |
478 se->count -= count; | |
479 if (se->count < 0) | |
480 se->count = 0; | |
481 return TRUE; | |
482 } | |
483 | |
484 /************************************************************************* | |
485 * dict_search: look for entries in the dictionary | |
486 * | |
487 * dict - pointer to dictionary to search | |
488 * s - string to search for | |
489 * number - pointer to long to return string number | |
490 * | |
491 * Returns: pointer to string entry | |
492 * NULL - entry not found | |
493 * non-NULL - pointer to string entry | |
494 *************************************************************************/ | |
495 STRING_ENTRY *dict_search(const DICTIONARY *dict, const char *s, | |
496 long *number) | |
497 { | |
498 unsigned long hash_value; | |
499 long hash_index; | |
500 long string_index = -1; | |
501 long he; | |
502 | |
503 /* have to point to something */ | |
504 if (dict == NULL) | |
505 return NULL; | |
506 | |
507 /* check value has to be OK */ | |
508 if (dict->check_value != DICT_VALIDATE) | |
509 return NULL; | |
510 | |
511 /* must have a string */ | |
512 if (s == NULL) { | |
513 *number = -1; | |
514 return NULL; | |
515 } | |
516 | |
517 /* must be non-NULL */ | |
518 if (s[0] == '\0') { | |
519 *number = -1; | |
520 return FALSE; | |
521 } | |
522 | |
523 /* figure out which chain it should be in */ | |
524 hash_value = hash_string(s); | |
525 hash_index = hash_value & dict->hash_mask; | |
526 | |
527 /* look for the entry in the chain */ | |
528 for (he = dict->chains[hash_index]; he != DICT_ENTRY_NONE; | |
529 he = dict->string_table[he].next) { | |
530 char *sa = (char*)(&dict->string_array[dict->string_table[he].string_offset]); | |
531 | |
532 /* compare hash_value and then string, in that order, for speed */ | |
533 if (dict->string_table[he].hash_value == hash_value && !strcmp(s, sa)) { | |
534 string_index = he; | |
535 break; | |
536 } | |
537 } | |
538 if (string_index < 0) { | |
539 *number = -1; | |
540 return NULL; | |
541 } | |
542 | |
543 *number = string_index; | |
544 return dict->string_table+string_index; | |
545 } | |
546 | |
547 /************************************************************************* | |
548 * dict_union: merges contents of 2 dictionaries into | |
549 * a third | |
550 * | |
551 * dict1 - pointer to dictionary 1 | |
552 * dict2 - pointer to dictionary 2 | |
553 * | |
554 * Returns: dictionary pointer | |
555 * NULL - failed to merge | |
556 * non-NULL - pointer to new dictionary | |
557 * | |
558 * Notes: entries of the same string have their counts | |
559 * added and their flags ORed together. | |
560 *************************************************************************/ | |
561 DICTIONARY *dict_union( const DICTIONARY *dict1 , const DICTIONARY *dict2 ) | |
562 { | |
563 DICTIONARY *dict = NULL; | |
564 STRING_ENTRY *se, *se2; | |
565 long initial_string_count, initial_hash_chains, max_chain_length; | |
566 long i, string_index; | |
567 | |
568 /*********** | |
569 ** Initialize the new dictionary. | |
570 ***********/ | |
571 | |
572 if ( dict1==NULL || dict2==NULL ) | |
573 goto err_exit; | |
574 if ((dict=(DICTIONARY *)malloc(sizeof(DICTIONARY))) == NULL) | |
575 goto err_exit; | |
576 | |
577 initial_string_count = dict1->string_max; | |
578 initial_hash_chains = dict1->table_size; | |
579 max_chain_length = dict1->allowable_chain_length; | |
580 dict = dict_create( 4, | |
581 initial_string_count, | |
582 initial_hash_chains, | |
583 max_chain_length ); | |
584 | |
585 /*********** | |
586 ** Copy the entries from dict1 into the new dictionary. | |
587 ***********/ | |
588 | |
589 for ( i = 0 ; i < dict1->entry_count ; i++ ) { | |
590 se = (STRING_ENTRY*)&dict1->string_table[i]; | |
591 if ( se->count > 0 ) { | |
592 se2 = dict_insert( | |
593 dict, | |
594 dict1->string_array+se->string_offset, | |
595 se->count, | |
596 se->flags, | |
597 NULL, | |
598 &string_index ); | |
599 if ( se2 == NULL ) | |
600 goto err_exit; | |
601 } /* endif */ | |
602 } /* endfor */ | |
603 | |
604 /* Merge the entries from dict2 into the new dictionary. */ | |
605 if ( dict_merge(dict,dict2,FALSE) == FALSE ) | |
606 goto err_exit; | |
607 | |
608 /* Success. Return a pointer to the new dictionary. */ | |
609 return( dict ); | |
610 | |
611 /* Failure. Ignominiously erase our tracks and return NULL. */ | |
612 err_exit: | |
613 dict_destroy( dict ); | |
614 return NULL; | |
615 } | |
616 | |
617 /************************************************************************* | |
618 * dict_merge: merges the contents of a dictionary into | |
619 * another one, updating the contents of the destination | |
620 * | |
621 * dst - dictionary to update | |
622 * src - dictionary to add | |
623 * move - boolean flag | |
624 * TRUE - move to dest | |
625 * FALSE - copy to dest | |
626 * | |
627 * Returns: status code | |
628 * TRUE - merge completed | |
629 * FALSE - error on merge | |
630 * | |
631 * Notes: entries of the same string have their counts | |
632 * added. At the end of a move, src is empty. If there | |
633 * is an error during a move, the contents of both | |
634 * dictionaries cannot be trusted. | |
635 *************************************************************************/ | |
636 BOOLEANC dict_merge(const DICTIONARY *dst, const DICTIONARY *src, const BOOLEANC move) | |
637 { | |
638 STRING_ENTRY *se, *se2; | |
639 DICTIONARY *dict1, *dict2; | |
640 long i, string_index, index; | |
641 | |
642 dict1 = (DICTIONARY*)src; | |
643 dict2 = (DICTIONARY*)dst; | |
644 | |
645 /*********** | |
646 ** Copy the dictionary entries into the new dictionary. | |
647 ***********/ | |
648 | |
649 for ( i = 0 ; i < dict1->entry_count ; i++ ) { | |
650 se = (STRING_ENTRY*)&dict1->string_table[i]; | |
651 if ( se->count > 0 ) { | |
652 se2 = dict_insert( | |
653 dict2, | |
654 dict1->string_array+se->string_offset, | |
655 se->count, | |
656 se->flags, | |
657 NULL, | |
658 &string_index ); | |
659 if ( se2 == NULL ) | |
660 goto err_exit; | |
661 } /* endif */ | |
662 } /* endfor */ | |
663 | |
664 /*********** | |
665 ** Set up the dictionary parameter vector. | |
666 ***********/ | |
667 | |
668 if ( dict_set_parm_values(dict2) == FALSE ) | |
669 return( FALSE ); | |
670 | |
671 /*********** | |
672 ** Update the table of contents. | |
673 ** PARM HASH STTB STAR | |
674 ***********/ | |
675 | |
676 if ( (index=dict_toc_index(dict2,"HASH")) == -1) | |
677 goto err_exit; | |
678 dict2->toc[index].size = dict2->table_size * sizeof(long); | |
679 dict2->toc[index].ptr = dict2->chains; | |
680 | |
681 if ( (index=dict_toc_index(dict2,"STTB")) == -1) | |
682 goto err_exit; | |
683 dict2->toc[index].size = dict2->string_max * sizeof(char); | |
684 dict2->toc[index].ptr = dict2->string_table; | |
685 | |
686 if ( (index=dict_toc_index(dict2,"STAR")) == -1) | |
687 goto err_exit; | |
688 dict2->toc[index].size = dict2->array_size * sizeof(STRING_ENTRY); | |
689 dict2->toc[index].ptr = dict2->string_array; | |
690 | |
691 /*********** | |
692 ** Update the signature | |
693 ***********/ | |
694 | |
695 dict2->sig->checksum = | |
696 compute_checksum( 4*sizeof(DICT_TOC_ENTRY) , (char*)(dict2->toc) ); | |
697 | |
698 /*********** | |
699 ** If this is a move, destroy the source dictionary | |
700 ***********/ | |
701 | |
702 if ( move == TRUE ) | |
703 if ( dict_destroy(dict1) == FALSE ) | |
704 goto err_exit; | |
705 | |
706 /*********** | |
707 ** Success | |
708 ***********/ | |
709 | |
710 return( TRUE ); | |
711 | |
712 /*********** | |
713 ** Failure | |
714 ***********/ | |
715 | |
716 err_exit: | |
717 dict_destroy( dict2 ); | |
718 return FALSE; | |
719 } | |
720 | |
721 /************************************************************************* | |
722 * dict_string_by_number: return string pointer for | |
723 * a given string number | |
724 * | |
725 * number - string number | |
726 * | |
727 * Returns: pointer to string entry | |
728 * NULL - entry not found | |
729 * non-NULL - pointer to string entry | |
730 *************************************************************************/ | |
731 STRING_ENTRY *dict_string_by_number(const DICTIONARY *dict, const long number) | |
732 { | |
733 if (dict == NULL) | |
734 return NULL; | |
735 | |
736 /* check value has to be OK */ | |
737 if (dict->check_value != DICT_VALIDATE) | |
738 return NULL; | |
739 | |
740 /* string number has to be within range */ | |
741 if (number < 0 || number >= dict->entry_count) | |
742 return NULL; | |
743 | |
744 return dict->string_table+number; | |
745 } | |
746 | |
747 /************************************************************************* | |
748 * dict_scan_begin: begin a scan of the dictionary | |
749 * | |
750 * dict - pointer to dictionary to scan | |
751 * | |
752 * Returns: status code | |
753 * TRUE - scan initialized | |
754 * FALSE - error | |
755 * | |
756 * Notes: if a scan is already in progress, it is | |
757 * abandoned and the scan is reset to the | |
758 * beginning. | |
759 *************************************************************************/ | |
760 BOOLEANC dict_scan_begin(DICTIONARY *dict) | |
761 { | |
762 /* have to point to something */ | |
763 if (dict == NULL) | |
764 return FALSE; | |
765 | |
766 /* check value has to be OK */ | |
767 if (dict->check_value != DICT_VALIDATE) | |
768 return FALSE; | |
769 | |
770 /* point to first entry in string table */ | |
771 dict->scan_string_index = 0; | |
772 return TRUE; | |
773 } | |
774 | |
775 /************************************************************************* | |
776 * dict_scan_next: get the next entry in a scan | |
777 * | |
778 * dict - pointer to dictionary to continue scanning | |
779 * | |
780 * Returns: pointer to string entry | |
781 * NULL - no more entries | |
782 * non-NULL - next string entry | |
783 *************************************************************************/ | |
784 STRING_ENTRY *dict_scan_next(DICTIONARY *dict) | |
785 { | |
786 /* have to point to something */ | |
787 if (dict == NULL) | |
788 return NULL; | |
789 | |
790 /* check value has to be OK */ | |
791 if (dict->check_value != DICT_VALIDATE) | |
792 return NULL; | |
793 | |
794 /* scan index has to be within range */ | |
795 if (dict->scan_string_index < 0 | |
796 || dict->scan_string_index > dict->entry_count) | |
797 return NULL; | |
798 | |
799 /* for first non-empty table entry */ | |
800 while (dict->scan_string_index < dict->entry_count | |
801 && dict->string_table[dict->scan_string_index].count == 0) | |
802 dict->scan_string_index++; | |
803 | |
804 /* past end of table? */ | |
805 if (dict->scan_string_index >= dict->entry_count) | |
806 return NULL; | |
807 | |
808 return &dict->string_table[dict->scan_string_index++]; | |
809 } | |
810 | |
811 | |
812 /************************************************************************* | |
813 * dict_load - load a compiled dictionary into memory | |
814 * creates a new dictionary | |
815 * | |
816 * fname - fully qualified file name | |
817 * | |
818 * Returns: pointer to created dictionary structure | |
819 * (NULL on failure) | |
820 *************************************************************************/ | |
821 DICTIONARY *dict_load(const char *fname) | |
822 { | |
823 DICTIONARY *dict = NULL; | |
824 int code, index; | |
825 FILE *fi; | |
826 int ntoc; | |
827 | |
828 if ( (fi=fopen((char*)fname,"rb")) == NULL ) { | |
829 signal_error( "dict_load: could not open file" , (char*)fname , 0 ); | |
830 goto err_exit; | |
831 } /* endif */ | |
832 | |
833 if ((dict = (DICTIONARY *)malloc(sizeof(DICTIONARY))) == NULL) { | |
834 /* signal_error( "dict_load: alloc failed" , "" , 0 ); */ | |
835 goto err_exit; | |
836 } /* endif */ | |
837 | |
838 /* Read the dictionary signature record */ | |
839 if ((dict->sig = (DICT_SIG *)malloc(sizeof(DICT_SIG))) == NULL) { | |
840 goto err_exit; | |
841 } /* endif */ | |
842 code = block_read( fi , (char*)(dict->sig) , sizeof(DICT_SIG) , 0 ); | |
843 if ( code == -1 ) { | |
844 signal_error( "dict_load: could not read signature" , (char*)fname , 0 ); | |
845 goto err_exit; | |
846 } /* endif */ | |
847 | |
848 if ( dict->sig->check_value != DICT_VALIDATE ) { | |
849 signal_error( "dict_load: could not validate file" , (char*)fname , 0 ); | |
850 goto err_exit; | |
851 } /* endif */ | |
852 dict->check_value = dict->sig->check_value; | |
853 | |
854 /* Read the dictionary Table of Contents */ | |
855 ntoc = dict->sig->toc_size; | |
856 dict->toc = (DICT_TOC_ENTRY *) malloc( ntoc * sizeof(DICT_TOC_ENTRY) ); | |
857 if ( dict->toc == NULL ) { | |
858 signal_error( "dict_load: alloc of TOC failed" , "" , 0 ); | |
859 goto err_exit; | |
860 } /* endif */ | |
861 code = block_read( fi , | |
862 (char*)(dict->toc) , | |
863 ntoc * sizeof(DICT_TOC_ENTRY) , | |
864 sizeof(DICT_SIG) ); | |
865 if ( code == -1 ) { | |
866 signal_error( "dict_load: could not read Table of Contents" , (char*)fname , 0 ); | |
867 goto err_exit; | |
868 } /* endif */ | |
869 | |
870 /* Read the dictionary parameters */ | |
871 dict->parm = (DICT_PARM_ENTRY *) | |
872 dict_load_block( dict , "PARM" , fi , NULL ); | |
873 if ( dict->parm == NULL ) { | |
874 signal_error( "dict_load: could not load parameter table" , "" , 0 ); | |
875 goto err_exit; | |
876 } /* endif */ | |
877 index = dict_toc_index( dict , "PARM" ); | |
878 dict->sig->nparms = dict->toc[index].size / sizeof(DICT_PARM_ENTRY); | |
879 | |
880 /* Set the parameter values in the dictionary structure */ | |
881 if ( dict_set_parm_variables(dict) == FALSE ) | |
882 goto err_exit; | |
883 | |
884 /* Load the string array */ | |
885 dict->string_array = (char *) | |
886 dict_load_block( dict , "STAR" , fi , NULL ); | |
887 if ( dict->string_array == NULL ) { | |
888 signal_error( "dict_load: could not load string array" , (char*)fname , 0 ); | |
889 goto err_exit; | |
890 } /* endif */ | |
891 | |
892 /* Load the string table */ | |
893 dict->string_table = (STRING_ENTRY *) | |
894 dict_load_block( dict , "STTB" , fi , NULL ); | |
895 if ( dict->string_table == NULL ) { | |
896 signal_error( "dict_load: could not load string table" , (char*)fname , 0 ); | |
897 goto err_exit; | |
898 } /* endif */ | |
899 | |
900 /* Load the hash table */ | |
901 dict->chains = (long *) | |
902 dict_load_block( dict , "HASH" , fi , NULL ); | |
903 if ( dict->chains == NULL ) { | |
904 signal_error( "dict_load: could not load hash table" , (char*)fname , 0 ); | |
905 goto err_exit; | |
906 } /* endif */ | |
907 | |
908 /* Initialize the hook for dictionary extensions */ | |
909 dict->ext = NULL; | |
910 | |
911 /* Success */ | |
912 fclose( fi ); | |
913 return( dict ); | |
914 | |
915 /* Failure */ | |
916 err_exit: | |
917 if ( fi != NULL ) | |
918 fclose( fi ); | |
919 dict_destroy( dict ); | |
920 return NULL; | |
921 } | |
922 | |
923 | |
924 /************************************************************************* | |
925 * dict_save - save a dictionary from memory into a file | |
926 * | |
927 * dict - pointer to dictionary to save | |
928 * fname - full qualified file name prefix of dictionary | |
929 * | |
930 * Returns: status code | |
931 * TRUE - dictionary was saved sucessfully | |
932 * FALSE - error during save | |
933 *************************************************************************/ | |
934 BOOLEANC dict_save( DICTIONARY *dict, const char *fname ) | |
935 { | |
936 int index, ret_code; | |
937 FILE *fo = NULL; | |
938 | |
939 /* Have to be pointing at a valid dictionary */ | |
940 if ( dict == NULL || dict->sig->check_value != DICT_VALIDATE ) | |
941 goto err_exit; | |
942 | |
943 /* Open the file for output */ | |
944 if ( (fo=fopen((char*)fname,"wb")) == NULL ) | |
945 goto err_exit; | |
946 | |
947 /* Make the table of contents entries current */ | |
948 /* Note: This will not be necessary once the data is stored in EVECTORs */ | |
949 | |
950 if ( (index=dict_toc_index(dict,"PARM")) == -1 ) | |
951 goto err_exit; | |
952 dict->toc[index].size = dict->sig->nparms * sizeof(DICT_PARM_ENTRY); | |
953 dict->toc[index].ptr = dict->parm; | |
954 | |
955 if ( (index=dict_toc_index(dict,"STAR")) == -1 ) | |
956 goto err_exit; | |
957 dict->toc[index].size = dict->array_size * sizeof(char); | |
958 dict->toc[index].ptr = dict->string_array; | |
959 | |
960 if ( (index=dict_toc_index(dict,"STTB")) == -1 ) | |
961 goto err_exit; | |
962 dict->toc[index].size = dict->string_max * sizeof(STRING_ENTRY); | |
963 dict->toc[index].ptr = dict->string_table; | |
964 | |
965 if ( (index=dict_toc_index(dict,"HASH")) == -1 ) | |
966 goto err_exit; | |
967 dict->toc[index].size = dict->table_size * sizeof(long); | |
968 dict->toc[index].ptr = dict->chains; | |
969 | |
970 /* Reset the TOC offsets and checksums for ALL tables */ | |
971 /* (not just type=0 tables) */ | |
972 dict_reset_toc_offsets( dict ); | |
973 | |
974 /* Set the dictionary parm structure from the parameter values */ | |
975 if ( dict_set_parm_values(dict) == FALSE ) | |
976 goto err_exit; | |
977 | |
978 /* Save the signature */ | |
979 dict->sig->checksum = compute_checksum( sizeof(DICT_SIG) , (char*)(dict->sig) ); | |
980 ret_code = block_write( fo , | |
981 (char*)dict->sig , | |
982 sizeof(DICT_SIG) ); | |
983 if ( ret_code == -1 ) | |
984 goto err_exit; | |
985 | |
986 /* Save the table of contents */ | |
987 ret_code = block_write( fo, | |
988 (char*)dict->toc, | |
989 dict->sig->toc_size * sizeof(DICT_TOC_ENTRY) ); | |
990 if ( ret_code == -1 ) | |
991 goto err_exit; | |
992 | |
993 /* Save the tables */ | |
994 /* For now, only save type=0 tables */ | |
995 for ( index = 0 ; index < dict->sig->toc_size ; index++ ) { | |
996 if ( dict->toc[index].type == 0 ) { /* Ordinary table */ | |
997 ret_code = dict_save_block( dict , dict->toc[index].id , fo ); | |
998 if ( ret_code == FALSE ) | |
999 goto err_exit; | |
1000 } /* endif */ | |
1001 } /* endfor */ | |
1002 | |
1003 /* Success */ | |
1004 fclose( fo ); | |
1005 return TRUE; | |
1006 | |
1007 /* Failure */ | |
1008 err_exit: | |
1009 if ( fo != NULL ) | |
1010 fclose( fo ); | |
1011 return FALSE; | |
1012 } | |
1013 | |
1014 | |
1015 /************************************************************************* | |
1016 * dict_import: read in an ASCII dictionary. | |
1017 * | |
1018 * dict_fname - name of dictionary file | |
1019 * parameters to create a DICTIONARY structure (see dict_create) | |
1020 * | |
1021 * Returns: pointer to created DICTIONARY structure | |
1022 * (NULL on failure) | |
1023 * | |
1024 *************************************************************************/ | |
1025 DICTIONARY *dict_import( const char *dict_fname , | |
1026 const long initial_string_count , | |
1027 const long initial_hash_entries , | |
1028 const long max_chain_length ) | |
1029 { | |
1030 DICTIONARY *dict; | |
1031 char buffer[BUFLEN], ch; | |
1032 int index, c, c0; | |
1033 long number; | |
1034 FILE *fi = NULL; | |
1035 | |
1036 /*********** | |
1037 ** Dictionary setup. | |
1038 ***********/ | |
1039 | |
1040 dict = dict_create( 4, | |
1041 initial_string_count , | |
1042 initial_hash_entries , | |
1043 max_chain_length ); | |
1044 if ( dict == NULL ) | |
1045 goto err_exit; | |
1046 | |
1047 /*********** | |
1048 ** Read the dictionary file | |
1049 ** Each line should have one word or a string delimited by '|' | |
1050 ***********/ | |
1051 | |
1052 if ( (fi=fopen(dict_fname,"r")) == NULL ) | |
1053 goto err_exit; | |
1054 while( fgets(buffer,BUFLEN,fi) != NULL ) { | |
1055 c0 = 0; | |
1056 /* Skip to non-blank */ | |
1057 while ( (c0<BUFLEN-2) && (buffer[c0]==' ') ) ++c0; | |
1058 if ( buffer[c0] == '|' ) { | |
1059 c = ++c0; | |
1060 ch = '|'; | |
1061 } else { | |
1062 c = c0; | |
1063 ch = ' '; | |
1064 } /* endif */ | |
1065 /* Scan to blank or matching '|' */ | |
1066 while ( (c<BUFLEN-1) && (buffer[c]!='\0') && | |
1067 (buffer[c]!='\n') && (buffer[c]!=ch) ) | |
1068 ++c; | |
1069 buffer[c] = '\0'; | |
1070 /* Insert the word */ | |
1071 if ( dict_insert(dict,buffer+c0,1,0,NULL,&number) == NULL ) | |
1072 goto err_exit; | |
1073 } /* endwhile */ | |
1074 | |
1075 /*********** | |
1076 ** Fill in the dictionary parameter vector. | |
1077 ***********/ | |
1078 | |
1079 if ( dict_set_parm_values(dict) == FALSE ) | |
1080 goto err_exit; | |
1081 | |
1082 /*********** | |
1083 ** Update the table of contents for HASH STTB STAR | |
1084 ***********/ | |
1085 | |
1086 if ( (index=dict_toc_index(dict,"HASH")) == -1 ) | |
1087 goto err_exit; | |
1088 dict->toc[index].size = dict->table_size * sizeof(long); | |
1089 | |
1090 if ( (index=dict_toc_index(dict,"STTB")) == -1 ) | |
1091 goto err_exit; | |
1092 dict->toc[index].size = dict->string_max * sizeof(char); | |
1093 | |
1094 if ( (index=dict_toc_index(dict,"STAR")) == -1 ) | |
1095 goto err_exit; | |
1096 dict->toc[index].size = dict->array_size * sizeof(STRING_ENTRY); | |
1097 | |
1098 /* Success. Return a pointer to the new dictionary. */ | |
1099 fclose(fi); | |
1100 return( dict ); | |
1101 | |
1102 /* Failure. Ignominiously erase our tracks and return NULL. */ | |
1103 err_exit: | |
1104 if ( fi != NULL ) | |
1105 fclose(fi); | |
1106 dict_destroy( dict ); | |
1107 return NULL; | |
1108 } | |
1109 | |
1110 /************************************************************************* | |
1111 * dict_export - save an extended dictionary from memory into | |
1112 * an ASCII file | |
1113 * | |
1114 * dict - pointer to dictionary to save | |
1115 * fname - full qualified file name prefix of dictionary | |
1116 * | |
1117 * Returns: status code | |
1118 * TRUE - dictionary was saved sucessfully | |
1119 * FALSE - error during save | |
1120 * | |
1121 *************************************************************************/ | |
1122 BOOLEANC dict_export( DICTIONARY *dict , const char *fname ) | |
1123 { | |
1124 FILE *fp = NULL; | |
1125 STRING_ENTRY *se; | |
1126 int i; | |
1127 | |
1128 /* have to point to something */ | |
1129 if (dict == NULL) | |
1130 goto err_exit; | |
1131 | |
1132 /* check value has to be OK */ | |
1133 if (dict->check_value != DICT_VALIDATE) | |
1134 goto err_exit; | |
1135 | |
1136 /* must have a filename */ | |
1137 if (fname == NULL) | |
1138 goto err_exit; | |
1139 | |
1140 fp = fopen( (char*)fname , "w" ); | |
1141 if ( fp == NULL ) | |
1142 goto err_exit; | |
1143 | |
1144 for ( i = 0 ; i < dict->entry_count ; i++ ) { | |
1145 se = &dict->string_table[i]; | |
1146 fprintf( fp , "|%s|\n" , dict->string_array + se->string_offset ); | |
1147 } /* endfor */ | |
1148 fclose( fp ); | |
1149 | |
1150 /* Success. */ | |
1151 fclose(fp); | |
1152 return TRUE; | |
1153 | |
1154 /* Failure. */ | |
1155 err_exit: | |
1156 if ( fp != NULL ) | |
1157 fclose(fp); | |
1158 dict_destroy( dict ); | |
1159 return FALSE; | |
1160 } |