comparison urogue/dict.c @ 256:c495a4f288c6

Import UltraRogue from the Roguelike Restoration Project (r1490)
author John "Elwin" Edwards
date Tue, 31 Jan 2017 19:56:04 -0500
parents
children 13b482bd9e66
comparison
equal deleted inserted replaced
253:d9badb9c0179 256:c495a4f288c6
1 /*
2 dict.c
3
4 UltraRogue: The Ultimate Adventure in the Dungeons of Doom
5 Copyright (C) 1995 Herb Chong
6 All rights reserved.
7
8 See the file LICENSE.TXT for full copyright and licensing information.
9 */
10
11 /******************
12 ** Change history:
13 **
14 ** (AK:04/03/95) - In dict_create, initialize hook for extensions to dictionary structure.
15 ** (AK:04/04/95) - In dict_insert, only set any_ptr when a new string entry is created.
16 ** (AK:04/17/95) - Added dict_union and dict_merge.
17 ** (AK:04/18/95) - In dict_create, added code to create signature,
18 ** table of contents, and parameter array.
19 ** (AK:04/18/95) - Revised dict_load, dict_save
20 ** (AK:04/18/95) - Added dict_import
21 **
22 ******************/
23
24 static char sccsid[] = "%W% %G%";
25
26 #include <stdlib.h>
27 #include <string.h>
28 #if !defined(OS2) && !defined(_WIN32)
29 #include <unistd.h>
30 #endif
31
32 #include "dict.h"
33 #include "dictutil.h"
34
35 #define BUFLEN 300
36
37 /*************************************************************************
38 * Generic dictionary and hash table functions for word and
39 * string storage.
40 *
41 * Implements generic dictionary management by using a
42 * hash table with direct chaining. All terms are stored in a
43 * separate, unsorted, string table. When inserting into the
44 * hash table exceeds the current size, allocate a new hash table
45 * and rehash the old contents into the new table. When the string
46 * table fills, append more empty entries onto the old table and
47 * continue adding. Inserting a string that already exists increments
48 * the count for the string. Deleting a string only decrements the
49 * count to no less than 0. This will allow a 0 occurence string to be
50 * retrieved. To remove the 0 occurence string table entries, you must
51 * rebuild the dictionary. Merging from one dictionary to a new one will
52 * do this because it only copies nonzero occurences.
53 *
54 * Assumptions:
55 * unsigned long >= 32 bits
56 * int >= 16 bits
57 * char == 8 bits
58 * number of entries < 2^28
59 *************************************************************************/
60
61 /*************************************************************************
62 * hash_string: calculate hash value for string, modified
63 * version of hashpjw() from the new Dragon book.
64 *
65 * s - string to calculate hash for
66 *
67 * Returns: hash value
68 *************************************************************************/
69 static unsigned long hash_string(const char *s)
70 {
71 unsigned long h = 0;
72 unsigned long g = 0;
73
74 while (*s) {
75 h <<= 4;
76 h += *s++;
77 if ((g = h & 0xf0000000) != 0)
78 h ^= g >> 24;
79 }
80 return h ^ g;
81 }
82
83 /*************************************************************************
84 * dict_create: create a dictionary and initialize its structures
85 *
86 * toc_size - number of entries in table of contents ( >= 4 )
87 * initial_string_count - number of strings to hold as initial allocation
88 * initial_hash_chains - size of hash table, must be power of 4
89 * max_chain_length - max number of elements in hash chain before signalling growth
90 *
91 * Returns: pointer to dictionary
92 * NULL - error creating dictionary
93 * non-NULL - sucessful creation
94 *************************************************************************/
95
96 DICTIONARY *dict_create(
97 const long toc_size,
98 const long initial_string_count,
99 const long initial_hash_chains,
100 const long max_chain_length )
101 {
102 DICTIONARY *dtemp = NULL;
103 STRING_ENTRY *stemp = NULL;
104 long *ltemp = NULL;
105 char *sltemp;
106 long i, j, tocsize;
107
108 /* check for a power of 4 in number of initial hash entries */
109 switch(initial_hash_chains) {
110 case 0x00000004:
111 case 0x00000010:
112 case 0x00000040:
113 case 0x00000100:
114 case 0x00000400:
115 case 0x00001000:
116 case 0x00004000:
117 case 0x00010000:
118 case 0x00040000:
119 case 0x00100000:
120 case 0x00400000:
121 case 0x01000000:
122 case 0x04000000:
123 break;
124 default:
125 return NULL;
126 }
127
128 /* Allocate the dictionary structure */
129 if ((dtemp = (DICTIONARY *)malloc(sizeof(DICTIONARY))) == NULL)
130 goto err_exit;
131
132 /* string count has to be within range */
133 j = initial_string_count;
134 if (j > 0x04000000)
135 return NULL;
136
137 /* force a reasonable value */
138 if (j < 100)
139 j = 100;
140
141 /* Allocate the string table and string array */
142 if ((stemp = (STRING_ENTRY *)malloc(sizeof(STRING_ENTRY) * j)) == NULL)
143 goto err_exit;
144 if ((sltemp = (char *)malloc(8 * j)) == NULL)
145 goto err_exit;
146
147 /* Allocate the hash table */
148 if ((ltemp = (long *)malloc(sizeof(long) * initial_hash_chains)) == NULL)
149 goto err_exit;
150
151 /* Allocate the parameter array */
152 if ( (dtemp->parm = (DICT_PARM_ENTRY *)malloc(14*sizeof(DICT_PARM_ENTRY))) == NULL)
153 goto err_exit;
154
155 /* Allocate the signature structure */
156 if ( (dtemp->sig=(DICT_SIG*)malloc(sizeof(DICT_SIG))) == NULL )
157 goto err_exit;
158
159 /* Allocate the Table of Contents */
160 tocsize = toc_size;
161 if ( tocsize < 4 )
162 tocsize = 4;
163 dtemp->sig->toc_size = tocsize;
164 if ( (dtemp->toc=(DICT_TOC_ENTRY*)malloc(dtemp->sig->toc_size*sizeof(DICT_TOC_ENTRY))) == NULL )
165 goto err_exit;
166
167 dtemp->check_value = DICT_VALIDATE; /* validation value */
168 dtemp->flags = DICT_NONE; /* no flags set */
169 dtemp->entry_count = 0; /* nothing in either table */
170
171 dtemp->string_table = stemp; /* connect string table */
172 dtemp->string_max = j; /* size of string table */
173 dtemp->scan_string_index = DICT_ENTRY_NONE; /* not pointing to anything in scan */
174 dtemp->string_growth_count = 0; /* haven't grown any */
175
176 dtemp->string_array = sltemp; /* character array for strings */
177 dtemp->array_size = j * 8; /* how many bytes available */
178 dtemp->array_used = 0; /* nothing used yet */
179 dtemp->array_growth_count = 0; /* haven't grown any */
180
181 /* force maximum hash chain length to a reasonable value */
182 i = max_chain_length;
183 if (i < 1 || i > 200)
184 i = 200;
185 dtemp->chains = ltemp; /* connect hash table */
186 dtemp->longest_chain_length = 0; /* nothing in table yet */
187 dtemp->allowable_chain_length = i; /* chain length limit */
188 dtemp->table_size = initial_hash_chains; /* size of hash table */
189 dtemp->hash_mask = initial_hash_chains - 1; /* mask for mod() function */
190 dtemp->hash_growth_count = 0; /* haven't grown any */
191
192 /* string table is empty */
193 for (i = 0 ; i < j ; i++) {
194 dtemp->string_table[i].string_offset = DICT_ENTRY_NONE;
195 dtemp->string_table[i].count = 0;
196 dtemp->string_table[i].next = DICT_ENTRY_NONE;
197 dtemp->string_table[i].flags = 0;
198 dtemp->string_table[i].hash_value = 0;
199 }
200
201 /* no entries chained */
202 for (i = 0; i < initial_hash_chains; i++)
203 dtemp->chains[i] = DICT_ENTRY_NONE;
204
205 /* Initialize hook to extended dictionary structure */
206 dtemp->ext = NULL;
207
208 /* Set up the parameter array */
209 if ( dict_set_parm_ids(dtemp) == FALSE )
210 goto err_exit;
211 if ( dict_set_parm_values(dtemp) == FALSE )
212 goto err_exit;
213
214 /* Set up the Table of Contents */
215 strcpy( dtemp->toc[0].id , "PARM" );
216 dtemp->toc[0].size = dtemp->sig->nparms * sizeof(DICT_PARM_ENTRY);
217 dtemp->toc[0].ptr = dtemp->parm;
218 dtemp->toc[0].type = 0;
219
220 strcpy( dtemp->toc[1].id , "HASH" );
221 dtemp->toc[1].size = dtemp->table_size * sizeof(long);
222 dtemp->toc[1].ptr = dtemp->chains;
223 dtemp->toc[1].type = 0;
224
225 strcpy( dtemp->toc[2].id , "STTB" );
226 dtemp->toc[2].size = dtemp->string_max * sizeof(char);
227 dtemp->toc[2].ptr = dtemp->string_table;
228 dtemp->toc[2].type = 0;
229
230 strcpy( dtemp->toc[3].id , "STAR" );
231 dtemp->toc[3].size = dtemp->array_size * sizeof(STRING_ENTRY);
232 dtemp->toc[3].ptr = dtemp->string_array;
233 dtemp->toc[3].type = 0;
234
235 /* Set up the signature */
236 dtemp->sig->check_value = DICT_VALIDATE;
237 dtemp->sig->toc_size = tocsize;
238 dtemp->sig->nparms = 14;
239
240 return dtemp;
241
242 /* error exit - saves duplicate code */
243 err_exit:
244
245 dict_destroy( dtemp );
246 return NULL;
247 }
248
249 /*************************************************************************
250 * dict_destroy: discard a dictionary and its contents
251 * multiple calls to destroy the same dictionary is safe
252 *
253 * dict - pointer to a dictionary to discard
254 *
255 * Returns: status code
256 * TRUE - dictionary destroyed
257 * FALSE - error when destroying dictionary
258 * Note: Does free the dictionary structure itself.
259 *************************************************************************/
260 BOOLEANC dict_destroy(DICTIONARY *dict)
261 {
262 int i;
263
264 /* have to point to something */
265 if (dict == NULL)
266 return FALSE;
267
268 /* check value has to be OK */
269 if (dict->check_value != DICT_VALIDATE)
270 return FALSE;
271
272 if ( (dict->sig==NULL) || (dict->toc==NULL) || (dict->parm==NULL) )
273 return FALSE;
274
275 /* Free the type=0 tables */
276 for ( i = 0 ; i < dict->sig->toc_size ; i++ ) {
277 if ( dict->toc[i].ptr != NULL && dict->toc[i].type == 0 )
278 free( dict->toc[i].ptr );
279 } /* endfor */
280
281 /* Free the Table of Contents and signature */
282 free( dict->toc );
283 free( dict->sig );
284
285 /* Free the dictionary structure itself */
286 free( dict );
287
288 return TRUE;
289 }
290
291 /*************************************************************************
292 * dict_insert: add entries into the dictionary, growing as necessary
293 *
294 * dict - pointer to dictionary to insert into
295 * s - string to insert
296 * count - count of occurences of string
297 * flags - flag bits associated with the string
298 * number - pointer to long to return word number
299 *
300 * Returns: pointer to new entry in dictionary
301 * NULL - error during insert
302 * non-NULL - pointer to inserted or updated entry
303 *
304 * Notes: if the entry already exists, increment the count.
305 * If NULL is returned, the dictionary state can no longer
306 * be trusted. Terminating with an error code would be
307 * safest.
308 *************************************************************************/
309 STRING_ENTRY *dict_insert(DICTIONARY *dict, char *s,
310 const long occurences, const unsigned long flags,
311 void *any_ptr, long *number)
312 {
313 unsigned long hash_value;
314 long hash_index;
315 long string_index = -1;
316 long he2;
317 int chain_len;
318
319 /* have to point to something */
320 if (dict == NULL)
321 return FALSE;
322
323 /* check value has to be OK */
324 if (dict->check_value != DICT_VALIDATE)
325 return FALSE;
326
327 /* must have a string */
328 if (s == NULL) {
329 *number = -1;
330 return FALSE;
331 }
332
333 /* must be non-NULL */
334 if (s[0] == '\0') {
335 *number = -1;
336 return FALSE;
337 }
338
339 /* figure out which chain it should go into */
340 hash_value = hash_string(s);
341 hash_index = hash_value & dict->hash_mask;
342
343 /* look for the entry in the chain */
344 for (he2 = dict->chains[hash_index], chain_len = 0; he2 != DICT_ENTRY_NONE;
345 he2 = dict->string_table[he2].next, chain_len++) {
346 char *sa = &dict->string_array[dict->string_table[he2].string_offset];
347
348 /* compare hash_value and then string, in that order, for speed */
349 if (dict->string_table[he2].hash_value == hash_value && !strcmp(s, sa)) {
350 string_index = he2;
351 break;
352 }
353 }
354
355 /* string wasn't found */
356 if (string_index < 0) {
357 /* make sure there is room in string entry table */
358 if (dict->entry_count + 1 > dict->string_max) {
359 /* make new table 20% bigger */
360 dict->string_max = (dict->string_max * 6) / 5;
361 dict->string_table = (STRING_ENTRY *)realloc(dict->string_table,
362 sizeof(STRING_ENTRY) * dict->string_max);
363 if (dict->string_table == NULL)
364 return NULL;
365 dict->string_growth_count++;
366 }
367 string_index = dict->entry_count;
368 dict->entry_count++;
369
370 /* make sure there is room in the string array */
371 if (dict->array_used + (long)strlen(s) + 1 > dict->array_size) {
372 dict->array_size = (dict->array_size * 6) / 5;
373 dict->string_array = (char *) realloc(dict->string_array,
374 dict->array_size);
375 if (dict->string_array == NULL)
376 return NULL;
377 }
378
379 /* fill in starting values */
380 strcpy(&dict->string_array[dict->array_used], s);
381 dict->string_table[string_index].string_offset = dict->array_used;
382 dict->array_used += (long) strlen(s) + 1;
383 dict->string_table[string_index].count = 0 ;
384 dict->string_table[string_index].flags = DICT_NONE;
385 dict->string_table[string_index].any_ptr = any_ptr; /* (AK:04/04/95) */
386 dict->string_table[string_index].hash_value = hash_value;
387
388 /* hook entry at beginning of chain */
389 dict->string_table[string_index].next = dict->chains[hash_index];
390 dict->chains[hash_index] = string_index;
391
392 /* record chain lengths */
393 chain_len++;
394 if (chain_len > dict->longest_chain_length)
395 dict->longest_chain_length = chain_len;
396
397 /* if a chain is too long */
398 if (chain_len > dict->allowable_chain_length) {
399 long new_size = dict->table_size * 4;
400 long new_mask = new_size - 1;
401 long *hetemp;
402 long i;
403 int longest_chain;
404
405 if ((hetemp = (long *)malloc(sizeof(long) * new_size)) == NULL)
406 return NULL;
407
408 /* hash table chains are empty */
409 for (i = 0; i < new_size; i++)
410 hetemp[i] = DICT_ENTRY_NONE;
411
412 /* reset all chains */
413 for (i = 0; i < dict->entry_count; i++)
414 dict->string_table[i].next = DICT_ENTRY_NONE;
415
416 /* recreate hash table entries by reinserting all strings */
417 for (i = 0; i < dict->entry_count; i++) {
418 long he = dict->string_table[i].hash_value & new_mask;
419
420 dict->string_table[i].next = hetemp[he];
421 hetemp[he] = i;
422 }
423
424 /* find longest chain length */
425 for (i = 0, longest_chain = 0; i < new_size; i++) {
426 int len;
427 long cur;
428
429 for (cur = hetemp[i], len = 0; cur != DICT_ENTRY_NONE;
430 cur = dict->string_table[cur].next)
431 len++;
432 ;
433 if (len > longest_chain)
434 longest_chain = len;
435 }
436
437 /* delete old table and attach new one */
438 free(dict->chains);
439 dict->chains = hetemp;
440 dict->longest_chain_length = longest_chain;
441 dict->table_size = new_size;
442 dict->hash_mask = new_mask;
443
444 /* keep track of growth */
445 dict->hash_growth_count++;
446 }
447 }
448
449 dict->string_table[string_index].count += occurences;
450 dict->string_table[string_index].flags |= flags;
451 *number = string_index;
452
453 return &dict->string_table[string_index];
454 }
455
456 /*************************************************************************
457 * dict_delete: deletes an entry from the dictionary
458 * (Actually, only decrements the entry's count)
459 *
460 * dict - pointer to dictionary to delete
461 * s - string to find and delete
462 * count - count to decrement entry by
463 *
464 * Returns: status code
465 * TRUE - entry has been deleted
466 * FALSE - entry wasn't found or error occured
467 *************************************************************************/
468 BOOLEANC dict_delete(const DICTIONARY *dict, const char *s, const long count)
469 {
470 STRING_ENTRY *se;
471 long n;
472
473 /* find the string */
474 if ((se = dict_search(dict, s, &n)) == NULL)
475 return FALSE;
476
477 /* decrement count and make sure it stays valid */
478 se->count -= count;
479 if (se->count < 0)
480 se->count = 0;
481 return TRUE;
482 }
483
484 /*************************************************************************
485 * dict_search: look for entries in the dictionary
486 *
487 * dict - pointer to dictionary to search
488 * s - string to search for
489 * number - pointer to long to return string number
490 *
491 * Returns: pointer to string entry
492 * NULL - entry not found
493 * non-NULL - pointer to string entry
494 *************************************************************************/
495 STRING_ENTRY *dict_search(const DICTIONARY *dict, const char *s,
496 long *number)
497 {
498 unsigned long hash_value;
499 long hash_index;
500 long string_index = -1;
501 long he;
502
503 /* have to point to something */
504 if (dict == NULL)
505 return NULL;
506
507 /* check value has to be OK */
508 if (dict->check_value != DICT_VALIDATE)
509 return NULL;
510
511 /* must have a string */
512 if (s == NULL) {
513 *number = -1;
514 return NULL;
515 }
516
517 /* must be non-NULL */
518 if (s[0] == '\0') {
519 *number = -1;
520 return FALSE;
521 }
522
523 /* figure out which chain it should be in */
524 hash_value = hash_string(s);
525 hash_index = hash_value & dict->hash_mask;
526
527 /* look for the entry in the chain */
528 for (he = dict->chains[hash_index]; he != DICT_ENTRY_NONE;
529 he = dict->string_table[he].next) {
530 char *sa = (char*)(&dict->string_array[dict->string_table[he].string_offset]);
531
532 /* compare hash_value and then string, in that order, for speed */
533 if (dict->string_table[he].hash_value == hash_value && !strcmp(s, sa)) {
534 string_index = he;
535 break;
536 }
537 }
538 if (string_index < 0) {
539 *number = -1;
540 return NULL;
541 }
542
543 *number = string_index;
544 return dict->string_table+string_index;
545 }
546
547 /*************************************************************************
548 * dict_union: merges contents of 2 dictionaries into
549 * a third
550 *
551 * dict1 - pointer to dictionary 1
552 * dict2 - pointer to dictionary 2
553 *
554 * Returns: dictionary pointer
555 * NULL - failed to merge
556 * non-NULL - pointer to new dictionary
557 *
558 * Notes: entries of the same string have their counts
559 * added and their flags ORed together.
560 *************************************************************************/
561 DICTIONARY *dict_union( const DICTIONARY *dict1 , const DICTIONARY *dict2 )
562 {
563 DICTIONARY *dict = NULL;
564 STRING_ENTRY *se, *se2;
565 long initial_string_count, initial_hash_chains, max_chain_length;
566 long i, string_index;
567
568 /***********
569 ** Initialize the new dictionary.
570 ***********/
571
572 if ( dict1==NULL || dict2==NULL )
573 goto err_exit;
574 if ((dict=(DICTIONARY *)malloc(sizeof(DICTIONARY))) == NULL)
575 goto err_exit;
576
577 initial_string_count = dict1->string_max;
578 initial_hash_chains = dict1->table_size;
579 max_chain_length = dict1->allowable_chain_length;
580 dict = dict_create( 4,
581 initial_string_count,
582 initial_hash_chains,
583 max_chain_length );
584
585 /***********
586 ** Copy the entries from dict1 into the new dictionary.
587 ***********/
588
589 for ( i = 0 ; i < dict1->entry_count ; i++ ) {
590 se = (STRING_ENTRY*)&dict1->string_table[i];
591 if ( se->count > 0 ) {
592 se2 = dict_insert(
593 dict,
594 dict1->string_array+se->string_offset,
595 se->count,
596 se->flags,
597 NULL,
598 &string_index );
599 if ( se2 == NULL )
600 goto err_exit;
601 } /* endif */
602 } /* endfor */
603
604 /* Merge the entries from dict2 into the new dictionary. */
605 if ( dict_merge(dict,dict2,FALSE) == FALSE )
606 goto err_exit;
607
608 /* Success. Return a pointer to the new dictionary. */
609 return( dict );
610
611 /* Failure. Ignominiously erase our tracks and return NULL. */
612 err_exit:
613 dict_destroy( dict );
614 return NULL;
615 }
616
617 /*************************************************************************
618 * dict_merge: merges the contents of a dictionary into
619 * another one, updating the contents of the destination
620 *
621 * dst - dictionary to update
622 * src - dictionary to add
623 * move - boolean flag
624 * TRUE - move to dest
625 * FALSE - copy to dest
626 *
627 * Returns: status code
628 * TRUE - merge completed
629 * FALSE - error on merge
630 *
631 * Notes: entries of the same string have their counts
632 * added. At the end of a move, src is empty. If there
633 * is an error during a move, the contents of both
634 * dictionaries cannot be trusted.
635 *************************************************************************/
636 BOOLEANC dict_merge(const DICTIONARY *dst, const DICTIONARY *src, const BOOLEANC move)
637 {
638 STRING_ENTRY *se, *se2;
639 DICTIONARY *dict1, *dict2;
640 long i, string_index, index;
641
642 dict1 = (DICTIONARY*)src;
643 dict2 = (DICTIONARY*)dst;
644
645 /***********
646 ** Copy the dictionary entries into the new dictionary.
647 ***********/
648
649 for ( i = 0 ; i < dict1->entry_count ; i++ ) {
650 se = (STRING_ENTRY*)&dict1->string_table[i];
651 if ( se->count > 0 ) {
652 se2 = dict_insert(
653 dict2,
654 dict1->string_array+se->string_offset,
655 se->count,
656 se->flags,
657 NULL,
658 &string_index );
659 if ( se2 == NULL )
660 goto err_exit;
661 } /* endif */
662 } /* endfor */
663
664 /***********
665 ** Set up the dictionary parameter vector.
666 ***********/
667
668 if ( dict_set_parm_values(dict2) == FALSE )
669 return( FALSE );
670
671 /***********
672 ** Update the table of contents.
673 ** PARM HASH STTB STAR
674 ***********/
675
676 if ( (index=dict_toc_index(dict2,"HASH")) == -1)
677 goto err_exit;
678 dict2->toc[index].size = dict2->table_size * sizeof(long);
679 dict2->toc[index].ptr = dict2->chains;
680
681 if ( (index=dict_toc_index(dict2,"STTB")) == -1)
682 goto err_exit;
683 dict2->toc[index].size = dict2->string_max * sizeof(char);
684 dict2->toc[index].ptr = dict2->string_table;
685
686 if ( (index=dict_toc_index(dict2,"STAR")) == -1)
687 goto err_exit;
688 dict2->toc[index].size = dict2->array_size * sizeof(STRING_ENTRY);
689 dict2->toc[index].ptr = dict2->string_array;
690
691 /***********
692 ** Update the signature
693 ***********/
694
695 dict2->sig->checksum =
696 compute_checksum( 4*sizeof(DICT_TOC_ENTRY) , (char*)(dict2->toc) );
697
698 /***********
699 ** If this is a move, destroy the source dictionary
700 ***********/
701
702 if ( move == TRUE )
703 if ( dict_destroy(dict1) == FALSE )
704 goto err_exit;
705
706 /***********
707 ** Success
708 ***********/
709
710 return( TRUE );
711
712 /***********
713 ** Failure
714 ***********/
715
716 err_exit:
717 dict_destroy( dict2 );
718 return FALSE;
719 }
720
721 /*************************************************************************
722 * dict_string_by_number: return string pointer for
723 * a given string number
724 *
725 * number - string number
726 *
727 * Returns: pointer to string entry
728 * NULL - entry not found
729 * non-NULL - pointer to string entry
730 *************************************************************************/
731 STRING_ENTRY *dict_string_by_number(const DICTIONARY *dict, const long number)
732 {
733 if (dict == NULL)
734 return NULL;
735
736 /* check value has to be OK */
737 if (dict->check_value != DICT_VALIDATE)
738 return NULL;
739
740 /* string number has to be within range */
741 if (number < 0 || number >= dict->entry_count)
742 return NULL;
743
744 return dict->string_table+number;
745 }
746
747 /*************************************************************************
748 * dict_scan_begin: begin a scan of the dictionary
749 *
750 * dict - pointer to dictionary to scan
751 *
752 * Returns: status code
753 * TRUE - scan initialized
754 * FALSE - error
755 *
756 * Notes: if a scan is already in progress, it is
757 * abandoned and the scan is reset to the
758 * beginning.
759 *************************************************************************/
760 BOOLEANC dict_scan_begin(DICTIONARY *dict)
761 {
762 /* have to point to something */
763 if (dict == NULL)
764 return FALSE;
765
766 /* check value has to be OK */
767 if (dict->check_value != DICT_VALIDATE)
768 return FALSE;
769
770 /* point to first entry in string table */
771 dict->scan_string_index = 0;
772 return TRUE;
773 }
774
775 /*************************************************************************
776 * dict_scan_next: get the next entry in a scan
777 *
778 * dict - pointer to dictionary to continue scanning
779 *
780 * Returns: pointer to string entry
781 * NULL - no more entries
782 * non-NULL - next string entry
783 *************************************************************************/
784 STRING_ENTRY *dict_scan_next(DICTIONARY *dict)
785 {
786 /* have to point to something */
787 if (dict == NULL)
788 return NULL;
789
790 /* check value has to be OK */
791 if (dict->check_value != DICT_VALIDATE)
792 return NULL;
793
794 /* scan index has to be within range */
795 if (dict->scan_string_index < 0
796 || dict->scan_string_index > dict->entry_count)
797 return NULL;
798
799 /* for first non-empty table entry */
800 while (dict->scan_string_index < dict->entry_count
801 && dict->string_table[dict->scan_string_index].count == 0)
802 dict->scan_string_index++;
803
804 /* past end of table? */
805 if (dict->scan_string_index >= dict->entry_count)
806 return NULL;
807
808 return &dict->string_table[dict->scan_string_index++];
809 }
810
811
812 /*************************************************************************
813 * dict_load - load a compiled dictionary into memory
814 * creates a new dictionary
815 *
816 * fname - fully qualified file name
817 *
818 * Returns: pointer to created dictionary structure
819 * (NULL on failure)
820 *************************************************************************/
821 DICTIONARY *dict_load(const char *fname)
822 {
823 DICTIONARY *dict = NULL;
824 int code, index;
825 FILE *fi;
826 int ntoc;
827
828 if ( (fi=fopen((char*)fname,"rb")) == NULL ) {
829 signal_error( "dict_load: could not open file" , (char*)fname , 0 );
830 goto err_exit;
831 } /* endif */
832
833 if ((dict = (DICTIONARY *)malloc(sizeof(DICTIONARY))) == NULL) {
834 /* signal_error( "dict_load: alloc failed" , "" , 0 ); */
835 goto err_exit;
836 } /* endif */
837
838 /* Read the dictionary signature record */
839 if ((dict->sig = (DICT_SIG *)malloc(sizeof(DICT_SIG))) == NULL) {
840 goto err_exit;
841 } /* endif */
842 code = block_read( fi , (char*)(dict->sig) , sizeof(DICT_SIG) , 0 );
843 if ( code == -1 ) {
844 signal_error( "dict_load: could not read signature" , (char*)fname , 0 );
845 goto err_exit;
846 } /* endif */
847
848 if ( dict->sig->check_value != DICT_VALIDATE ) {
849 signal_error( "dict_load: could not validate file" , (char*)fname , 0 );
850 goto err_exit;
851 } /* endif */
852 dict->check_value = dict->sig->check_value;
853
854 /* Read the dictionary Table of Contents */
855 ntoc = dict->sig->toc_size;
856 dict->toc = (DICT_TOC_ENTRY *) malloc( ntoc * sizeof(DICT_TOC_ENTRY) );
857 if ( dict->toc == NULL ) {
858 signal_error( "dict_load: alloc of TOC failed" , "" , 0 );
859 goto err_exit;
860 } /* endif */
861 code = block_read( fi ,
862 (char*)(dict->toc) ,
863 ntoc * sizeof(DICT_TOC_ENTRY) ,
864 sizeof(DICT_SIG) );
865 if ( code == -1 ) {
866 signal_error( "dict_load: could not read Table of Contents" , (char*)fname , 0 );
867 goto err_exit;
868 } /* endif */
869
870 /* Read the dictionary parameters */
871 dict->parm = (DICT_PARM_ENTRY *)
872 dict_load_block( dict , "PARM" , fi , NULL );
873 if ( dict->parm == NULL ) {
874 signal_error( "dict_load: could not load parameter table" , "" , 0 );
875 goto err_exit;
876 } /* endif */
877 index = dict_toc_index( dict , "PARM" );
878 dict->sig->nparms = dict->toc[index].size / sizeof(DICT_PARM_ENTRY);
879
880 /* Set the parameter values in the dictionary structure */
881 if ( dict_set_parm_variables(dict) == FALSE )
882 goto err_exit;
883
884 /* Load the string array */
885 dict->string_array = (char *)
886 dict_load_block( dict , "STAR" , fi , NULL );
887 if ( dict->string_array == NULL ) {
888 signal_error( "dict_load: could not load string array" , (char*)fname , 0 );
889 goto err_exit;
890 } /* endif */
891
892 /* Load the string table */
893 dict->string_table = (STRING_ENTRY *)
894 dict_load_block( dict , "STTB" , fi , NULL );
895 if ( dict->string_table == NULL ) {
896 signal_error( "dict_load: could not load string table" , (char*)fname , 0 );
897 goto err_exit;
898 } /* endif */
899
900 /* Load the hash table */
901 dict->chains = (long *)
902 dict_load_block( dict , "HASH" , fi , NULL );
903 if ( dict->chains == NULL ) {
904 signal_error( "dict_load: could not load hash table" , (char*)fname , 0 );
905 goto err_exit;
906 } /* endif */
907
908 /* Initialize the hook for dictionary extensions */
909 dict->ext = NULL;
910
911 /* Success */
912 fclose( fi );
913 return( dict );
914
915 /* Failure */
916 err_exit:
917 if ( fi != NULL )
918 fclose( fi );
919 dict_destroy( dict );
920 return NULL;
921 }
922
923
924 /*************************************************************************
925 * dict_save - save a dictionary from memory into a file
926 *
927 * dict - pointer to dictionary to save
928 * fname - full qualified file name prefix of dictionary
929 *
930 * Returns: status code
931 * TRUE - dictionary was saved sucessfully
932 * FALSE - error during save
933 *************************************************************************/
934 BOOLEANC dict_save( DICTIONARY *dict, const char *fname )
935 {
936 int index, ret_code;
937 FILE *fo = NULL;
938
939 /* Have to be pointing at a valid dictionary */
940 if ( dict == NULL || dict->sig->check_value != DICT_VALIDATE )
941 goto err_exit;
942
943 /* Open the file for output */
944 if ( (fo=fopen((char*)fname,"wb")) == NULL )
945 goto err_exit;
946
947 /* Make the table of contents entries current */
948 /* Note: This will not be necessary once the data is stored in EVECTORs */
949
950 if ( (index=dict_toc_index(dict,"PARM")) == -1 )
951 goto err_exit;
952 dict->toc[index].size = dict->sig->nparms * sizeof(DICT_PARM_ENTRY);
953 dict->toc[index].ptr = dict->parm;
954
955 if ( (index=dict_toc_index(dict,"STAR")) == -1 )
956 goto err_exit;
957 dict->toc[index].size = dict->array_size * sizeof(char);
958 dict->toc[index].ptr = dict->string_array;
959
960 if ( (index=dict_toc_index(dict,"STTB")) == -1 )
961 goto err_exit;
962 dict->toc[index].size = dict->string_max * sizeof(STRING_ENTRY);
963 dict->toc[index].ptr = dict->string_table;
964
965 if ( (index=dict_toc_index(dict,"HASH")) == -1 )
966 goto err_exit;
967 dict->toc[index].size = dict->table_size * sizeof(long);
968 dict->toc[index].ptr = dict->chains;
969
970 /* Reset the TOC offsets and checksums for ALL tables */
971 /* (not just type=0 tables) */
972 dict_reset_toc_offsets( dict );
973
974 /* Set the dictionary parm structure from the parameter values */
975 if ( dict_set_parm_values(dict) == FALSE )
976 goto err_exit;
977
978 /* Save the signature */
979 dict->sig->checksum = compute_checksum( sizeof(DICT_SIG) , (char*)(dict->sig) );
980 ret_code = block_write( fo ,
981 (char*)dict->sig ,
982 sizeof(DICT_SIG) );
983 if ( ret_code == -1 )
984 goto err_exit;
985
986 /* Save the table of contents */
987 ret_code = block_write( fo,
988 (char*)dict->toc,
989 dict->sig->toc_size * sizeof(DICT_TOC_ENTRY) );
990 if ( ret_code == -1 )
991 goto err_exit;
992
993 /* Save the tables */
994 /* For now, only save type=0 tables */
995 for ( index = 0 ; index < dict->sig->toc_size ; index++ ) {
996 if ( dict->toc[index].type == 0 ) { /* Ordinary table */
997 ret_code = dict_save_block( dict , dict->toc[index].id , fo );
998 if ( ret_code == FALSE )
999 goto err_exit;
1000 } /* endif */
1001 } /* endfor */
1002
1003 /* Success */
1004 fclose( fo );
1005 return TRUE;
1006
1007 /* Failure */
1008 err_exit:
1009 if ( fo != NULL )
1010 fclose( fo );
1011 return FALSE;
1012 }
1013
1014
1015 /*************************************************************************
1016 * dict_import: read in an ASCII dictionary.
1017 *
1018 * dict_fname - name of dictionary file
1019 * parameters to create a DICTIONARY structure (see dict_create)
1020 *
1021 * Returns: pointer to created DICTIONARY structure
1022 * (NULL on failure)
1023 *
1024 *************************************************************************/
1025 DICTIONARY *dict_import( const char *dict_fname ,
1026 const long initial_string_count ,
1027 const long initial_hash_entries ,
1028 const long max_chain_length )
1029 {
1030 DICTIONARY *dict;
1031 char buffer[BUFLEN], ch;
1032 int index, c, c0;
1033 long number;
1034 FILE *fi = NULL;
1035
1036 /***********
1037 ** Dictionary setup.
1038 ***********/
1039
1040 dict = dict_create( 4,
1041 initial_string_count ,
1042 initial_hash_entries ,
1043 max_chain_length );
1044 if ( dict == NULL )
1045 goto err_exit;
1046
1047 /***********
1048 ** Read the dictionary file
1049 ** Each line should have one word or a string delimited by '|'
1050 ***********/
1051
1052 if ( (fi=fopen(dict_fname,"r")) == NULL )
1053 goto err_exit;
1054 while( fgets(buffer,BUFLEN,fi) != NULL ) {
1055 c0 = 0;
1056 /* Skip to non-blank */
1057 while ( (c0<BUFLEN-2) && (buffer[c0]==' ') ) ++c0;
1058 if ( buffer[c0] == '|' ) {
1059 c = ++c0;
1060 ch = '|';
1061 } else {
1062 c = c0;
1063 ch = ' ';
1064 } /* endif */
1065 /* Scan to blank or matching '|' */
1066 while ( (c<BUFLEN-1) && (buffer[c]!='\0') &&
1067 (buffer[c]!='\n') && (buffer[c]!=ch) )
1068 ++c;
1069 buffer[c] = '\0';
1070 /* Insert the word */
1071 if ( dict_insert(dict,buffer+c0,1,0,NULL,&number) == NULL )
1072 goto err_exit;
1073 } /* endwhile */
1074
1075 /***********
1076 ** Fill in the dictionary parameter vector.
1077 ***********/
1078
1079 if ( dict_set_parm_values(dict) == FALSE )
1080 goto err_exit;
1081
1082 /***********
1083 ** Update the table of contents for HASH STTB STAR
1084 ***********/
1085
1086 if ( (index=dict_toc_index(dict,"HASH")) == -1 )
1087 goto err_exit;
1088 dict->toc[index].size = dict->table_size * sizeof(long);
1089
1090 if ( (index=dict_toc_index(dict,"STTB")) == -1 )
1091 goto err_exit;
1092 dict->toc[index].size = dict->string_max * sizeof(char);
1093
1094 if ( (index=dict_toc_index(dict,"STAR")) == -1 )
1095 goto err_exit;
1096 dict->toc[index].size = dict->array_size * sizeof(STRING_ENTRY);
1097
1098 /* Success. Return a pointer to the new dictionary. */
1099 fclose(fi);
1100 return( dict );
1101
1102 /* Failure. Ignominiously erase our tracks and return NULL. */
1103 err_exit:
1104 if ( fi != NULL )
1105 fclose(fi);
1106 dict_destroy( dict );
1107 return NULL;
1108 }
1109
1110 /*************************************************************************
1111 * dict_export - save an extended dictionary from memory into
1112 * an ASCII file
1113 *
1114 * dict - pointer to dictionary to save
1115 * fname - full qualified file name prefix of dictionary
1116 *
1117 * Returns: status code
1118 * TRUE - dictionary was saved sucessfully
1119 * FALSE - error during save
1120 *
1121 *************************************************************************/
1122 BOOLEANC dict_export( DICTIONARY *dict , const char *fname )
1123 {
1124 FILE *fp = NULL;
1125 STRING_ENTRY *se;
1126 int i;
1127
1128 /* have to point to something */
1129 if (dict == NULL)
1130 goto err_exit;
1131
1132 /* check value has to be OK */
1133 if (dict->check_value != DICT_VALIDATE)
1134 goto err_exit;
1135
1136 /* must have a filename */
1137 if (fname == NULL)
1138 goto err_exit;
1139
1140 fp = fopen( (char*)fname , "w" );
1141 if ( fp == NULL )
1142 goto err_exit;
1143
1144 for ( i = 0 ; i < dict->entry_count ; i++ ) {
1145 se = &dict->string_table[i];
1146 fprintf( fp , "|%s|\n" , dict->string_array + se->string_offset );
1147 } /* endfor */
1148 fclose( fp );
1149
1150 /* Success. */
1151 fclose(fp);
1152 return TRUE;
1153
1154 /* Failure. */
1155 err_exit:
1156 if ( fp != NULL )
1157 fclose(fp);
1158 dict_destroy( dict );
1159 return FALSE;
1160 }