Mercurial > hg > early-roguelike
diff urogue/dict.c @ 256:c495a4f288c6
Import UltraRogue from the Roguelike Restoration Project (r1490)
author | John "Elwin" Edwards |
---|---|
date | Tue, 31 Jan 2017 19:56:04 -0500 |
parents | |
children | 13b482bd9e66 |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/urogue/dict.c Tue Jan 31 19:56:04 2017 -0500 @@ -0,0 +1,1160 @@ +/* + dict.c + + UltraRogue: The Ultimate Adventure in the Dungeons of Doom + Copyright (C) 1995 Herb Chong + All rights reserved. + + See the file LICENSE.TXT for full copyright and licensing information. +*/ + +/****************** +** Change history: +** +** (AK:04/03/95) - In dict_create, initialize hook for extensions to dictionary structure. +** (AK:04/04/95) - In dict_insert, only set any_ptr when a new string entry is created. +** (AK:04/17/95) - Added dict_union and dict_merge. +** (AK:04/18/95) - In dict_create, added code to create signature, +** table of contents, and parameter array. +** (AK:04/18/95) - Revised dict_load, dict_save +** (AK:04/18/95) - Added dict_import +** +******************/ + +static char sccsid[] = "%W% %G%"; + +#include <stdlib.h> +#include <string.h> +#if !defined(OS2) && !defined(_WIN32) + #include <unistd.h> +#endif + +#include "dict.h" +#include "dictutil.h" + +#define BUFLEN 300 + +/************************************************************************* +* Generic dictionary and hash table functions for word and +* string storage. +* +* Implements generic dictionary management by using a +* hash table with direct chaining. All terms are stored in a +* separate, unsorted, string table. When inserting into the +* hash table exceeds the current size, allocate a new hash table +* and rehash the old contents into the new table. When the string +* table fills, append more empty entries onto the old table and +* continue adding. Inserting a string that already exists increments +* the count for the string. Deleting a string only decrements the +* count to no less than 0. This will allow a 0 occurence string to be +* retrieved. To remove the 0 occurence string table entries, you must +* rebuild the dictionary. Merging from one dictionary to a new one will +* do this because it only copies nonzero occurences. +* +* Assumptions: +* unsigned long >= 32 bits +* int >= 16 bits +* char == 8 bits +* number of entries < 2^28 +*************************************************************************/ + +/************************************************************************* +* hash_string: calculate hash value for string, modified +* version of hashpjw() from the new Dragon book. +* +* s - string to calculate hash for +* +* Returns: hash value +*************************************************************************/ +static unsigned long hash_string(const char *s) +{ + unsigned long h = 0; + unsigned long g = 0; + + while (*s) { + h <<= 4; + h += *s++; + if ((g = h & 0xf0000000) != 0) + h ^= g >> 24; + } + return h ^ g; +} + +/************************************************************************* +* dict_create: create a dictionary and initialize its structures +* +* toc_size - number of entries in table of contents ( >= 4 ) +* initial_string_count - number of strings to hold as initial allocation +* initial_hash_chains - size of hash table, must be power of 4 +* max_chain_length - max number of elements in hash chain before signalling growth +* +* Returns: pointer to dictionary +* NULL - error creating dictionary +* non-NULL - sucessful creation +*************************************************************************/ + +DICTIONARY *dict_create( + const long toc_size, + const long initial_string_count, + const long initial_hash_chains, + const long max_chain_length ) +{ + DICTIONARY *dtemp = NULL; + STRING_ENTRY *stemp = NULL; + long *ltemp = NULL; + char *sltemp; + long i, j, tocsize; + + /* check for a power of 4 in number of initial hash entries */ + switch(initial_hash_chains) { + case 0x00000004: + case 0x00000010: + case 0x00000040: + case 0x00000100: + case 0x00000400: + case 0x00001000: + case 0x00004000: + case 0x00010000: + case 0x00040000: + case 0x00100000: + case 0x00400000: + case 0x01000000: + case 0x04000000: + break; + default: + return NULL; + } + + /* Allocate the dictionary structure */ + if ((dtemp = (DICTIONARY *)malloc(sizeof(DICTIONARY))) == NULL) + goto err_exit; + + /* string count has to be within range */ + j = initial_string_count; + if (j > 0x04000000) + return NULL; + + /* force a reasonable value */ + if (j < 100) + j = 100; + + /* Allocate the string table and string array */ + if ((stemp = (STRING_ENTRY *)malloc(sizeof(STRING_ENTRY) * j)) == NULL) + goto err_exit; + if ((sltemp = (char *)malloc(8 * j)) == NULL) + goto err_exit; + + /* Allocate the hash table */ + if ((ltemp = (long *)malloc(sizeof(long) * initial_hash_chains)) == NULL) + goto err_exit; + + /* Allocate the parameter array */ + if ( (dtemp->parm = (DICT_PARM_ENTRY *)malloc(14*sizeof(DICT_PARM_ENTRY))) == NULL) + goto err_exit; + + /* Allocate the signature structure */ + if ( (dtemp->sig=(DICT_SIG*)malloc(sizeof(DICT_SIG))) == NULL ) + goto err_exit; + + /* Allocate the Table of Contents */ + tocsize = toc_size; + if ( tocsize < 4 ) + tocsize = 4; + dtemp->sig->toc_size = tocsize; + if ( (dtemp->toc=(DICT_TOC_ENTRY*)malloc(dtemp->sig->toc_size*sizeof(DICT_TOC_ENTRY))) == NULL ) + goto err_exit; + + dtemp->check_value = DICT_VALIDATE; /* validation value */ + dtemp->flags = DICT_NONE; /* no flags set */ + dtemp->entry_count = 0; /* nothing in either table */ + + dtemp->string_table = stemp; /* connect string table */ + dtemp->string_max = j; /* size of string table */ + dtemp->scan_string_index = DICT_ENTRY_NONE; /* not pointing to anything in scan */ + dtemp->string_growth_count = 0; /* haven't grown any */ + + dtemp->string_array = sltemp; /* character array for strings */ + dtemp->array_size = j * 8; /* how many bytes available */ + dtemp->array_used = 0; /* nothing used yet */ + dtemp->array_growth_count = 0; /* haven't grown any */ + + /* force maximum hash chain length to a reasonable value */ + i = max_chain_length; + if (i < 1 || i > 200) + i = 200; + dtemp->chains = ltemp; /* connect hash table */ + dtemp->longest_chain_length = 0; /* nothing in table yet */ + dtemp->allowable_chain_length = i; /* chain length limit */ + dtemp->table_size = initial_hash_chains; /* size of hash table */ + dtemp->hash_mask = initial_hash_chains - 1; /* mask for mod() function */ + dtemp->hash_growth_count = 0; /* haven't grown any */ + + /* string table is empty */ + for (i = 0 ; i < j ; i++) { + dtemp->string_table[i].string_offset = DICT_ENTRY_NONE; + dtemp->string_table[i].count = 0; + dtemp->string_table[i].next = DICT_ENTRY_NONE; + dtemp->string_table[i].flags = 0; + dtemp->string_table[i].hash_value = 0; + } + + /* no entries chained */ + for (i = 0; i < initial_hash_chains; i++) + dtemp->chains[i] = DICT_ENTRY_NONE; + + /* Initialize hook to extended dictionary structure */ + dtemp->ext = NULL; + + /* Set up the parameter array */ + if ( dict_set_parm_ids(dtemp) == FALSE ) + goto err_exit; + if ( dict_set_parm_values(dtemp) == FALSE ) + goto err_exit; + + /* Set up the Table of Contents */ + strcpy( dtemp->toc[0].id , "PARM" ); + dtemp->toc[0].size = dtemp->sig->nparms * sizeof(DICT_PARM_ENTRY); + dtemp->toc[0].ptr = dtemp->parm; + dtemp->toc[0].type = 0; + + strcpy( dtemp->toc[1].id , "HASH" ); + dtemp->toc[1].size = dtemp->table_size * sizeof(long); + dtemp->toc[1].ptr = dtemp->chains; + dtemp->toc[1].type = 0; + + strcpy( dtemp->toc[2].id , "STTB" ); + dtemp->toc[2].size = dtemp->string_max * sizeof(char); + dtemp->toc[2].ptr = dtemp->string_table; + dtemp->toc[2].type = 0; + + strcpy( dtemp->toc[3].id , "STAR" ); + dtemp->toc[3].size = dtemp->array_size * sizeof(STRING_ENTRY); + dtemp->toc[3].ptr = dtemp->string_array; + dtemp->toc[3].type = 0; + + /* Set up the signature */ + dtemp->sig->check_value = DICT_VALIDATE; + dtemp->sig->toc_size = tocsize; + dtemp->sig->nparms = 14; + + return dtemp; + + /* error exit - saves duplicate code */ +err_exit: + + dict_destroy( dtemp ); + return NULL; +} + +/************************************************************************* +* dict_destroy: discard a dictionary and its contents +* multiple calls to destroy the same dictionary is safe +* +* dict - pointer to a dictionary to discard +* +* Returns: status code +* TRUE - dictionary destroyed +* FALSE - error when destroying dictionary +* Note: Does free the dictionary structure itself. +*************************************************************************/ +BOOLEANC dict_destroy(DICTIONARY *dict) +{ + int i; + + /* have to point to something */ + if (dict == NULL) + return FALSE; + + /* check value has to be OK */ + if (dict->check_value != DICT_VALIDATE) + return FALSE; + + if ( (dict->sig==NULL) || (dict->toc==NULL) || (dict->parm==NULL) ) + return FALSE; + + /* Free the type=0 tables */ + for ( i = 0 ; i < dict->sig->toc_size ; i++ ) { + if ( dict->toc[i].ptr != NULL && dict->toc[i].type == 0 ) + free( dict->toc[i].ptr ); + } /* endfor */ + + /* Free the Table of Contents and signature */ + free( dict->toc ); + free( dict->sig ); + + /* Free the dictionary structure itself */ + free( dict ); + + return TRUE; +} + +/************************************************************************* +* dict_insert: add entries into the dictionary, growing as necessary +* +* dict - pointer to dictionary to insert into +* s - string to insert +* count - count of occurences of string +* flags - flag bits associated with the string +* number - pointer to long to return word number +* +* Returns: pointer to new entry in dictionary +* NULL - error during insert +* non-NULL - pointer to inserted or updated entry +* +* Notes: if the entry already exists, increment the count. +* If NULL is returned, the dictionary state can no longer +* be trusted. Terminating with an error code would be +* safest. +*************************************************************************/ +STRING_ENTRY *dict_insert(DICTIONARY *dict, char *s, + const long occurences, const unsigned long flags, + void *any_ptr, long *number) +{ + unsigned long hash_value; + long hash_index; + long string_index = -1; + long he2; + int chain_len; + + /* have to point to something */ + if (dict == NULL) + return FALSE; + + /* check value has to be OK */ + if (dict->check_value != DICT_VALIDATE) + return FALSE; + + /* must have a string */ + if (s == NULL) { + *number = -1; + return FALSE; + } + + /* must be non-NULL */ + if (s[0] == '\0') { + *number = -1; + return FALSE; + } + + /* figure out which chain it should go into */ + hash_value = hash_string(s); + hash_index = hash_value & dict->hash_mask; + + /* look for the entry in the chain */ + for (he2 = dict->chains[hash_index], chain_len = 0; he2 != DICT_ENTRY_NONE; + he2 = dict->string_table[he2].next, chain_len++) { + char *sa = &dict->string_array[dict->string_table[he2].string_offset]; + + /* compare hash_value and then string, in that order, for speed */ + if (dict->string_table[he2].hash_value == hash_value && !strcmp(s, sa)) { + string_index = he2; + break; + } + } + + /* string wasn't found */ + if (string_index < 0) { + /* make sure there is room in string entry table */ + if (dict->entry_count + 1 > dict->string_max) { + /* make new table 20% bigger */ + dict->string_max = (dict->string_max * 6) / 5; + dict->string_table = (STRING_ENTRY *)realloc(dict->string_table, + sizeof(STRING_ENTRY) * dict->string_max); + if (dict->string_table == NULL) + return NULL; + dict->string_growth_count++; + } + string_index = dict->entry_count; + dict->entry_count++; + + /* make sure there is room in the string array */ + if (dict->array_used + (long)strlen(s) + 1 > dict->array_size) { + dict->array_size = (dict->array_size * 6) / 5; + dict->string_array = (char *) realloc(dict->string_array, + dict->array_size); + if (dict->string_array == NULL) + return NULL; + } + + /* fill in starting values */ + strcpy(&dict->string_array[dict->array_used], s); + dict->string_table[string_index].string_offset = dict->array_used; + dict->array_used += (long) strlen(s) + 1; + dict->string_table[string_index].count = 0 ; + dict->string_table[string_index].flags = DICT_NONE; + dict->string_table[string_index].any_ptr = any_ptr; /* (AK:04/04/95) */ + dict->string_table[string_index].hash_value = hash_value; + + /* hook entry at beginning of chain */ + dict->string_table[string_index].next = dict->chains[hash_index]; + dict->chains[hash_index] = string_index; + + /* record chain lengths */ + chain_len++; + if (chain_len > dict->longest_chain_length) + dict->longest_chain_length = chain_len; + + /* if a chain is too long */ + if (chain_len > dict->allowable_chain_length) { + long new_size = dict->table_size * 4; + long new_mask = new_size - 1; + long *hetemp; + long i; + int longest_chain; + + if ((hetemp = (long *)malloc(sizeof(long) * new_size)) == NULL) + return NULL; + + /* hash table chains are empty */ + for (i = 0; i < new_size; i++) + hetemp[i] = DICT_ENTRY_NONE; + + /* reset all chains */ + for (i = 0; i < dict->entry_count; i++) + dict->string_table[i].next = DICT_ENTRY_NONE; + + /* recreate hash table entries by reinserting all strings */ + for (i = 0; i < dict->entry_count; i++) { + long he = dict->string_table[i].hash_value & new_mask; + + dict->string_table[i].next = hetemp[he]; + hetemp[he] = i; + } + + /* find longest chain length */ + for (i = 0, longest_chain = 0; i < new_size; i++) { + int len; + long cur; + + for (cur = hetemp[i], len = 0; cur != DICT_ENTRY_NONE; + cur = dict->string_table[cur].next) + len++; + ; + if (len > longest_chain) + longest_chain = len; + } + + /* delete old table and attach new one */ + free(dict->chains); + dict->chains = hetemp; + dict->longest_chain_length = longest_chain; + dict->table_size = new_size; + dict->hash_mask = new_mask; + + /* keep track of growth */ + dict->hash_growth_count++; + } + } + + dict->string_table[string_index].count += occurences; + dict->string_table[string_index].flags |= flags; + *number = string_index; + + return &dict->string_table[string_index]; +} + +/************************************************************************* +* dict_delete: deletes an entry from the dictionary +* (Actually, only decrements the entry's count) +* +* dict - pointer to dictionary to delete +* s - string to find and delete +* count - count to decrement entry by +* +* Returns: status code +* TRUE - entry has been deleted +* FALSE - entry wasn't found or error occured +*************************************************************************/ +BOOLEANC dict_delete(const DICTIONARY *dict, const char *s, const long count) +{ + STRING_ENTRY *se; + long n; + + /* find the string */ + if ((se = dict_search(dict, s, &n)) == NULL) + return FALSE; + + /* decrement count and make sure it stays valid */ + se->count -= count; + if (se->count < 0) + se->count = 0; + return TRUE; +} + +/************************************************************************* +* dict_search: look for entries in the dictionary +* +* dict - pointer to dictionary to search +* s - string to search for +* number - pointer to long to return string number +* +* Returns: pointer to string entry +* NULL - entry not found +* non-NULL - pointer to string entry +*************************************************************************/ +STRING_ENTRY *dict_search(const DICTIONARY *dict, const char *s, + long *number) +{ + unsigned long hash_value; + long hash_index; + long string_index = -1; + long he; + + /* have to point to something */ + if (dict == NULL) + return NULL; + + /* check value has to be OK */ + if (dict->check_value != DICT_VALIDATE) + return NULL; + + /* must have a string */ + if (s == NULL) { + *number = -1; + return NULL; + } + + /* must be non-NULL */ + if (s[0] == '\0') { + *number = -1; + return FALSE; + } + + /* figure out which chain it should be in */ + hash_value = hash_string(s); + hash_index = hash_value & dict->hash_mask; + + /* look for the entry in the chain */ + for (he = dict->chains[hash_index]; he != DICT_ENTRY_NONE; + he = dict->string_table[he].next) { + char *sa = (char*)(&dict->string_array[dict->string_table[he].string_offset]); + + /* compare hash_value and then string, in that order, for speed */ + if (dict->string_table[he].hash_value == hash_value && !strcmp(s, sa)) { + string_index = he; + break; + } + } + if (string_index < 0) { + *number = -1; + return NULL; + } + + *number = string_index; + return dict->string_table+string_index; +} + +/************************************************************************* +* dict_union: merges contents of 2 dictionaries into +* a third +* +* dict1 - pointer to dictionary 1 +* dict2 - pointer to dictionary 2 +* +* Returns: dictionary pointer +* NULL - failed to merge +* non-NULL - pointer to new dictionary +* +* Notes: entries of the same string have their counts +* added and their flags ORed together. +*************************************************************************/ +DICTIONARY *dict_union( const DICTIONARY *dict1 , const DICTIONARY *dict2 ) +{ + DICTIONARY *dict = NULL; + STRING_ENTRY *se, *se2; + long initial_string_count, initial_hash_chains, max_chain_length; + long i, string_index; + + /*********** + ** Initialize the new dictionary. + ***********/ + + if ( dict1==NULL || dict2==NULL ) + goto err_exit; + if ((dict=(DICTIONARY *)malloc(sizeof(DICTIONARY))) == NULL) + goto err_exit; + + initial_string_count = dict1->string_max; + initial_hash_chains = dict1->table_size; + max_chain_length = dict1->allowable_chain_length; + dict = dict_create( 4, + initial_string_count, + initial_hash_chains, + max_chain_length ); + + /*********** + ** Copy the entries from dict1 into the new dictionary. + ***********/ + + for ( i = 0 ; i < dict1->entry_count ; i++ ) { + se = (STRING_ENTRY*)&dict1->string_table[i];