view urogue/dict.c @ 302:fa70bba6bb3f rel2021.03

Update the README.
author John "Elwin" Edwards
date Thu, 18 Mar 2021 20:53:49 -0400
parents c495a4f288c6
children 13b482bd9e66
line wrap: on
line source

/*
    dict.c

    UltraRogue: The Ultimate Adventure in the Dungeons of Doom
    Copyright (C) 1995 Herb Chong
    All rights reserved.

    See the file LICENSE.TXT for full copyright and licensing information.
*/

/******************
**  Change history:
**
**  (AK:04/03/95) - In dict_create, initialize hook for extensions to dictionary structure.
**  (AK:04/04/95) - In dict_insert, only set any_ptr when a new string entry is created.
**  (AK:04/17/95) - Added dict_union and dict_merge.
**  (AK:04/18/95) - In dict_create, added code to create signature,
**                  table of contents, and parameter array.
**  (AK:04/18/95) - Revised dict_load, dict_save
**  (AK:04/18/95) - Added dict_import
**
******************/

static char sccsid[] = "%W% %G%";

#include <stdlib.h>
#include <string.h>
#if !defined(OS2) && !defined(_WIN32)
   #include <unistd.h>
#endif

#include "dict.h"
#include "dictutil.h"

#define BUFLEN      300

/*************************************************************************
*       Generic dictionary and hash table functions for word and
*       string storage.
*
*       Implements generic dictionary management by using a
*       hash table with direct chaining.  All terms are stored in a
*       separate, unsorted, string table. When inserting into the
*       hash table exceeds the current size, allocate a new hash table
*       and rehash the old contents into the new table. When the string
*       table fills, append more empty entries onto the old table and
*       continue adding. Inserting a string that already exists increments
*       the count for the string. Deleting a string only decrements the
*       count to no less than 0. This will allow a 0 occurence string to be
*       retrieved. To remove the 0 occurence string table entries, you must
*       rebuild the dictionary. Merging from one dictionary to a new one will
*       do this because it only copies nonzero occurences.
*
*       Assumptions:
*               unsigned long >= 32 bits
*               int >= 16 bits
*               char == 8 bits
*               number of entries < 2^28
*************************************************************************/

/*************************************************************************
*       hash_string: calculate hash value for string, modified
*       version of hashpjw() from the new Dragon book.
*
*       s - string to calculate hash for
*
*       Returns: hash value
*************************************************************************/
static unsigned long hash_string(const char *s)
{
        unsigned long h = 0;
        unsigned long g = 0;

        while (*s) {
                h <<= 4;
                h += *s++;
                if ((g = h & 0xf0000000) != 0)
                        h ^= g >> 24;
        }
        return h ^ g;
}

/*************************************************************************
*       dict_create: create a dictionary and initialize its structures
*
*       toc_size - number of entries in table of contents ( >= 4 )
*       initial_string_count - number of strings to hold as initial allocation
*       initial_hash_chains - size of hash table, must be power of 4
*       max_chain_length - max number of elements in hash chain before signalling growth
*
*       Returns: pointer to dictionary
*               NULL - error creating dictionary
*               non-NULL - sucessful creation
*************************************************************************/

DICTIONARY *dict_create(
        const long toc_size,
        const long initial_string_count,
        const long initial_hash_chains,
        const long max_chain_length )
{
        DICTIONARY      *dtemp = NULL;
        STRING_ENTRY    *stemp = NULL;
        long    *ltemp = NULL;
        char    *sltemp;
        long    i, j, tocsize;

        /* check for a power of 4 in number of initial hash entries */
        switch(initial_hash_chains) {
        case 0x00000004:
        case 0x00000010:
        case 0x00000040:
        case 0x00000100:
        case 0x00000400:
        case 0x00001000:
        case 0x00004000:
        case 0x00010000:
        case 0x00040000:
        case 0x00100000:
        case 0x00400000:
        case 0x01000000:
        case 0x04000000:
                break;
        default:
                return NULL;
        }

        /* Allocate the dictionary structure */
        if ((dtemp = (DICTIONARY *)malloc(sizeof(DICTIONARY))) == NULL)
                goto err_exit;

        /* string count has to be within range */
        j = initial_string_count;
        if (j > 0x04000000)
                return NULL;

        /* force a reasonable value */
        if (j < 100)
                j = 100;

        /* Allocate the string table and string array */
        if ((stemp = (STRING_ENTRY *)malloc(sizeof(STRING_ENTRY) * j)) == NULL)
                goto err_exit;
        if ((sltemp = (char *)malloc(8 * j)) == NULL)
                goto err_exit;

        /* Allocate the hash table */
        if ((ltemp = (long *)malloc(sizeof(long) * initial_hash_chains)) == NULL)
                goto err_exit;

        /* Allocate the parameter array */
        if ( (dtemp->parm = (DICT_PARM_ENTRY *)malloc(14*sizeof(DICT_PARM_ENTRY))) == NULL)
                goto err_exit;

        /* Allocate the signature structure */
        if ( (dtemp->sig=(DICT_SIG*)malloc(sizeof(DICT_SIG))) == NULL )
                goto err_exit;

        /* Allocate the Table of Contents */
        tocsize = toc_size;
        if ( tocsize < 4 )
           tocsize = 4;
        dtemp->sig->toc_size = tocsize;
        if ( (dtemp->toc=(DICT_TOC_ENTRY*)malloc(dtemp->sig->toc_size*sizeof(DICT_TOC_ENTRY))) == NULL )
                goto err_exit;

        dtemp->check_value = DICT_VALIDATE;                     /* validation value */
        dtemp->flags = DICT_NONE;                               /* no flags set */
        dtemp->entry_count = 0;                                 /* nothing in either table */

        dtemp->string_table = stemp;                            /* connect string table */
        dtemp->string_max = j;                                  /* size of string table */
        dtemp->scan_string_index = DICT_ENTRY_NONE;             /* not pointing to anything in scan */
        dtemp->string_growth_count = 0;                         /* haven't grown any */

        dtemp->string_array = sltemp;                           /* character array for strings */
        dtemp->array_size = j * 8;                              /* how many bytes available */
        dtemp->array_used = 0;                                  /* nothing used yet */
        dtemp->array_growth_count = 0;                          /* haven't grown any */

        /* force maximum hash chain length to a reasonable value */
        i = max_chain_length;
        if (i < 1 || i > 200)
                i = 200;
        dtemp->chains = ltemp;                                  /* connect hash table */
        dtemp->longest_chain_length = 0;                        /* nothing in table yet */
        dtemp->allowable_chain_length = i;                      /* chain length limit */
        dtemp->table_size = initial_hash_chains;                /* size of hash table */
        dtemp->hash_mask = initial_hash_chains - 1;             /* mask for mod() function */
        dtemp->hash_growth_count = 0;                           /* haven't grown any */

        /* string table is empty */
        for (i = 0 ; i < j ; i++) {
                dtemp->string_table[i].string_offset = DICT_ENTRY_NONE;
                dtemp->string_table[i].count = 0;
                dtemp->string_table[i].next = DICT_ENTRY_NONE;
                dtemp->string_table[i].flags = 0;
                dtemp->string_table[i].hash_value = 0;
        }

        /* no entries chained */
        for (i = 0; i < initial_hash_chains; i++)
                dtemp->chains[i] = DICT_ENTRY_NONE;

        /* Initialize hook to extended dictionary structure  */
        dtemp->ext = NULL;

        /* Set up the parameter array */
        if ( dict_set_parm_ids(dtemp) == FALSE )
                goto err_exit;
        if ( dict_set_parm_values(dtemp) == FALSE )
                goto err_exit;

        /* Set up the Table of Contents */
        strcpy( dtemp->toc[0].id , "PARM" );
        dtemp->toc[0].size = dtemp->sig->nparms * sizeof(DICT_PARM_ENTRY);
        dtemp->toc[0].ptr = dtemp->parm;
        dtemp->toc[0].type = 0;

        strcpy( dtemp->toc[1].id , "HASH" );
        dtemp->toc[1].size = dtemp->table_size * sizeof(long);
        dtemp->toc[1].ptr = dtemp->chains;
        dtemp->toc[1].type = 0;

        strcpy( dtemp->toc[2].id , "STTB" );
        dtemp->toc[2].size = dtemp->string_max * sizeof(char);
        dtemp->toc[2].ptr = dtemp->string_table;
        dtemp->toc[2].type = 0;

        strcpy( dtemp->toc[3].id , "STAR" );
        dtemp->toc[3].size = dtemp->array_size * sizeof(STRING_ENTRY);
        dtemp->toc[3].ptr = dtemp->string_array;
        dtemp->toc[3].type = 0;

        /* Set up the signature */
        dtemp->sig->check_value = DICT_VALIDATE;
        dtemp->sig->toc_size = tocsize;
        dtemp->sig->nparms = 14;

        return dtemp;

        /* error exit - saves duplicate code */
err_exit:

        dict_destroy( dtemp );
        return NULL;
}

/*************************************************************************
*       dict_destroy: discard a dictionary and its contents
*               multiple calls to destroy the same dictionary is safe
*
*       dict - pointer to a dictionary to discard
*
*       Returns: status code
*               TRUE - dictionary destroyed
*               FALSE - error when destroying dictionary
*       Note: Does free the dictionary structure itself.
*************************************************************************/
BOOLEANC dict_destroy(DICTIONARY *dict)
{
        int  i;

        /* have to point to something */
        if (dict == NULL)
                return FALSE;

        /* check value has to be OK */
        if (dict->check_value != DICT_VALIDATE)
                return FALSE;

        if ( (dict->sig==NULL) || (dict->toc==NULL) || (dict->parm==NULL) )
                return FALSE;

        /* Free the type=0 tables */
        for ( i = 0 ; i < dict->sig->toc_size ; i++ ) {
           if ( dict->toc[i].ptr != NULL && dict->toc[i].type == 0 )
                free( dict->toc[i].ptr );
        } /* endfor */

        /* Free the Table of Contents and signature */
        free( dict->toc );
        free( dict->sig );

        /* Free the dictionary structure itself */
        free( dict );

        return TRUE;
}

/*************************************************************************
*       dict_insert: add entries into the dictionary, growing as necessary
*
*       dict - pointer to dictionary to insert into
*       s - string to insert
*       count - count of occurences of string
*       flags - flag bits associated with the string
*       number - pointer to long to return word number
*
*       Returns: pointer to new entry in dictionary
*               NULL - error during insert
*               non-NULL - pointer to inserted or updated entry
*
*       Notes: if the entry already exists, increment the count.
*       If NULL is returned, the dictionary state can no longer
*       be trusted. Terminating with an error code would be
*       safest.
*************************************************************************/
STRING_ENTRY *dict_insert(DICTIONARY *dict, char *s,
        const long occurences, const unsigned long flags,
        void *any_ptr, long *number)
{
        unsigned long   hash_value;
        long    hash_index;
        long    string_index = -1;
        long    he2;
        int     chain_len;

        /* have to point to something */
        if (dict == NULL)
                return FALSE;

        /* check value has to be OK */
        if (dict->check_value != DICT_VALIDATE)
                return FALSE;

        /* must have a string */
        if (s == NULL) {
                *number = -1;
                return FALSE;
        }

        /* must be non-NULL */
        if (s[0] == '\0') {
                *number = -1;
                return FALSE;
        }

        /* figure out which chain it should go into */
        hash_value = hash_string(s);
        hash_index = hash_value & dict->hash_mask;

        /* look for the entry in the chain */
        for (he2 = dict->chains[hash_index], chain_len = 0; he2 != DICT_ENTRY_NONE;
                        he2 = dict->string_table[he2].next, chain_len++) {
                char    *sa = &dict->string_array[dict->string_table[he2].string_offset];

                /* compare hash_value and then string, in that order, for speed */
                if (dict->string_table[he2].hash_value == hash_value && !strcmp(s, sa)) {
                        string_index = he2;
                        break;
                }
        }

        /* string wasn't found */
        if (string_index < 0) {
                /* make sure there is room in string entry table */
                if (dict->entry_count + 1 > dict->string_max) {
                        /* make new table 20% bigger */
                        dict->string_max = (dict->string_max * 6) / 5;
                        dict->string_table = (STRING_ENTRY *)realloc(dict->string_table,
                                        sizeof(STRING_ENTRY) * dict->string_max);
                        if (dict->string_table == NULL)
                                return NULL;
                        dict->string_growth_count++;
                }
                string_index = dict->entry_count;
                dict->entry_count++;

                /* make sure there is room in the string array */
                if (dict->array_used + (long)strlen(s) + 1 > dict->array_size) {
                        dict->array_size = (dict->array_size * 6) / 5;
                        dict->string_array = (char *) realloc(dict->string_array,
                                dict->array_size);
                        if (dict->string_array == NULL)
                                return NULL;
                }

                /* fill in starting values */
                strcpy(&dict->string_array[dict->array_used], s);
                dict->string_table[string_index].string_offset = dict->array_used;
                dict->array_used += (long) strlen(s) + 1;
                dict->string_table[string_index].count = 0 ;
                dict->string_table[string_index].flags = DICT_NONE;
                dict->string_table[string_index].any_ptr = any_ptr; /* (AK:04/04/95) */
                dict->string_table[string_index].hash_value = hash_value;

                /* hook entry at beginning of chain */
                dict->string_table[string_index].next = dict->chains[hash_index];
                dict->chains[hash_index] = string_index;

                /* record chain lengths */
                chain_len++;
                if (chain_len > dict->longest_chain_length)
                        dict->longest_chain_length = chain_len;

                /* if a chain is too long */
                if (chain_len > dict->allowable_chain_length) {
                        long    new_size = dict->table_size * 4;
                        long    new_mask = new_size - 1;
                        long    *hetemp;
                        long    i;
                        int     longest_chain;

                        if ((hetemp = (long *)malloc(sizeof(long) * new_size)) == NULL)
                                return NULL;

                        /* hash table chains are empty */
                        for (i = 0; i < new_size; i++)
                                hetemp[i] = DICT_ENTRY_NONE;

                        /* reset all chains */
                        for (i = 0; i < dict->entry_count; i++)
                                dict->string_table[i].next = DICT_ENTRY_NONE;

                        /* recreate hash table entries by reinserting all strings */
                        for (i = 0; i < dict->entry_count; i++) {
                                long    he = dict->string_table[i].hash_value & new_mask;

                                dict->string_table[i].next = hetemp[he];
                                hetemp[he] = i;
                        }

                        /* find longest chain length */
                        for (i = 0, longest_chain = 0; i < new_size; i++) {
                                int     len;
                                long    cur;

                                for (cur = hetemp[i], len = 0; cur != DICT_ENTRY_NONE;
                                                cur = dict->string_table[cur].next)
                                    len++;
                                        ;
                                if (len > longest_chain)
                                        longest_chain = len;
                        }

                        /* delete old table and attach new one */
                        free(dict->chains);
                        dict->chains = hetemp;
                        dict->longest_chain_length = longest_chain;
                        dict->table_size = new_size;
                        dict->hash_mask = new_mask;

                        /* keep track of growth */
                        dict->hash_growth_count++;
                }
        }

        dict->string_table[string_index].count += occurences;
        dict->string_table[string_index].flags |= flags;
        *number = string_index;

        return &dict->string_table[string_index];
}

/*************************************************************************
*       dict_delete: deletes an entry from the dictionary
*       (Actually, only decrements the entry's count)
*
*       dict - pointer to dictionary to delete
*       s - string to find and delete
*       count - count to decrement entry by
*
*       Returns: status code
*               TRUE - entry has been deleted
*               FALSE - entry wasn't found or error occured
*************************************************************************/
BOOLEANC dict_delete(const DICTIONARY *dict, const char *s, const long count)
{
        STRING_ENTRY    *se;
        long    n;

        /* find the string */
        if ((se = dict_search(dict, s, &n)) == NULL)
                return FALSE;

        /* decrement count and make sure it stays valid */
        se->count -= count;
        if (se->count < 0)
                se->count = 0;
        return TRUE;
}

/*************************************************************************
*       dict_search: look for entries in the dictionary
*
*       dict - pointer to dictionary to search
*       s - string to search for
*       number - pointer to long to return string number
*
*       Returns: pointer to string entry
*               NULL - entry not found
*               non-NULL - pointer to string entry
*************************************************************************/
STRING_ENTRY *dict_search(const DICTIONARY *dict, const char *s,
        long *number)
{
        unsigned long   hash_value;
        long    hash_index;
        long    string_index = -1;
        long    he;

        /* have to point to something */
        if (dict == NULL)
                return NULL;

        /* check value has to be OK */
        if (dict->check_value != DICT_VALIDATE)
                return NULL;

        /* must have a string */
        if (s == NULL) {
                *number = -1;
                return NULL;
        }

        /* must be non-NULL */
        if (s[0] == '\0') {
                *number = -1;
                return FALSE;
        }

        /* figure out which chain it should be in */
        hash_value = hash_string(s);
        hash_index = hash_value & dict->hash_mask;

        /* look for the entry in the chain */
        for (he = dict->chains[hash_index]; he != DICT_ENTRY_NONE;
                        he = dict->string_table[he].next) {
                char    *sa = (char*)(&dict->string_array[dict->string_table[he].string_offset]);

                /* compare hash_value and then string, in that order, for speed */
                if (dict->string_table[he].hash_value == hash_value && !strcmp(s, sa)) {
                        string_index = he;
                        break;
                }
        }
        if (string_index < 0) {
                *number = -1;
                return NULL;
        }

        *number = string_index;
        return dict->string_table+string_index;
}

/*************************************************************************
*       dict_union: merges contents of 2 dictionaries into
*       a third
*
*       dict1 - pointer to dictionary 1
*       dict2 - pointer to dictionary 2
*
*       Returns: dictionary pointer
*               NULL - failed to merge
*               non-NULL - pointer to new dictionary
*
*       Notes: entries of the same string have their counts
*       added and their flags ORed together.
*************************************************************************/
DICTIONARY *dict_union( const DICTIONARY *dict1 , const DICTIONARY *dict2 )
{
        DICTIONARY    *dict = NULL;
        STRING_ENTRY  *se, *se2;
        long          initial_string_count, initial_hash_chains, max_chain_length;
        long          i, string_index;

        /***********
        **  Initialize the new dictionary.
        ***********/

        if ( dict1==NULL || dict2==NULL )
                goto err_exit;
        if ((dict=(DICTIONARY *)malloc(sizeof(DICTIONARY))) == NULL)
                goto err_exit;

        initial_string_count = dict1->string_max;
        initial_hash_chains = dict1->table_size;
        max_chain_length = dict1->allowable_chain_length;
        dict = dict_create( 4,
                            initial_string_count,
                            initial_hash_chains,
                            max_chain_length );

        /***********
        **  Copy the entries from dict1 into the new dictionary.
        ***********/

        for ( i = 0 ; i < dict1->entry_count ; i++ ) {
                se = (STRING_ENTRY*)&dict1->string_table[i];
                if ( se->count > 0 ) {
                        se2 = dict_insert(
                                  dict,
                                  dict1->string_array+se->string_offset,
                                  se->count,
                                  se->flags,
                                  NULL,
                                  &string_index );
                        if ( se2 == NULL )
                                goto err_exit;
                } /* endif */
        } /* endfor */

        /*  Merge the entries from dict2 into the new dictionary. */
        if ( dict_merge(dict,dict2,FALSE) == FALSE )
                goto err_exit;

        /*  Success. Return a pointer to the new dictionary.  */
        return( dict );

        /*  Failure. Ignominiously erase our tracks and return NULL. */
err_exit:
        dict_destroy( dict );
        return NULL;
}

/*************************************************************************
*       dict_merge: merges the contents of a dictionary into
*       another one, updating the contents of the destination
*
*       dst - dictionary to update
*       src - dictionary to add
*       move - boolean flag
*               TRUE - move to dest
*               FALSE - copy to dest
*
*       Returns: status code
*               TRUE - merge completed
*               FALSE - error on merge
*
*       Notes: entries of the same string have their counts
*       added. At the end of a move, src is empty. If there
*       is an error during a move, the contents of both
*       dictionaries cannot be trusted.
*************************************************************************/
BOOLEANC dict_merge(const DICTIONARY *dst, const DICTIONARY *src, const BOOLEANC move)
{
        STRING_ENTRY  *se, *se2;
        DICTIONARY    *dict1, *dict2;
        long          i, string_index, index;

        dict1 = (DICTIONARY*)src;
        dict2 = (DICTIONARY*)dst;

        /***********
        **  Copy the dictionary entries into the new dictionary.
        ***********/

        for ( i = 0 ; i < dict1->entry_count ; i++ ) {
                se = (STRING_ENTRY*)&dict1->string_table[i];
                if ( se->count > 0 ) {
                        se2 = dict_insert(
                                  dict2,
                                  dict1->string_array+se->string_offset,
                                  se->count,
                                  se->flags,
                                  NULL,
                                  &string_index );
                        if ( se2 == NULL )
                                goto err_exit;
                } /* endif */
        } /* endfor */

        /***********
        **  Set up the dictionary parameter vector.
        ***********/

        if ( dict_set_parm_values(dict2) == FALSE )
                return( FALSE );

        /***********
        **  Update the table of contents.
        **     PARM HASH STTB STAR
        ***********/

        if ( (index=dict_toc_index(dict2,"HASH")) == -1)
                goto err_exit;
        dict2->toc[index].size = dict2->table_size * sizeof(long);
        dict2->toc[index].ptr = dict2->chains;

        if ( (index=dict_toc_index(dict2,"STTB")) == -1)
                goto err_exit;
        dict2->toc[index].size = dict2->string_max * sizeof(char);
        dict2->toc[index].ptr = dict2->string_table;

        if ( (index=dict_toc_index(dict2,"STAR")) == -1)
                goto err_exit;
        dict2->toc[index].size = dict2->array_size * sizeof(STRING_ENTRY);
        dict2->toc[index].ptr = dict2->string_array;

        /***********
        **  Update the signature
        ***********/

        dict2->sig->checksum =
                compute_checksum( 4*sizeof(DICT_TOC_ENTRY) , (char*)(dict2->toc) );

        /***********
        **  If this is a move, destroy the source dictionary
        ***********/

        if ( move == TRUE )
                if ( dict_destroy(dict1) == FALSE )
                        goto err_exit;

        /***********
        **  Success
        ***********/

        return( TRUE );

        /***********
        **  Failure
        ***********/

err_exit:
        dict_destroy( dict2 );
        return FALSE;
}

/*************************************************************************
*       dict_string_by_number: return string pointer for
*       a given string number
*
*       number - string number
*
*       Returns: pointer to string entry
*               NULL - entry not found
*               non-NULL - pointer to string entry
*************************************************************************/
STRING_ENTRY *dict_string_by_number(const DICTIONARY *dict, const long number)
{
        if (dict == NULL)
                return NULL;

        /* check value has to be OK */
        if (dict->check_value != DICT_VALIDATE)
                return NULL;

        /* string number has to be within range */
        if (number < 0 || number >= dict->entry_count)
                return NULL;

        return dict->string_table+number;
}

/*************************************************************************
*       dict_scan_begin: begin a scan of the dictionary
*
*       dict - pointer to dictionary to scan
*
*       Returns: status code
*               TRUE - scan initialized
*               FALSE - error
*
*       Notes: if a scan is already in progress, it is
*       abandoned and the scan is reset to the
*       beginning.
*************************************************************************/
BOOLEANC dict_scan_begin(DICTIONARY *dict)
{
        /* have to point to something */