view urogue/dict.c @ 310:827441d05b3e

Advanced Rogue family: fix some potential buffer overflows. Some code for determining the score file location assumed that PATH_MAX would be less than 1024, which cannot be guaranteed. Advanced Rogue 5 and 7, and XRogue, have had the buffers for the file name enlarged. UltraRogue never called the functions, so the code has been deleted instead.
author John "Elwin" Edwards
date Mon, 03 May 2021 19:05:37 -0400
parents 13b482bd9e66
children
line wrap: on
line source

/*
    dict.c

    UltraRogue: The Ultimate Adventure in the Dungeons of Doom
    Copyright (C) 1995 Herb Chong
    All rights reserved.

    See the file LICENSE.TXT for full copyright and licensing information.
*/

/******************
**  Change history:
**
**  (AK:04/03/95) - In dict_create, initialize hook for extensions to dictionary structure.
**  (AK:04/04/95) - In dict_insert, only set any_ptr when a new string entry is created.
**  (AK:04/17/95) - Added dict_union and dict_merge.
**  (AK:04/18/95) - In dict_create, added code to create signature,
**                  table of contents, and parameter array.
**  (AK:04/18/95) - Revised dict_load, dict_save
**  (AK:04/18/95) - Added dict_import
**
******************/

#include <stdlib.h>
#include <string.h>
#if !defined(OS2) && !defined(_WIN32)
   #include <unistd.h>
#endif

#include "dict.h"
#include "dictutil.h"

#define BUFLEN      300

/*************************************************************************
*       Generic dictionary and hash table functions for word and
*       string storage.
*
*       Implements generic dictionary management by using a
*       hash table with direct chaining.  All terms are stored in a
*       separate, unsorted, string table. When inserting into the
*       hash table exceeds the current size, allocate a new hash table
*       and rehash the old contents into the new table. When the string
*       table fills, append more empty entries onto the old table and
*       continue adding. Inserting a string that already exists increments
*       the count for the string. Deleting a string only decrements the
*       count to no less than 0. This will allow a 0 occurence string to be
*       retrieved. To remove the 0 occurence string table entries, you must
*       rebuild the dictionary. Merging from one dictionary to a new one will
*       do this because it only copies nonzero occurences.
*
*       Assumptions:
*               unsigned long >= 32 bits
*               int >= 16 bits
*               char == 8 bits
*               number of entries < 2^28
*************************************************************************/

/*************************************************************************
*       hash_string: calculate hash value for string, modified
*       version of hashpjw() from the new Dragon book.
*
*       s - string to calculate hash for
*
*       Returns: hash value
*************************************************************************/
static unsigned long hash_string(const char *s)
{
        unsigned long h = 0;
        unsigned long g = 0;

        while (*s) {
                h <<= 4;
                h += *s++;
                if ((g = h & 0xf0000000) != 0)
                        h ^= g >> 24;
        }
        return h ^ g;
}

/*************************************************************************
*       dict_create: create a dictionary and initialize its structures
*
*       toc_size - number of entries in table of contents ( >= 4 )
*       initial_string_count - number of strings to hold as initial allocation
*       initial_hash_chains - size of hash table, must be power of 4
*       max_chain_length - max number of elements in hash chain before signalling growth
*
*       Returns: pointer to dictionary
*               NULL - error creating dictionary
*               non-NULL - sucessful creation
*************************************************************************/

DICTIONARY *dict_create(
        const long toc_size,
        const long initial_string_count,
        const long initial_hash_chains,
        const long max_chain_length )
{
        DICTIONARY      *dtemp = NULL;
        STRING_ENTRY    *stemp = NULL;
        long    *ltemp = NULL;
        char    *sltemp;
        long    i, j, tocsize;

        /* check for a power of 4 in number of initial hash entries */
        switch(initial_hash_chains) {
        case 0x00000004:
        case 0x00000010:
        case 0x00000040:
        case 0x00000100:
        case 0x00000400:
        case 0x00001000:
        case 0x00004000:
        case 0x00010000:
        case 0x00040000:
        case 0x00100000:
        case 0x00400000:
        case 0x01000000:
        case 0x04000000:
                break;
        default:
                return NULL;
        }

        /* Allocate the dictionary structure */
        if ((dtemp = (DICTIONARY *)malloc(sizeof(DICTIONARY))) == NULL)
                goto err_exit;

        /* string count has to be within range */
        j = initial_string_count;
        if (j > 0x04000000)
                return NULL;

        /* force a reasonable value */
        if (j < 100)
                j = 100;

        /* Allocate the string table and string array */
        if ((stemp = (STRING_ENTRY *)malloc(sizeof(STRING_ENTRY) * j)) == NULL)
                goto err_exit;
        if ((sltemp = (char *)malloc(8 * j)) == NULL)
                goto err_exit;

        /* Allocate the hash table */
        if ((ltemp = (long *)malloc(sizeof(long) * initial_hash_chains)) == NULL)
                goto err_exit;

        /* Allocate the parameter array */
        if ( (dtemp->parm = (DICT_PARM_ENTRY *)malloc(14*sizeof(DICT_PARM_ENTRY))) == NULL)
                goto err_exit;

        /* Allocate the signature structure */
        if ( (dtemp->sig=(DICT_SIG*)malloc(sizeof(DICT_SIG))) == NULL )
                goto err_exit;

        /* Allocate the Table of Contents */
        tocsize = toc_size;
        if ( tocsize < 4 )
           tocsize = 4;
        dtemp->sig->toc_size = tocsize;
        if ( (dtemp->toc=(DICT_TOC_ENTRY*)malloc(dtemp->sig->toc_size*sizeof(DICT_TOC_ENTRY))) == NULL )
                goto err_exit;

        dtemp->check_value = DICT_VALIDATE;                     /* validation value */
        dtemp->flags = DICT_NONE;                               /* no flags set */
        dtemp->entry_count = 0;                                 /* nothing in either table */

        dtemp->string_table = stemp;                            /* connect string table */
        dtemp->string_max = j;                                  /* size of string table */
        dtemp->scan_string_index = DICT_ENTRY_NONE;             /* not pointing to anything in scan */
        dtemp->string_growth_count = 0;                         /* haven't grown any */

        dtemp->string_array = sltemp;                           /* character array for strings */
        dtemp->array_size = j * 8;                              /* how many bytes available */
        dtemp->array_used = 0;                                  /* nothing used yet */
        dtemp->array_growth_count = 0;                          /* haven't grown any */

        /* force maximum hash chain length to a reasonable value */
        i = max_chain_length;
        if (i < 1 || i > 200)
                i = 200;
        dtemp->chains = ltemp;                                  /* connect hash table */
        dtemp->longest_chain_length = 0;                        /* nothing in table yet */
        dtemp->allowable_chain_length = i;                      /* chain length limit */
        dtemp->table_size = initial_hash_chains;                /* size of hash table */
        dtemp->hash_mask = initial_hash_chains - 1;             /* mask for mod() function */
        dtemp->hash_growth_count = 0;                           /* haven't grown any */

        /* string table is empty */
        for (i = 0 ; i < j ; i++) {
                dtemp->string_table[i].string_offset = DICT_ENTRY_NONE;
                dtemp->string_table[i].count = 0;
                dtemp->string_table[i].next = DICT_ENTRY_NONE;
                dtemp->string_table[i].flags = 0;
                dtemp->string_table[i].hash_value = 0;
        }

        /* no entries chained */
        for (i = 0; i < initial_hash_chains; i++)
                dtemp->chains[i] = DICT_ENTRY_NONE;

        /* Initialize hook to extended dictionary structure  */
        dtemp->ext = NULL;

        /* Set up the parameter array */
        if ( dict_set_parm_ids(dtemp) == FALSE )
                goto err_exit;
        if ( dict_set_parm_values(dtemp) == FALSE )
                goto err_exit;

        /* Set up the Table of Contents */
        strcpy( dtemp->toc[0].id , "PARM" );
        dtemp->toc[0].size = dtemp->sig->nparms * sizeof(DICT_PARM_ENTRY);
        dtemp->toc[0].ptr = dtemp->parm;
        dtemp->toc[0].type = 0;

        strcpy( dtemp->toc[1].id , "HASH" );
        dtemp->toc[1].size = dtemp->table_size * sizeof(long);
        dtemp->toc[1].ptr = dtemp->chains;
        dtemp->toc[1].type = 0;

        strcpy( dtemp->toc[2].id , "STTB" );
        dtemp->toc[2].size = dtemp->string_max * sizeof(char);
        dtemp->toc[2].ptr = dtemp->string_table;
        dtemp->toc[2].type = 0;

        strcpy( dtemp->toc[3].id , "STAR" );
        dtemp->toc[3].size = dtemp->array_size * sizeof(STRING_ENTRY);
        dtemp->toc[3].ptr = dtemp->string_array;
        dtemp->toc[3].type = 0;

        /* Set up the signature */
        dtemp->sig->check_value = DICT_VALIDATE;
        dtemp->sig->toc_size = tocsize;
        dtemp->sig->nparms = 14;

        return dtemp;

        /* error exit - saves duplicate code */
err_exit:

        dict_destroy( dtemp );
        return NULL;
}

/*************************************************************************
*       dict_destroy: discard a dictionary and its contents
*               multiple calls to destroy the same dictionary is safe
*
*       dict - pointer to a dictionary to discard
*
*       Returns: status code
*               TRUE - dictionary destroyed
*               FALSE - error when destroying dictionary
*       Note: Does free the dictionary structure itself.
*************************************************************************/
BOOLEANC dict_destroy(DICTIONARY *dict)
{
        int  i;

        /* have to point to something */
        if (dict == NULL)
                return FALSE;

        /* check value has to be OK */
        if (dict->check_value != DICT_VALIDATE)
                return FALSE;

        if ( (dict->sig==NULL) || (dict->toc==NULL) || (dict->parm==NULL) )
                return FALSE;

        /* Free the type=0 tables */
        for ( i = 0 ; i < dict->sig->toc_size ; i++ ) {
           if ( dict->toc[i].ptr != NULL && dict->toc[i].type == 0 )
                free( dict->toc[i].ptr );
        } /* endfor */

        /* Free the Table of Contents and signature */
        free( dict->toc );
        free( dict->sig );

        /* Free the dictionary structure itself */
        free( dict );

        return TRUE;
}

/*************************************************************************
*       dict_insert: add entries into the dictionary, growing as necessary
*
*       dict - pointer to dictionary to insert into
*       s - string to insert
*       count - count of occurences of string
*       flags - flag bits associated with the string
*       number - pointer to long to return word number
*
*       Returns: pointer to new entry in dictionary
*               NULL - error during insert
*               non-NULL - pointer to inserted or updated entry
*
*       Notes: if the entry already exists, increment the count.
*       If NULL is returned, the dictionary state can no longer
*       be trusted. Terminating with an error code would be
*       safest.
*************************************************************************/
STRING_ENTRY *dict_insert(DICTIONARY *dict, char *s,
        const long occurences, const unsigned long flags,
        void *any_ptr, long *number)
{
        unsigned long   hash_value;
        long    hash_index;
        long    string_index = -1;
        long    he2;
        int     chain_len;

        /* have to point to something */
        if (dict == NULL)
                return FALSE;

        /* check value has to be OK */
        if (dict->check_value != DICT_VALIDATE)
                return FALSE;

        /* must have a string */
        if (s == NULL) {
                *number = -1;
                return FALSE;
        }

        /* must be non-NULL */
        if (s[0] == '\0') {
                *number = -1;
                return FALSE;
        }

        /* figure out which chain it should go into */
        hash_value = hash_string(s);
        hash_index = hash_value & dict->hash_mask;

        /* look for the entry in the chain */
        for (he2 = dict->chains[hash_index], chain_len = 0; he2 != DICT_ENTRY_NONE;
                        he2 = dict->string_table[he2].next, chain_len++) {
                char    *sa = &dict->string_array[dict->string_table[he2].string_offset];

                /* compare hash_value and then string, in that order, for speed */
                if (dict->string_table[he2].hash_value == hash_value && !strcmp(s, sa)) {
                        string_index = he2;
                        break;
                }
        }

        /* string wasn't found */
        if (string_index < 0) {
                /* make sure there is room in string entry table */
                if (dict->entry_count + 1 > dict->string_max) {
                        /* make new table 20% bigger */
                        dict->string_max = (dict->string_max * 6) / 5;
                        dict->string_table = (STRING_ENTRY *)realloc(dict->string_table,
                                        sizeof(STRING_ENTRY) * dict->string_max);
                        if (dict->string_table == NULL)
                                return NULL;
                        dict->string_growth_count++;
                }
                string_index = dict->entry_count;
                dict->entry_count++;

                /* make sure there is room in the string array */
                if (dict->array_used + (long)strlen(s) + 1 > dict->array_size) {
                        dict->array_size = (dict->array_size * 6) / 5;
                        dict->string_array = (char *) realloc(dict->string_array,
                                dict->array_size);
                        if (dict->string_array == NULL)
                                return NULL;
                }

                /* fill in starting values */
                strcpy(&dict->string_array[dict->array_used], s);
                dict->string_table[string_index].string_offset = dict->array_used;
                dict->array_used += (long) strlen(s) + 1;
                dict->string_table[string_index].count = 0 ;
                dict->string_table[string_index].flags = DICT_NONE;
                dict->string_table[string_index].any_ptr = any_ptr; /* (AK:04/04/95) */
                dict->string_table[string_index].hash_value = hash_value;

                /* hook entry at beginning of chain */
                dict->string_table[string_index].next = dict->chains[hash_index];
                dict->chains[hash_index] = string_index;

                /* record chain lengths */
                chain_len++;
                if (chain_len > dict->longest_chain_length)
                        dict->longest_chain_length = chain_len;

                /* if a chain is too long */
                if (chain_len > dict->allowable_chain_length) {
                        long    new_size = dict->table_size * 4;
                        long    new_mask = new_size - 1;
                        long    *hetemp;
                        long    i;
                        int     longest_chain;

                        if ((hetemp = (long *)malloc(sizeof(long) * new_size)) == NULL)
                                return NULL;

                        /* hash table chains are empty */
                        for (i = 0; i < new_size; i++)
                                hetemp[i] = DICT_ENTRY_NONE;

                        /* reset all chains */
                        for (i = 0; i < dict->entry_count; i++)
                                dict->string_table[i].next = DICT_ENTRY_NONE;

                        /* recreate hash table entries by reinserting all strings */
                        for (i = 0; i < dict->entry_count; i++) {
                                long    he = dict->string_table[i].hash_value & new_mask;

                                dict->string_table[i].next = hetemp[he];
                                hetemp[he] = i;
                        }

                        /* find longest chain length */
                        for (i = 0, longest_chain = 0; i < new_size; i++) {
                                int     len;
                                long    cur;

                                for (cur = hetemp[i], len = 0; cur != DICT_ENTRY_NONE;
                                                cur = dict->string_table[cur].next)
                                    len++;
                                        ;
                                if (len > longest_chain)
                                        longest_chain = len;
                        }

                        /* delete old table and attach new one */
                        free(dict->chains);
                        dict->chains = hetemp;
                        dict->longest_chain_length = longest_chain;
                        dict->table_size = new_size;
                        dict->hash_mask = new_mask;

                        /* keep track of growth */
                        dict->hash_growth_count++;
                }
        }

        dict->string_table[string_index].count += occurences;
        dict->string_table[string_index].flags |= flags;
        *number = string_index;

        return &dict->string_table[string_index];
}

/*************************************************************************
*       dict_delete: deletes an entry from the dictionary
*       (Actually, only decrements the entry's count)
*
*       dict - pointer to dictionary to delete
*       s - string to find and delete
*       count - count to decrement entry by
*
*       Returns: status code
*               TRUE - entry has been deleted
*               FALSE - entry wasn't found or error occured
*************************************************************************/
BOOLEANC dict_delete(const DICTIONARY *dict, const char *s, const long count)
{
        STRING_ENTRY    *se;
        long    n;

        /* find the string */
        if ((se = dict_search(dict, s, &n)) == NULL)
                return FALSE;

        /* decrement count and make sure it stays valid */
        se->count -= count;
        if (se->count < 0)
                se->count = 0;
        return TRUE;
}

/*************************************************************************
*       dict_search: look for entries in the dictionary
*
*       dict - pointer to dictionary to search
*       s - string to search for
*       number - pointer to long to return string number
*
*       Returns: pointer to string entry
*               NULL - entry not found
*               non-NULL - pointer to string entry
*************************************************************************/
STRING_ENTRY *dict_search(const DICTIONARY *dict, const char *s,
        long *number)
{
        unsigned long   hash_value;
        long    hash_index;
        long    string_index = -1;
        long    he;

        /* have to point to something */
        if (dict == NULL)
                return NULL;

        /* check value has to be OK */
        if (dict->check_value != DICT_VALIDATE)
                return NULL;

        /* must have a string */
        if (s == NULL) {
                *number = -1;
                return NULL;
        }

        /* must be non-NULL */
        if (s[0] == '\0') {
                *number = -1;
                return FALSE;
        }

        /* figure out which chain it should be in */
        hash_value = hash_string(s);
        hash_index = hash_value & dict->hash_mask;

        /* look for the entry in the chain */
        for (he = dict->chains[hash_index]; he != DICT_ENTRY_NONE;
                        he = dict->string_table[he].next) {
                char    *sa = (char*)(&dict->string_array[dict->string_table[he].string_offset]);

                /* compare hash_value and then string, in that order, for speed */
                if (dict->string_table[he].hash_value == hash_value && !strcmp(s, sa)) {
                        string_index = he;
                        break;
                }
        }
        if (string_index < 0) {
                *number = -1;
                return NULL;
        }

        *number = string_index;
        return dict->string_table+string_index;
}

/*************************************************************************
*       dict_union: merges contents of 2 dictionaries into
*       a third
*
*       dict1 - pointer to dictionary 1
*       dict2 - pointer to dictionary 2
*
*       Returns: dictionary pointer
*               NULL - failed to merge
*               non-NULL - pointer to new dictionary
*
*       Notes: entries of the same string have their counts
*       added and their flags ORed together.
*************************************************************************/
DICTIONARY *dict_union( const DICTIONARY *dict1 , const DICTIONARY *dict2 )
{
        DICTIONARY    *dict = NULL;
        STRING_ENTRY  *se, *se2;
        long          initial_string_count, initial_hash_chains, max_chain_length;
        long          i, string_index;

        /***********
        **  Initialize the new dictionary.
        ***********/

        if ( dict1==NULL || dict2==NULL )
                goto err_exit;
        if ((dict=(DICTIONARY *)malloc(sizeof(DICTIONARY))) == NULL)
                goto err_exit;

        initial_string_count = dict1->string_max;
        initial_hash_chains = dict1->table_size;
        max_chain_length = dict1->allowable_chain_length;
        dict = dict_create( 4,
                            initial_string_count,
                            initial_hash_chains,
                            max_chain_length );

        /***********
        **  Copy the entries from dict1 into the new dictionary.
        ***********/

        for ( i = 0 ; i < dict1->entry_count ; i++ ) {
                se = (STRING_ENTRY*)&dict1->string_table[i];
                if ( se->count > 0 ) {
                        se2 = dict_insert(
                                  dict,
                                  dict1->string_array+se->string_offset,
                                  se->count,
                                  se->flags,
                                  NULL,
                                  &string_index );
                        if ( se2 == NULL )
                                goto err_exit;
                } /* endif */
        } /* endfor */

        /*  Merge the entries from dict2 into the new dictionary. */
        if ( dict_merge(dict,dict2,FALSE) == FALSE )
                goto err_exit;

        /*  Success. Return a pointer to the new dictionary.  */
        return( dict );

        /*  Failure. Ignominiously erase our tracks and return NULL. */
err_exit:
        dict_destroy( dict );
        return NULL;
}

/*************************************************************************
*       dict_merge: merges the contents of a dictionary into
*       another one, updating the contents of the destination
*
*       dst - dictionary to update
*       src - dictionary to add
*       move - boolean flag
*               TRUE - move to dest
*               FALSE - copy to dest
*
*       Returns: status code
*               TRUE - merge completed
*               FALSE - error on merge
*
*       Notes: entries of the same string have their counts
*       added. At the end of a move, src is empty. If there
*       is an error during a move, the contents of both
*       dictionaries cannot be trusted.
*************************************************************************/
BOOLEANC dict_merge(const DICTIONARY *dst, const DICTIONARY *src, const BOOLEANC move)
{
        STRING_ENTRY  *se, *se2;
        DICTIONARY    *dict1, *dict2;
        long          i, string_index, index;

        dict1 = (DICTIONARY*)src;
        dict2 = (DICTIONARY*)dst;

        /***********
        **  Copy the dictionary entries into the new dictionary.
        ***********/

        for ( i = 0 ; i < dict1->entry_count ; i++ ) {
                se = (STRING_ENTRY*)&dict1->string_table[i];
                if ( se->count > 0 ) {
                        se2 = dict_insert(
                                  dict2,
                                  dict1->string_array+se->string_offset,
                                  se->count,
                                  se->flags,
                                  NULL,
                                  &string_index );
                        if ( se2 == NULL )
                                goto err_exit;
                } /* endif */
        } /* endfor */

        /***********
        **  Set up the dictionary parameter vector.
        ***********/

        if ( dict_set_parm_values(dict2) == FALSE )
                return( FALSE );

        /***********
        **  Update the table of contents.
        **     PARM HASH STTB STAR
        ***********/

        if ( (index=dict_toc_index(dict2,"HASH")) == -1)
                goto err_exit;
        dict2->toc[index].size = dict2->table_size * sizeof(long);
        dict2->toc[index].ptr = dict2->chains;

        if ( (index=dict_toc_index(dict2,"STTB")) == -1)
                goto err_exit;
        dict2->toc[index].size = dict2->string_max * sizeof(char);
        dict2->toc[index].ptr = dict2->string_table;

        if ( (index=dict_toc_index(dict2,"STAR")) == -1)
                goto err_exit;
        dict2->toc[index].size = dict2->array_size * sizeof(STRING_ENTRY);
        dict2->toc[index].ptr = dict2->string_array;

        /***********
        **  Update the signature
        ***********/

        dict2->sig->checksum =
                compute_checksum( 4*sizeof(DICT_TOC_ENTRY) , (char*)(dict2->toc) );

        /***********
        **  If this is a move, destroy the source dictionary
        ***********/

        if ( move == TRUE )
                if ( dict_destroy(dict1) == FALSE )
                        goto err_exit;

        /***********
        **  Success
        ***********/

        return( TRUE );

        /***********
        **  Failure
        ***********/

err_exit:
        dict_destroy( dict2 );
        return FALSE;
}

/*************************************************************************
*       dict_string_by_number: return string pointer for
*       a given string number
*
*       number - string number
*
*       Returns: pointer to string entry
*               NULL - entry not found
*               non-NULL - pointer to string entry
*************************************************************************/
STRING_ENTRY *dict_string_by_number(const DICTIONARY *dict, const long number)
{
        if (dict == NULL)
                return NULL;

        /* check value has to be OK */
        if (dict->check_value != DICT_VALIDATE)
                return NULL;

        /* string number has to be within range */
        if (number < 0 || number >= dict->entry_count)
                return NULL;

        return dict->string_table+number;
}

/*************************************************************************
*       dict_scan_begin: begin a scan of the dictionary
*
*       dict - pointer to dictionary to scan
*
*       Returns: status code
*               TRUE - scan initialized
*               FALSE - error
*
*       Notes: if a scan is already in progress, it is
*       abandoned and the scan is reset to the
*       beginning.