(* Modified for Poly/ML from SML/NJ Library version 0.2 * * COPYRIGHT (c) 1992 by AT&T Bell Laboratories. * See file mosml/copyrght/copyrght.att for details. * * Original author: John Reppy, AT&T Bell Laboratories, Murray Hill, NJ 07974 * Current version from the Moscow ML distribution, copied by permission. *) (* Polyhash -- polymorphic hashtables as in the SML/NJ Library *) signature Polyhash = sig type ('key, 'data) hash_table val mkTable : ('_key -> int) * ('_key * '_key -> bool) -> int * exn -> ('_key, '_data) hash_table val numItems : ('key, 'data) hash_table -> int val insert : ('_key, '_data) hash_table -> '_key * '_data -> unit val peekInsert : ('_key, '_data) hash_table -> '_key * '_data -> '_data option val find : ('key, 'data) hash_table -> 'key -> 'data val peek : ('key, 'data) hash_table -> 'key -> 'data option val remove : ('key, 'data) hash_table -> 'key -> 'data val listItems : ('key, 'data) hash_table -> ('key * 'data) list val apply : ('key * 'data -> unit) -> ('key, 'data) hash_table -> unit val map : ('_key * 'data -> '_res) -> ('_key, 'data) hash_table -> ('_key, '_res) hash_table val filter : ('key * 'data -> bool) -> ('key, 'data) hash_table -> unit val transform : ('data -> '_res) -> ('_key, 'data) hash_table -> ('_key, '_res) hash_table val copy : ('_key, '_data) hash_table -> ('_key, '_data) hash_table val bucketSizes : ('key, 'data) hash_table -> int list (*Additions due to L. C. Paulson and Jia Meng*) val hashw : word * word -> word val hashw_char : Char.char * word -> word val hashw_int : int * word -> word val hashw_vector : word Vector.vector * word -> word val hashw_string : string * word -> word val hashw_strings : string list * word -> word val hash_string : string -> int (* [('key, 'data) hash_table] is the type of hashtables with keys of type 'key and data values of type 'data. [mkTable (hashVal, sameKey) (sz, exc)] returns a new hashtable, using hash function hashVal and equality predicate sameKey. The sz is a size hint, and exc is the exception raised by function find. It must be the case that sameKey(k1, k2) implies hashVal(k1) = hashVal(k2) for all k1,k2. [numItems htbl] is the number of items in the hash table. [insert htbl (k, d)] inserts data d for key k. If k already had an item associated with it, then the old item is overwritten. [find htbl k] returns d, where d is the data item associated with key k, or raises the exception (given at creation of htbl) if there is no such d. [peek htbl k] returns SOME d, where d is the data item associated with key k, or NONE if there is no such d. [peekInsert htbl (k, d)] inserts data d for key k, if k is not already in the table, returning NONE. If k is already in the table, and the associated data value is d', then returns SOME d' and leaves the table unmodified. [remove htbl k] returns d, where d is the data item associated with key k, removing d from the table; or raises the exception if there is no such d. [listItems htbl] returns a list of the (key, data) pairs in the hashtable. [apply f htbl] applies function f to all (key, data) pairs in the hashtable, in some order. [map f htbl] returns a new hashtable, whose data items have been obtained by applying f to the (key, data) pairs in htbl. The new tables have the same keys, hash function, equality predicate, and exception, as htbl. [filter p htbl] deletes from htbl all data items which do not satisfy predicate p. [transform f htbl] as map, but only the (old) data values are used when computing the new data values. [copy htbl] returns a complete copy of htbl. [bucketSizes htbl] returns a list of the sizes of the buckets. This is to allow users to gauge the quality of their hashing function. *) end structure Polyhash :> Polyhash = struct datatype ('key, 'data) bucket_t = NIL | B of int * 'key * 'data * ('key, 'data) bucket_t datatype ('key, 'data) hash_table = HT of {hashVal : 'key -> int, sameKey : 'key * 'key -> bool, not_found : exn, table : ('key, 'data) bucket_t Array.array ref, n_items : int ref} local (* prim_val andb_ : int -> int -> int = 2 "and"; prim_val lshift_ : int -> int -> int = 2 "shift_left"; *) fun andb_ x y = Word.toInt (Word.andb (Word.fromInt x, Word.fromInt y)); fun lshift_ x y = Word.toInt (Word.<< (Word.fromInt x, Word.fromInt y)); in fun index (i, sz) = andb_ i (sz-1) (* find smallest power of 2 (>= 32) that is >= n *) fun roundUp n = let fun f i = if (i >= n) then i else f (lshift_ i 1) in f 32 end end; (* Create a new table; the int is a size hint and the exception * is to be raised by find. *) fun mkTable (hashVal, sameKey) (sizeHint, notFound) = HT{ hashVal=hashVal, sameKey=sameKey, not_found = notFound, table = ref (Array.array(roundUp sizeHint, NIL)), n_items = ref 0 }; (* conditionally grow a table *) fun growTable (HT{table, n_items, ...}) = let val arr = !table val sz = Array.length arr in if (!n_items >= sz) then let val newSz = sz+sz val newArr = Array.array (newSz, NIL) fun copy NIL = () | copy (B(h, key, v, rest)) = let val indx = index (h, newSz) in Array.update (newArr, indx, B(h, key, v, Array.sub(newArr, indx))); copy rest end fun bucket n = (copy (Array.sub(arr, n)); bucket (n+1)) in (bucket 0) handle _ => (); table := newArr end else () end (* growTable *); (* Insert an item. If the key already has an item associated with it, * then the old item is discarded. *) fun insert (tbl as HT{hashVal, sameKey, table, n_items, ...}) (key, item) = let val arr = !table val sz = Array.length arr val hash = hashVal key val indx = index (hash, sz) fun look NIL = ( Array.update(arr, indx, B(hash, key, item, Array.sub(arr, indx))); inc n_items; growTable tbl; NIL) | look (B(h, k, v, r)) = if ((hash = h) andalso sameKey(key, k)) then B(hash, key, item, r) else (case (look r) of NIL => NIL | rest => B(h, k, v, rest) (* end case *)) in case (look (Array.sub (arr, indx))) of NIL => () | b => Array.update(arr, indx, b) end; (* Insert an item if not there already; if it is there already, then return the old data value and leave the table unmodified.. *) fun peekInsert (tbl as HT{hashVal, sameKey, table, n_items, ...}) (key, item) = let val arr = !table val sz = Array.length arr val hash = hashVal key val indx = index (hash, sz) fun look NIL = (Array.update(arr, indx, B(hash, key, item, Array.sub(arr, indx))); inc n_items; growTable tbl; NONE) | look (B(h, k, v, r)) = if hash = h andalso sameKey(key, k) then SOME v else look r in look (Array.sub (arr, indx)) end; (* find an item, the table's exception is raised if the item doesn't exist *) fun find (HT{hashVal, sameKey, table, not_found, ...}) key = let val arr = !table val sz = Array.length arr val hash = hashVal key val indx = index (hash, sz) fun look NIL = raise not_found | look (B(h, k, v, r)) = if ((hash = h) andalso sameKey(key, k)) then v else look r in look (Array.sub (arr, indx)) end; (* look for an item, return NONE if the item doesn't exist *) fun peek (HT{hashVal, sameKey, table, ...}) key = let val arr = !table val sz = Array.length arr val hash = hashVal key val indx = index (hash, sz) fun look NIL = NONE | look (B(h, k, v, r)) = if ((hash = h) andalso sameKey(key, k)) then SOME v else look r in look (Array.sub (arr, indx)) end; (* Remove an item. The table's exception is raised if * the item doesn't exist. *) fun remove (HT{hashVal, sameKey, not_found, table, n_items}) key = let val arr = !table val sz = Array.length arr val hash = hashVal key val indx = index (hash, sz) fun look NIL = raise not_found | look (B(h, k, v, r)) = if ((hash = h) andalso sameKey(key, k)) then (v, r) else let val (item, r') = look r in (item, B(h, k, v, r')) end val (item, bucket) = look (Array.sub (arr, indx)) in Array.update (arr, indx, bucket); n_items := !n_items - 1; item end (* remove *); (* Return the number of items in the table *) fun numItems (HT{n_items, ...}) = !n_items (* return a list of the items in the table *) fun listItems (HT{table = ref arr, n_items, ...}) = let fun f (_, l, 0) = l | f (~1, l, _) = l | f (i, l, n) = let fun g (NIL, l, n) = f (i-1, l, n) | g (B(_, k, v, r), l, n) = g(r, (k, v)::l, n-1) in g (Array.sub(arr, i), l, n) end in f ((Array.length arr) - 1, [], !n_items) end (* listItems *); (* Apply a function to the entries of the table *) fun apply f (HT{table, ...}) = let fun appF NIL = () | appF (B(_, key, item, rest)) = ( f (key, item); appF rest) val arr = !table val sz = Array.length arr fun appToTbl i = if (i < sz) then (appF (Array.sub (arr, i)); appToTbl(i+1)) else () in appToTbl 0 end (* apply *); (* Map a table to a new table that has the same keys and exception *) fun map f (HT{hashVal, sameKey, table, n_items, not_found}) = let fun mapF NIL = NIL | mapF (B(hash, key, item, rest)) = B(hash, key, f (key, item), mapF rest) val arr = !table val sz = Array.length arr val newArr = Array.array (sz, NIL) fun mapTbl i = if (i < sz) then ( Array.update(newArr, i, mapF (Array.sub(arr, i))); mapTbl (i+1)) else () in mapTbl 0; HT{hashVal=hashVal, sameKey=sameKey, table = ref newArr, n_items = ref(!n_items), not_found = not_found} end (* transform *); (* remove any hash table items that do not satisfy the given * predicate. *) fun filter pred (HT{table, n_items, not_found, ...}) = let fun filterP NIL = NIL | filterP (B(hash, key, item, rest)) = if (pred(key, item)) then B(hash, key, item, filterP rest) else filterP rest val arr = !table val sz = Array.length arr fun filterTbl i = if (i < sz) then ( Array.update (arr, i, filterP (Array.sub (arr, i))); filterTbl (i+1)) else () in filterTbl 0 end (* filter *); (* Map a table to a new table that has the same keys, exception, hash function, and equality function *) fun transform f (HT{hashVal, sameKey, table, n_items, not_found}) = let fun mapF NIL = NIL | mapF (B(hash, key, item, rest)) = B(hash, key, f item, mapF rest) val arr = !table val sz = Array.length arr val newArr = Array.array (sz, NIL) fun mapTbl i = if (i < sz) then ( Array.update(newArr, i, mapF (Array.sub(arr, i))); mapTbl (i+1)) else () in mapTbl 0; HT{hashVal=hashVal, sameKey=sameKey, table = ref newArr, n_items = ref(!n_items), not_found = not_found} end (* transform *); (* Create a copy of a hash table *) fun copy (HT{hashVal, sameKey, table, n_items, not_found}) = let val arr = !table val sz = Array.length arr val newArr = Array.array (sz, NIL) fun mapTbl i = ( Array.update (newArr, i, Array.sub(arr, i)); mapTbl (i+1)) in (mapTbl 0) handle _ => (); HT{hashVal=hashVal, sameKey=sameKey, table = ref newArr, n_items = ref(!n_items), not_found = not_found} end (* copy *); (* returns a list of the sizes of the various buckets. This is to * allow users to gauge the quality of their hashing function. *) fun bucketSizes (HT{table = ref arr, ...}) = let fun len (NIL, n) = n | len (B(_, _, _, r), n) = len(r, n+1) fun f (~1, l) = l | f (i, l) = f (i-1, len (Array.sub (arr, i), 0) :: l) in f ((Array.length arr)-1, []) end (*Added by lcp. This is essentially the described in Compilers: Principles, Techniques, and Tools, by Aho, Sethi and Ullman.*) (*This hash function is recommended in Compilers: Principles, Techniques, and Tools, by Aho, Sethi and Ullman. The hashpjw function, which they particularly recommend, triggers a bug in versions of Poly/ML up to 4.2.0.*) fun hashw (u,w) = Word.+ (u, Word.*(0w65599,w)) fun hashw_char (c,w) = hashw (Word.fromInt (Char.ord c), w); fun hashw_int (i,w) = hashw (Word.fromInt i, w); fun hashw_vector (v,w) = Vector.foldl hashw w v; fun hashw_string (s:string, w) = CharVector.foldl hashw_char w s; fun hashw_strings (ss, w) = List.foldl hashw_string w ss; fun hash_string s = Word.toIntX (hashw_string(s,0w0)); end