...
Run Format

Text file src/pkg/runtime/hashmap.h

     1	// Copyright 2009 The Go Authors. All rights reserved.
     2	// Use of this source code is governed by a BSD-style
     3	// license that can be found in the LICENSE file.
     4	
     5	// This file contains the implementation of Go's map type.
     6	//
     7	// The map is just a hash table.  The data is arranged
     8	// into an array of buckets.  Each bucket contains up to
     9	// 8 key/value pairs.  The low-order bits of the hash are
    10	// used to select a bucket.  Each bucket contains a few
    11	// high-order bits of each hash to distinguish the entries
    12	// within a single bucket.
    13	//
    14	// If more than 8 keys hash to a bucket, we chain on
    15	// extra buckets.
    16	//
    17	// When the hashtable grows, we allocate a new array
    18	// of buckets twice as big.  Buckets are incrementally
    19	// copied from the old bucket array to the new bucket array.
    20	//
    21	// Map iterators walk through the array of buckets and
    22	// return the keys in walk order (bucket #, then overflow
    23	// chain order, then bucket index).  To maintain iteration
    24	// semantics, we never move keys within their bucket (if
    25	// we did, keys might be returned 0 or 2 times).  When
    26	// growing the table, iterators remain iterating through the
    27	// old table and must check the new table if the bucket
    28	// they are iterating through has been moved ("evacuated")
    29	// to the new table.
    30	
    31	// Maximum number of key/value pairs a bucket can hold.
    32	#define BUCKETSIZE 8
    33	
    34	// Maximum average load of a bucket that triggers growth.
    35	#define LOAD 6.5
    36	
    37	// Picking LOAD: too large and we have lots of overflow
    38	// buckets, too small and we waste a lot of space.  I wrote
    39	// a simple program to check some stats for different loads:
    40	// (64-bit, 8 byte keys and values)
    41	//        LOAD    %overflow  bytes/entry     hitprobe    missprobe
    42	//        4.00         2.13        20.77         3.00         4.00
    43	//        4.50         4.05        17.30         3.25         4.50
    44	//        5.00         6.85        14.77         3.50         5.00
    45	//        5.50        10.55        12.94         3.75         5.50
    46	//        6.00        15.27        11.67         4.00         6.00
    47	//        6.50        20.90        10.79         4.25         6.50
    48	//        7.00        27.14        10.15         4.50         7.00
    49	//        7.50        34.03         9.73         4.75         7.50
    50	//        8.00        41.10         9.40         5.00         8.00
    51	//
    52	// %overflow   = percentage of buckets which have an overflow bucket
    53	// bytes/entry = overhead bytes used per key/value pair
    54	// hitprobe    = # of entries to check when looking up a present key
    55	// missprobe   = # of entries to check when looking up an absent key
    56	//
    57	// Keep in mind this data is for maximally loaded tables, i.e. just
    58	// before the table grows.  Typical tables will be somewhat less loaded.
    59	
    60	// Maximum key or value size to keep inline (instead of mallocing per element).
    61	// Must fit in a uint8.
    62	// Fast versions cannot handle big values - the cutoff size for
    63	// fast versions in ../../cmd/gc/walk.c must be at most this value.
    64	#define MAXKEYSIZE 128
    65	#define MAXVALUESIZE 128
    66	
    67	typedef struct Bucket Bucket;
    68	struct Bucket
    69	{
    70		// Note: the format of the Bucket is encoded in ../../cmd/gc/reflect.c and
    71		// ../reflect/type.go.  Don't change this structure without also changing that code!
    72		uint8  tophash[BUCKETSIZE]; // top 8 bits of hash of each entry (or special mark below)
    73		Bucket *overflow;           // overflow bucket, if any
    74		uint64 data[1];             // BUCKETSIZE keys followed by BUCKETSIZE values
    75	};
    76	// NOTE: packing all the keys together and then all the values together makes the
    77	// code a bit more complicated than alternating key/value/key/value/... but it allows
    78	// us to eliminate padding which would be needed for, e.g., map[int64]int8.
    79	
    80	// tophash values.  We reserve a few possibilities for special marks.
    81	// Each bucket (including its overflow buckets, if any) will have either all or none of its
    82	// entries in the Evacuated* states (except during the evacuate() method, which only happens
    83	// during map writes and thus no one else can observe the map during that time).
    84	enum
    85	{
    86		Empty = 0,		// cell is empty
    87		EvacuatedEmpty = 1,	// cell is empty, bucket is evacuated.
    88		EvacuatedX = 2,		// key/value is valid.  Entry has been evacuated to first half of larger table.
    89		EvacuatedY = 3,		// same as above, but evacuated to second half of larger table.
    90		MinTopHash = 4, 	// minimum tophash for a normal filled cell.
    91	};
    92	#define evacuated(b) ((b)->tophash[0] > Empty && (b)->tophash[0] < MinTopHash)
    93	
    94	struct Hmap
    95	{
    96		// Note: the format of the Hmap is encoded in ../../cmd/gc/reflect.c and
    97		// ../reflect/type.go.  Don't change this structure without also changing that code!
    98		uintgo  count;        // # live cells == size of map.  Must be first (used by len() builtin)
    99		uint32  flags;
   100		uint32  hash0;        // hash seed
   101		uint8   B;            // log_2 of # of buckets (can hold up to LOAD * 2^B items)
   102		uint8   keysize;      // key size in bytes
   103		uint8   valuesize;    // value size in bytes
   104		uint16  bucketsize;   // bucket size in bytes
   105	
   106		byte    *buckets;     // array of 2^B Buckets. may be nil if count==0.
   107		byte    *oldbuckets;  // previous bucket array of half the size, non-nil only when growing
   108		uintptr nevacuate;    // progress counter for evacuation (buckets less than this have been evacuated)
   109	};
   110	
   111	// possible flags
   112	enum
   113	{
   114		IndirectKey = 1,    // storing pointers to keys
   115		IndirectValue = 2,  // storing pointers to values
   116		Iterator = 4,       // there may be an iterator using buckets
   117		OldIterator = 8,    // there may be an iterator using oldbuckets
   118	};
   119	
   120	// Macros for dereferencing indirect keys
   121	#define IK(h, p) (((h)->flags & IndirectKey) != 0 ? *(byte**)(p) : (p))
   122	#define IV(h, p) (((h)->flags & IndirectValue) != 0 ? *(byte**)(p) : (p))
   123	
   124	// If you modify Hiter, also change cmd/gc/reflect.c to indicate
   125	// the layout of this structure.
   126	struct Hiter
   127	{
   128		uint8* key; // Must be in first position.  Write nil to indicate iteration end (see cmd/gc/range.c).
   129		uint8* value; // Must be in second position (see cmd/gc/range.c).
   130	
   131		MapType *t;
   132		Hmap *h;
   133		byte *buckets; // bucket ptr at hash_iter initialization time
   134		struct Bucket *bptr; // current bucket
   135	
   136		uint8 offset; // intra-bucket offset to start from during iteration (should be big enough to hold BUCKETSIZE-1)
   137		bool done;
   138	
   139		// state of table at time iterator is initialized
   140		uint8 B;
   141	
   142		// iter state
   143		uintptr bucket;
   144		uintptr i;
   145		intptr check_bucket;
   146	};
   147	

View as plain text