Fix [Issue-21] for 1.9.9

commit: 37ec337e09479bcd525efaebc3a66c651cb16264 [log] [tgz]
author: cowtowncoder <cowtowncoder@cc757fca-8a48-0410-80b4-e22f7f27f4c6> Sat Jul 14 04:03:50 2012 +0000
committer: cowtowncoder <cowtowncoder@cc757fca-8a48-0410-80b4-e22f7f27f4c6> Sat Jul 14 04:03:50 2012 +0000
tree: 31379426d63aae2199ec521f803e219b9641ba85
parent: c45febda21b6ec411b9926fb5b867a03b7fc4643 [diff]
diff --git a/release-notes/VERSION b/release-notes/VERSION
index be10e23..77bbdb2 100644
--- a/release-notes/VERSION
+++ b/release-notes/VERSION

@@ -8,6 +8,10 @@
 
 Fixes:
 
+  * [Issue-21]: Improve handling of String hash code collisions for
+   symbol tables; exception for degenerate cases (attacks), improvements
+   for calculation otherwise
+
 ------------------------------------------------------------------------
 === History: ===
 ------------------------------------------------------------------------

diff --git a/src/java/org/codehaus/jackson/sym/BytesToNameCanonicalizer.java b/src/java/org/codehaus/jackson/sym/BytesToNameCanonicalizer.java
index 9eb505c..fc50806 100644
--- a/src/java/org/codehaus/jackson/sym/BytesToNameCanonicalizer.java
+++ b/src/java/org/codehaus/jackson/sym/BytesToNameCanonicalizer.java

@@ -18,9 +18,7 @@
     /**
      * Let's not expand symbol tables past some maximum size;
      * this should protected against OOMEs caused by large documents
-     * with uniquer (~= random) names.
-     * 
-     * @since 1.5
+     * with unique (~= random) names.
      */
     protected static final int MAX_TABLE_SIZE = 0x10000; // 64k entries == 256k mem
     
@@ -31,6 +29,29 @@
      */
     final static int MAX_ENTRIES_FOR_REUSE = 6000;
 
+    /**
+     * Also: to thwart attacks based on hash collisions (which may or may not
+     * be cheap to calculate), we will need to detect "too long"
+     * collision chains. Let's start with static value of 255 entries
+     * for the longest legal chain.
+     *<p>
+     * Note: longest chain we have been able to produce without malicious
+     * intent has been 60 (with "com.fasterxml.jackson.core.main.TestWithTonsaSymbols");
+     * our setting should be reasonable here.
+     * 
+     * @since 1.9.9
+     */
+    final static int MAX_COLL_CHAIN_LENGTH = 255;
+
+    /**
+     * And to support reduce likelihood of accidental collisions causing
+     * exceptions, let's prevent reuse of tables with long collision
+     * overflow lists as well.
+     * 
+     * @since 1.9.9
+     */
+    final static int MAX_COLL_CHAIN_FOR_REUSE  = 63;
+
     final static int MIN_HASH_SIZE = 16;
 
     final static int INITIAL_COLLISION_LEN = 32;
@@ -47,8 +68,19 @@
     /**********************************************************
      */
 
-    final BytesToNameCanonicalizer _parent;
+    final protected BytesToNameCanonicalizer _parent;
 
+    /**
+     * Seed value we use as the base to make hash codes non-static between
+     * different runs, but still stable for lifetime of a single symbol table
+     * instance.
+     * This is done for security reasons, to avoid potential DoS attack via
+     * hash collisions.
+     * 
+     * @since 1.9.9
+     */
+    final private int _hashSeed;
+    
     /*
     /**********************************************************
     /* Main table state
@@ -59,7 +91,7 @@
      * Whether canonial symbol Strings are to be intern()ed before added
      * to the table or not
      */
-    final boolean _intern;
+    private final boolean _intern;
     
     // // // First, global information
 
@@ -68,6 +100,15 @@
      */
     private int _count;
 
+    /**
+     * We need to keep track of the longest collision list; this is needed
+     * both to indicate problems with attacks and to allow flushing for
+     * other cases.
+     * 
+     * @since 1.9.9
+     */
+    protected int _longestCollisionList;
+    
     // // // Then information regarding primary hash array and its
     // // // matching Name array
 
@@ -162,19 +203,37 @@
     /**********************************************************
      */
 
+    /**
+     * Factory method to call to create a symbol table instance with a
+     * randomized seed value.
+     */
     public static BytesToNameCanonicalizer createRoot()
     {
-        return new BytesToNameCanonicalizer(DEFAULT_TABLE_SIZE, true);
+        /* [Issue-21]: Need to use a variable seed, to thwart hash-collision
+         * based attacks.
+         */
+        long now = System.currentTimeMillis();
+        // ensure it's not 0; and might as well require to be odd so:
+        int seed = (((int) now) + ((int) now >>> 32)) | 1;
+        return createRoot(seed);
     }
 
     /**
+     * Factory method that should only be called from unit tests, where seed
+     * value should remain the same.
+     */
+    protected static BytesToNameCanonicalizer createRoot(int hashSeed) {
+        return new BytesToNameCanonicalizer(DEFAULT_TABLE_SIZE, true, hashSeed);
+    }
+    
+    /**
      * @param intern Whether canonical symbol Strings should be interned
      *   or not
      */
     public synchronized BytesToNameCanonicalizer makeChild(boolean canonicalize,
         boolean intern)
     {
-        return new BytesToNameCanonicalizer(this, intern);
+        return new BytesToNameCanonicalizer(this, intern, _hashSeed);
     }
 
     /**
@@ -196,13 +255,12 @@
         }
     }
 
-    private BytesToNameCanonicalizer(int hashSize, boolean intern)
+    private BytesToNameCanonicalizer(int hashSize, boolean intern, int seed)
     {
         _parent = null;
+        _hashSeed = seed;
         _intern = intern;
-        /* Sanity check: let's now allow hash sizes below certain
-         * min. value
-         */
+        // Sanity check: let's now allow hash sizes below certain minimum value
         if (hashSize < MIN_HASH_SIZE) {
             hashSize = MIN_HASH_SIZE;
         } else {
@@ -223,9 +281,11 @@
     /**
      * Constructor used when creating a child instance
      */
-    private BytesToNameCanonicalizer(BytesToNameCanonicalizer parent, boolean intern)
+    private BytesToNameCanonicalizer(BytesToNameCanonicalizer parent, boolean intern,
+            int seed)
     {
         _parent = parent;
+        _hashSeed = seed;
         _intern = intern;
 
         // First, let's copy the state as is:
@@ -236,6 +296,7 @@
         _collList = parent._collList;
         _collCount = parent._collCount;
         _collEnd = parent._collEnd;
+        _longestCollisionList = parent._longestCollisionList;
         _needRehash = false;
         // And consider all shared, so far:
         _mainHashShared = true;
@@ -246,6 +307,7 @@
     private void initTables(int hashSize)
     {
         _count = 0;
+        _longestCollisionList = 0;
         _mainHash = new int[hashSize];
         _mainNames = new Name[hashSize];
         _mainHashShared = false;
@@ -273,7 +335,8 @@
          * One way to do this is to just purge tables if they grow
          * too large, and that's what we'll do here.
          */
-        if (child.size() > MAX_ENTRIES_FOR_REUSE) {
+        if (child.size() > MAX_ENTRIES_FOR_REUSE
+                || child._longestCollisionList > MAX_COLL_CHAIN_FOR_REUSE) {
             /* Should there be a way to get notified about this
              * event, to log it or such? (as it's somewhat abnormal
              * thing to happen)
@@ -282,6 +345,7 @@
             initTables(DEFAULT_TABLE_SIZE);
         } else {
             _count = child._count;
+            _longestCollisionList = child._longestCollisionList;
             _mainHash = child._mainHash;
             _mainNames = child._mainNames;
             _mainHashShared = true; // shouldn't matter for parent
@@ -309,15 +373,52 @@
     public int size() { return _count; }
 
     /**
+     * @since 1.9.9
+     */
+    public int bucketCount() { return _mainHash.length; }
+    
+    /**
      * Method called to check to quickly see if a child symbol table
      * may have gotten additional entries. Used for checking to see
      * if a child table should be merged into shared table.
      */
-    public boolean maybeDirty()
-    {
+    public boolean maybeDirty() {
         return !_mainHashShared;
     }
 
+    /**
+     * @since 1.9.9
+     */
+    public int hashSeed() { return _hashSeed; }
+    
+    /**
+     * Method mostly needed by unit tests; calculates number of
+     * entries that are in collision list. Value can be at most
+     * ({@link #size} - 1), but should usually be much lower, ideally 0.
+     * 
+     * @since 1.9.9
+     */
+    public int collisionCount() {
+        return _collCount;
+    }
+
+    /**
+     * Method mostly needed by unit tests; calculates length of the
+     * longest collision chain. This should typically be a low number,
+     * but may be up to {@link #size} - 1 in the pathological case
+     * 
+     * @since 1.9.9
+     */
+    public int maxCollisionLength() {
+        return _longestCollisionList;
+    }
+
+    /*
+    /**********************************************************
+    /* Public API, accessing symbols:
+    /**********************************************************
+     */
+    
     public static Name getEmptyName()
     {
         return Name1.getEmptyName();
@@ -441,11 +542,9 @@
      */
     public Name findName(int[] quads, int qlen)
     {
-        /* // Not needed, never gets called
         if (qlen < 3) { // another sanity check
             return findName(quads[0], (qlen < 2) ? 0 : quads[1]);
         }
-        */
         int hash = calcHash(quads, qlen);
         // (for rest of comments regarding logic, see method above)
         int ix = (hash & _mainHashMask);
@@ -476,9 +575,6 @@
     /**********************************************************
      */
 
-    /**
-     * @since 1.6.0
-     */
     public Name addName(String symbolStr, int q1, int q2)
     {
         if (_intern) {
@@ -495,7 +591,12 @@
         if (_intern) {
             symbolStr = InternCache.instance.intern(symbolStr);
         }
-        int hash = calcHash(quads, qlen);
+        int hash;
+        if (qlen < 3) {
+            hash = (qlen == 1) ? calcHash(quads[0]) : calcHash(quads[0], quads[1]);
+        } else {
+            hash = calcHash(quads, qlen);
+        }
         Name symbol = constructName(hash, symbolStr, quads, qlen);
         _addSymbol(hash, symbol);
         return symbol;
@@ -507,45 +608,72 @@
     /**********************************************************
      */
 
-    public final static int calcHash(int firstQuad)
+    /* Note on hash calculation: we try to make it more difficult to
+     * generate collisions automatically; part of this is to avoid
+     * simple "multiply-add" algorithm (like JDK String.hashCode()),
+     * and add bit of shifting. And other part is to make this
+     * non-linear, at least for shorter symbols.
+     */
+    
+    // JDK uses 31; other fine choices are 33 and 65599, let's use 33
+    // as it seems to give fewest collisions for us
+    // (see [http://www.cse.yorku.ca/~oz/hash.html] for details)
+    private final static int MULT = 33;
+    
+    public final int calcHash(int firstQuad)
     {
+        int hash = firstQuad ^ _hashSeed;
+        hash += (hash >>> 15); // to xor hi- and low- 16-bits
+        hash ^= (hash >>> 9); // as well as lowest 2 bytes
+        return hash;
+    }
+
+    public final int calcHash(int firstQuad, int secondQuad)
+    {
+        /* For two quads, let's change algorithm a bit, to spice
+         * things up (can do bit more processing anyway)
+         */
+        
         int hash = firstQuad;
-        hash ^= (hash >>> 16); // to xor hi- and low- 16-bits
-        hash ^= (hash >>> 8); // as well as lowest 2 bytes
+        hash += (hash >>> 15); // try mixing first and second byte pairs first
+        hash ^= ((secondQuad + _hashSeed) * MULT); // then add second quad
+        hash += (hash >>> 9); // and shuffle some more
         return hash;
     }
 
-    public final static int calcHash(int firstQuad, int secondQuad)
+    public final int calcHash(int[] quads, int qlen)
     {
-        int hash = (firstQuad * 31) + secondQuad;
-
-        // If this was called for single-quad instance:
-        //int hash = (secondQuad == 0) ? firstQuad : ((firstQuad * 31) + secondQuad);
-
-        hash ^= (hash >>> 16); // to xor hi- and low- 16-bits
-        hash ^= (hash >>> 8); // as well as lowest 2 bytes
-        return hash;
-    }
-
-    public final static int calcHash(int[] quads, int qlen)
-    {
-        // Note: may be called for qlen < 3
-        int hash = quads[0];
-        for (int i = 1; i < qlen; ++i) {
-            hash = (hash * 31) + quads[i];
+        // Note: may be called for qlen < 3; but has at least one int
+        if (qlen < 3) {
+            throw new IllegalArgumentException();
         }
 
-        hash ^= (hash >>> 16); // to xor hi- and low- 16-bits
-        hash ^= (hash >>> 8); // as well as lowest 2 bytes
+        /* And then change handling again for "multi-quad" case; mostly
+         * to make calculation of collisions less fun. For example,
+         * add seed bit later in the game, and switch plus/xor around,
+         * use different shift lengths.
+         */
+        int hash = quads[0];
+        hash ^= (hash >>> 9);
+        hash += ((quads[1] + _hashSeed) * MULT);
+        hash ^= (hash >>> 15);
+        hash ^= (quads[2] * MULT);
+        hash += (hash >>> 17);
+        
+        for (int i = 3; i < qlen; ++i) {
+            hash = (hash * MULT) + (quads[i] + 1);
+            // for longer entries, mess a bit in-between too
+            hash ^= (hash >>> 3);
+        }
 
+        // and finally shuffle some more once done
+        hash += (hash >>> 15); // to get high-order bits to mix more
+        hash ^= (hash >>> 9); // as well as lowest 2 bytes
         return hash;
     }
 
-    /* 26-Nov-2008, tatu: not used currently; if not used in near future,
-     *   let's just delete it.
-     */
-    /*
-    public static int[] calcQuads(byte[] wordBytes)
+    // Method only used by unit tests
+    protected static int[] calcQuads(byte[] wordBytes)
     {
         int blen = wordBytes.length;
         int[] result = new int[(blen + 3) / 4];
@@ -565,7 +693,6 @@
         }
         return result;
     }
-    */
 
     /*
     /**********************************************************
@@ -647,7 +774,6 @@
             if (_collListShared) {
                 unshareCollision(); // also allocates if list was null
             }
-
             ++_collCount;
             int entryValue = _mainHash[ix];
             int bucket = entryValue & 0xFF;
@@ -669,7 +795,13 @@
             }
             
             // And then just need to link the new bucket entry in
-            _collList[bucket] = new Bucket(symbol, _collList[bucket]);
+            Bucket newB = new Bucket(symbol, _collList[bucket]);
+            _collList[bucket] = newB;
+            // but, be careful wrt attacks
+            _longestCollisionList = Math.max(newB.length(), _longestCollisionList);
+            if (_longestCollisionList > MAX_COLL_CHAIN_LENGTH) {
+                reportTooManyCollisions(MAX_COLL_CHAIN_LENGTH);
+            }
         }
 
         /* Ok. Now, do we need a rehash next time? Need to have at least
@@ -735,6 +867,7 @@
          */
         int oldEnd = _collEnd;
         if (oldEnd == 0) { // no prior collisions...
+            _longestCollisionList = 0;
             return;
         }
 
@@ -742,6 +875,8 @@
         _collEnd = 0;
         _collListShared = false;
 
+        int maxColl = 0;
+        
         Bucket[] oldBuckets = _collList;
         _collList = new Bucket[oldBuckets.length];
         for (int i = 0; i < oldEnd; ++i) {
@@ -774,11 +909,15 @@
                         --bucket; // 1-based index in value
                     }
                     // And then just need to link the new bucket entry in
-                    _collList[bucket] = new Bucket(symbol, _collList[bucket]);
+                    Bucket newB = new Bucket(symbol, _collList[bucket]);
+                    _collList[bucket] = newB;
+                    maxColl = Math.max(maxColl, newB.length());
                 }
             } // for (... buckets in the chain ...)
         } // for (... list of bucket heads ... )
 
+        _longestCollisionList = maxColl;
+        
         if (symbolsSeen != _count) { // sanity check
             throw new RuntimeException("Internal error: count after rehash "+symbolsSeen+"; should be "+_count);
         }
@@ -791,6 +930,7 @@
     private void nukeSymbols()
     {
         _count = 0;
+        _longestCollisionList = 0;
         Arrays.fill(_mainHash, 0);
         Arrays.fill(_mainNames, null);
         Arrays.fill(_collList, null);
@@ -906,6 +1046,21 @@
 
     /*
     /**********************************************************
+    /* Other helper methods
+    /**********************************************************
+     */
+    
+    /**
+     * @since 1.9.9
+     */
+    protected void reportTooManyCollisions(int maxLen)
+    {
+        throw new IllegalStateException("Longest collision chain in symbol table (of size "+_count
+                +") now exceeds maximum, "+maxLen+" -- suspect a DoS attack based on hash collisions");
+    }
+    
+    /*
+    /**********************************************************
     /* Helper classes
     /**********************************************************
      */
@@ -914,21 +1069,16 @@
     {
         protected final Name _name;
         protected final Bucket _next;
+        private final int _length;
 
         Bucket(Name name, Bucket next)
         {
             _name = name;
             _next = next;
+            _length = (next == null) ? 1 : next._length+1;
         }
 
-        public int length()
-        {
-            int len = 1;
-            for (Bucket curr = _next; curr != null; curr = curr._next) {
-                ++len;
-            }
-            return len;
-        }
+        public int length() { return _length; }
 
         public Name find(int hash, int firstQuad, int secondQuad)
         {

diff --git a/src/java/org/codehaus/jackson/sym/CharsToNameCanonicalizer.java b/src/java/org/codehaus/jackson/sym/CharsToNameCanonicalizer.java
index 76badc3..11c0650 100644
--- a/src/java/org/codehaus/jackson/sym/CharsToNameCanonicalizer.java
+++ b/src/java/org/codehaus/jackson/sym/CharsToNameCanonicalizer.java

@@ -346,9 +346,11 @@
      */
 
     public int size() { return _size; }
-
+   
     public boolean maybeDirty() { return _dirty; }
 
+    public int bucketCount() { return _symbols.length; }
+    
     /**
      * Method mostly needed by unit tests; calculates number of
      * entries that are in collision list. Value can be at most
@@ -392,8 +394,7 @@
             return new String(buffer, start, len);
         }
 
-        hash &= _indexMask;
-
+        hash = _hashToIndex(hash);
         String sym = _symbols[hash];
 
         // Optimal case; checking existing primary symbol for hash index:
@@ -429,7 +430,7 @@
             /* Need to recalc hash; rare occurence (index mask has been
              * recalculated as part of rehash)
              */
-            hash = calcHash(buffer, start, len) & _indexMask;
+            hash = _hashToIndex(calcHash(buffer, start, len));
         }
 
         String newSymbol = new String(buffer, start, len);
@@ -453,6 +454,12 @@
         return newSymbol;
     }
 
+    /*
+    /**********************************************************
+    /* Hash calculation
+    /**********************************************************
+     */
+    
     /**
      * Implementation of a hashing method for variable length
      * Strings. Most of the time intention is that this calculation
@@ -479,6 +486,16 @@
         return hash;
     }
 
+    /**
+     * Helper method that takes in a "raw" hash value, shuffles it as necessary,
+     * and truncates to be used as the index.
+     */
+    private final int _hashToIndex(int rawHash)
+    {
+        rawHash += (rawHash >>> 15); // this seems to help quite a bit, at least for our tests
+        return (rawHash & _indexMask);
+    }
+    
     /*
     /**********************************************************
     /* Internal methods
@@ -546,7 +563,7 @@
             String symbol = oldSyms[i];
             if (symbol != null) {
                 ++count;
-                int index = calcHash(symbol) & _indexMask;
+                int index = _hashToIndex(calcHash(symbol));
                 if (_symbols[index] == null) {
                     _symbols[index] = symbol;
                 } else {
@@ -564,7 +581,7 @@
             while (b != null) {
                 ++count;
                 String symbol = b.getSymbol();
-                int index = calcHash(symbol) & _indexMask;
+                int index = _hashToIndex(calcHash(symbol));
                 if (_symbols[index] == null) {
                     _symbols[index] = symbol;
                 } else {

diff --git a/src/mapper/java/org/codehaus/jackson/map/ser/BasicSerializerFactory.java b/src/mapper/java/org/codehaus/jackson/map/ser/BasicSerializerFactory.java
index 5f6f418..76de697 100644
--- a/src/mapper/java/org/codehaus/jackson/map/ser/BasicSerializerFactory.java
+++ b/src/mapper/java/org/codehaus/jackson/map/ser/BasicSerializerFactory.java

@@ -7,7 +7,6 @@
 import java.util.*;
 
 import org.codehaus.jackson.map.*;
-import org.codehaus.jackson.map.annotate.JacksonStdImpl;
 import org.codehaus.jackson.map.annotate.JsonSerialize;
 import org.codehaus.jackson.map.ext.OptionalHandlerFactory;
 import org.codehaus.jackson.map.introspect.*;

diff --git a/src/test/org/codehaus/jackson/map/ser/TestNullSerialization.java b/src/test/org/codehaus/jackson/map/ser/TestNullSerialization.java
index 37b0947..4a3ca98 100644
--- a/src/test/org/codehaus/jackson/map/ser/TestNullSerialization.java
+++ b/src/test/org/codehaus/jackson/map/ser/TestNullSerialization.java

@@ -51,7 +51,7 @@
         ObjectMapper mapper = new ObjectMapper();
         mapper.setAnnotationIntrospector(new JacksonAnnotationIntrospector());
         mapper.setSerializationInclusion(Inclusion.NON_NULL);
-        assertEquals("{\"value\":\"abc\"}", mapper.writeValueAsString(new NullBean("abc")));
-        assertEquals("{}", mapper.writeValueAsString(new NullBean(null)));
+        assertEquals("{\"value\":\"abc\"}", mapper.writeValueAsString(new NullBean<String>("abc")));
+        assertEquals("{}", mapper.writeValueAsString(new NullBean<String>(null)));
     }
 }

diff --git a/src/test/org/codehaus/jackson/sym/TestSymbolTables.java b/src/test/org/codehaus/jackson/sym/TestSymbolTables.java
index c6d6664..023ff71 100644
--- a/src/test/org/codehaus/jackson/sym/TestSymbolTables.java
+++ b/src/test/org/codehaus/jackson/sym/TestSymbolTables.java

@@ -1,5 +1,7 @@
 package org.codehaus.jackson.sym;
 
+import java.io.IOException;
+
 public class TestSymbolTables extends main.BaseTest
 {
     // 11 3-char snippets that hash to 0xFFFF (with default JDK hashCode() calc),
@@ -51,4 +53,80 @@
             assertEquals(CharsToNameCanonicalizer.MAX_COLL_CHAIN_LENGTH+2, sym.size());
         }
     }
+
+    // Test for verifying stability of hashCode, wrt collisions, using
+    // synthetic field name generation and character-based input
+    public void testSyntheticWithChars()
+    {
+        // pass seed, to keep results consistent:
+        CharsToNameCanonicalizer symbols = CharsToNameCanonicalizer.createRoot();
+        final int COUNT = 6000;
+        for (int i = 0; i < COUNT; ++i) {
+            String id = fieldNameFor(i);
+            char[] ch = id.toCharArray();
+            symbols.findSymbol(ch, 0, ch.length, CharsToNameCanonicalizer.calcHash(id));
+        }
+
+        assertEquals(8192, symbols.bucketCount());
+        assertEquals(COUNT, symbols.size());
+        
+//System.out.printf("Char stuff: collisions %d, max-coll %d\n", symbols.collisionCount(), symbols.maxCollisionLength());
+        
+        // original hashCode calc gave very high rate (3567), but with shuffling comes down a bit
+        // (changing mult to 33 would help more)
+        assertEquals(1905, symbols.collisionCount());
+        // esp. with collisions; first got about 30
+        assertEquals(6, symbols.maxCollisionLength());
+    }
+
+    // Test for verifying stability of hashCode, wrt collisions, using
+    // synthetic field name generation and byte-based input (UTF-8)
+    public void testSyntheticWithBytes() throws IOException
+    {
+        // pass seed, to keep results consistent:
+        BytesToNameCanonicalizer symbols = BytesToNameCanonicalizer.createRoot(1);
+        final int COUNT = 6000;
+        for (int i = 0; i < COUNT; ++i) {
+            String id = fieldNameFor(i);
+            int[] quads = BytesToNameCanonicalizer.calcQuads(id.getBytes("UTF-8"));
+            symbols.addName(id, quads, quads.length);
+        }
+        assertEquals(COUNT, symbols.size());
+        assertEquals(8192, symbols.bucketCount());
+
+//System.out.printf("Byte stuff: collisions %d, max-coll %d\n", symbols.collisionCount(), symbols.maxCollisionLength());
+    
+        // Fewer collisions than with chars, but still quite a few
+        assertEquals(1770, symbols.collisionCount());
+        // but not super long collision chains:
+        assertEquals(9, symbols.maxCollisionLength());
+    }
+
+    protected void fieldNameFor(StringBuilder sb, int index)
+    {
+        /* let's do something like "f1.1" to exercise different
+         * field names (important for byte-based codec)
+         * Other name shuffling done mostly just for fun... :)
+         */
+        sb.append("f");
+        sb.append(index);
+        if (index > 50) {
+            sb.append('.');
+            if (index > 200) {
+                sb.append(index);
+                if (index > 4000) { // and some even longer symbols...
+                    sb.append(".").append(index);
+                }
+            } else {
+                sb.append(index >> 3); // divide by 8
+            }
+        }
+    }
+
+    protected String fieldNameFor(int index)
+    {
+        StringBuilder sb = new StringBuilder(16);
+        fieldNameFor(sb, index);
+        return sb.toString();
+    }
 }
commit	37ec337e09479bcd525efaebc3a66c651cb16264	[log] [tgz]
author	cowtowncoder <cowtowncoder@cc757fca-8a48-0410-80b4-e22f7f27f4c6>	Sat Jul 14 04:03:50 2012 +0000
committer	cowtowncoder <cowtowncoder@cc757fca-8a48-0410-80b4-e22f7f27f4c6>	Sat Jul 14 04:03:50 2012 +0000
tree	31379426d63aae2199ec521f803e219b9641ba85
parent	c45febda21b6ec411b9926fb5b867a03b7fc4643 [diff]