More cleanup, refactoring, and additions to unit test coverage.
diff --git a/src/perf/BaseReader.java b/src/perf/BaseReader.java new file mode 100644 index 0000000..7831789 --- /dev/null +++ b/src/perf/BaseReader.java
@@ -0,0 +1,117 @@ +//package org.codehaus.jackson.io; + +import java.io.*; + +import org.codehaus.jackson.io.IOContext; + +/** + * Simple basic class for optimized readers in this package; implements + * "cookie-cutter" methods that are used by all actual implementations. + */ +abstract class BaseReader + extends Reader +{ + /** + * JSON actually limits available Unicode range in the high end + * to the same as xml (to basically limit UTF-8 max byte sequence + * length to 4) + */ + final protected static int LAST_VALID_UNICODE_CHAR = 0x10FFFF; + + final protected static char NULL_CHAR = (char) 0; + final protected static char NULL_BYTE = (byte) 0; + + final protected IOContext mContext; + + protected InputStream mIn; + + protected byte[] mBuffer; + + protected int mPtr; + protected int mLength; + + /* + //////////////////////////////////////// + // Life-cycle + //////////////////////////////////////// + */ + + protected BaseReader(IOContext context, + InputStream in, byte[] buf, int ptr, int len) + { + mContext = context; + mIn = in; + mBuffer = buf; + mPtr = ptr; + mLength = len; + } + + /* + //////////////////////////////////////// + // Reader API + //////////////////////////////////////// + */ + + public void close() + throws IOException + { + InputStream in = mIn; + + if (in != null) { + mIn = null; + freeBuffers(); + in.close(); + } + } + + char[] mTmpBuf = null; + + /** + * Although this method is implemented by the base class, AND it should + * never be called by main code, let's still implement it bit more + * efficiently just in case + */ + public int read() + throws IOException + { + if (mTmpBuf == null) { + mTmpBuf = new char[1]; + } + if (read(mTmpBuf, 0, 1) < 1) { + return -1; + } + return mTmpBuf[0]; + } + + /* + //////////////////////////////////////// + // Internal/package methods: + //////////////////////////////////////// + */ + + /** + * This method should be called along with (or instead of) normal + * close. 
After calling this method, no further reads should be tried. + * Method will try to recycle read buffers (if any). + */ + public final void freeBuffers() + { + byte[] buf = mBuffer; + if (buf != null) { + mBuffer = null; + mContext.releaseReadIOBuffer(buf); + } + } + + protected void reportBounds(char[] cbuf, int start, int len) + throws IOException + { + throw new ArrayIndexOutOfBoundsException("read(buf,"+start+","+len+"), cbuf["+cbuf.length+"]"); + } + + protected void reportStrangeStream() + throws IOException + { + throw new IOException("Strange I/O stream, returned 0 bytes on read"); + } +}
diff --git a/src/perf/TestJsonPerf.java b/src/perf/TestJsonPerf.java index b28aeda..22e2e2f 100644 --- a/src/perf/TestJsonPerf.java +++ b/src/perf/TestJsonPerf.java
@@ -2,7 +2,6 @@ import org.codehaus.jackson.*; import org.codehaus.jackson.io.IOContext; -import org.codehaus.jackson.io.UTF8Reader; import org.codehaus.jackson.map.JavaTypeMapper; import org.codehaus.jackson.map.JsonTypeMapper; import org.codehaus.jackson.util.BufferRecycler; @@ -175,7 +174,7 @@ char[] cbuf = new char[mData.length]; - IOContext ctxt = new IOContext(new BufferRecycler(), this); + IOContext ctxt = new IOContext(new BufferRecycler(), this, false); int sum = 0; for (int i = 0; i < reps; ++i) {
diff --git a/src/perf/UTF8Reader.java b/src/perf/UTF8Reader.java new file mode 100644 index 0000000..c69f937 --- /dev/null +++ b/src/perf/UTF8Reader.java
//package org.codehaus.jackson.io;

import java.io.*;

import org.codehaus.jackson.io.IOContext;

/**
 * Optimized Reader that reads UTF-8 encoded content from an input stream.
 * In addition to doing (hopefully) optimal conversion, it can also take
 * array of "pre-read" (leftover) bytes; this is necessary when preliminary
 * stream/reader is trying to figure out character encoding.
 */
public final class UTF8Reader
    extends BaseReader
{
    /**
     * Second (low) half of a decoded surrogate pair that did not fit in
     * the caller's buffer on the previous read; emitted first on the
     * next call. NULL_CHAR means "none pending".
     */
    char mSurrogate = NULL_CHAR;

    /**
     * Total read character count; used for error reporting purposes
     */
    int mCharCount = 0;

    /**
     * Total read byte count; used for error reporting purposes
     */
    int mByteCount = 0;

    /*
    ////////////////////////////////////////
    // Life-cycle
    ////////////////////////////////////////
    */

    public UTF8Reader(IOContext ctxt,
                      InputStream in, byte[] buf, int ptr, int len)
    {
        super(ctxt, in, buf, ptr, len);
    }

    /*
    ////////////////////////////////////////
    // Public API
    ////////////////////////////////////////
    */

    /**
     * Decodes up to {@code len} chars of UTF-8 content into {@code cbuf},
     * blocking at most once (and only when not even a single complete
     * character is already buffered).
     *
     * @return Number of chars decoded, or -1 at end of input
     */
    public int read(char[] cbuf, int start, int len)
        throws IOException
    {
        // Already EOF? (buffer is released on EOF/close)
        if (mBuffer == null) {
            return -1;
        }
        if (len < 1) {
            return len;
        }
        // Let's then ensure there's enough room...
        if (start < 0 || (start+len) > cbuf.length) {
            reportBounds(cbuf, start, len);
        }

        // From here on, 'len' is the end offset (exclusive), not a count:
        len += start;
        int outPtr = start;

        // Ok, first; do we have a surrogate from last round?
        if (mSurrogate != NULL_CHAR) {
            cbuf[outPtr++] = mSurrogate;
            mSurrogate = NULL_CHAR;
            // No need to load more, already got one char
            /* NOTE(review): this path falls through to the main loop
             * without verifying that mPtr < mLength; if the input buffer
             * was fully consumed when the surrogate was stashed, the loop
             * below may read past the valid bytes — confirm the invariant.
             */
        } else {
            /* To prevent unnecessary blocking (esp. with network streams),
             * we'll only require decoding of a single char
             */
            int left = (mLength - mPtr);

            /* So; only need to load more if we can't provide at least
             * one more character. We need not do thorough check here,
             * but let's check the common cases here: either completely
             * empty buffer (left == 0), or one with less than max. byte
             * count for a single char, and starting of a multi-byte
             * encoding (this leaves possibility of a 2/3-byte char
             * that is still fully accessible... but that can be checked
             * by the load method)
             */
            if (left < 4) {
                // Need to load more?
                if (left < 1 || mBuffer[mPtr] < 0) {
                    if (!loadMore(left)) { // (legal) EOF?
                        return -1;
                    }
                }
            }
        }

        /* This may look silly, but using a local var is indeed faster
         * (if and when HotSpot properly gets things running) than
         * member variable...
         */
        byte[] buf = mBuffer;
        int inPtr = mPtr;
        int inBufLen = mLength;

        main_loop:
        while (outPtr < len) {
            // At this point we have at least one byte available
            int c = (int) buf[inPtr++];

            /* Let's first do the quickie loop for common case; 7-bit
             * ascii:
             */
            if (c >= 0) { // ascii? can probably loop, then
                cbuf[outPtr++] = (char) c; // ok since MSB is never on

                /* Ok, how many such chars could we safely process
                 * without overruns? (will combine 2 in-loop comparisons
                 * into just one)
                 */
                int outMax = (len - outPtr); // max output
                int inMax = (inBufLen - inPtr); // max input
                int inEnd = inPtr + ((inMax < outMax) ? inMax : outMax);

                ascii_loop:
                while (true) {
                    if (inPtr >= inEnd) {
                        break main_loop;
                    }
                    c = (int) buf[inPtr++];
                    if (c < 0) { // multi-byte
                        break ascii_loop;
                    }
                    cbuf[outPtr++] = (char) c;
                }
            }

            int needed; // number of continuation bytes still required

            // Ok; if we end here, we got multi-byte combination
            if ((c & 0xE0) == 0xC0) { // 2 bytes (0x0080 - 0x07FF)
                c = (c & 0x1F);
                needed = 1;
            } else if ((c & 0xF0) == 0xE0) { // 3 bytes (0x0800 - 0xFFFF)
                c = (c & 0x0F);
                needed = 2;
            } else if ((c & 0xF8) == 0xF0) {
                // 4 bytes; double-char BS, with surrogates and all...
                // (0x0F mask is equivalent to 0x07 here, since the
                // 0xF8-match guarantees bit 3 is clear)
                c = (c & 0x0F);
                needed = 3;
            } else {
                reportInvalidInitial(c & 0xFF, outPtr-start);
                // never gets here... but compiler needs the assignment:
                needed = 1;
            }
            /* Do we have enough bytes? If not, let's just push back the
             * byte and leave, since we have already gotten at least one
             * char decoded. This way we will only block (with read from
             * input stream) when absolutely necessary.
             */
            if ((inBufLen - inPtr) < needed) {
                --inPtr;
                break main_loop;
            }

            int d = (int) buf[inPtr++]; // 2nd byte
            if ((d & 0xC0) != 0x080) { // must be a 10xxxxxx continuation
                reportInvalidOther(d & 0xFF, outPtr-start);
            }
            c = (c << 6) | (d & 0x3F);

            if (needed > 1) { // needed == 1 means 2 bytes total
                d = buf[inPtr++]; // 3rd byte
                if ((d & 0xC0) != 0x080) {
                    reportInvalidOther(d & 0xFF, outPtr-start);
                }
                c = (c << 6) | (d & 0x3F);
                if (needed > 2) { // 4 bytes? (need surrogates)
                    d = buf[inPtr++];
                    if ((d & 0xC0) != 0x080) {
                        reportInvalidOther(d & 0xFF, outPtr-start);
                    }
                    c = (c << 6) | (d & 0x3F);
                    if (c > LAST_VALID_UNICODE_CHAR) {
                        reportInvalid(c, outPtr-start,
                                      "(above "+Integer.toHexString(LAST_VALID_UNICODE_CHAR));
                    }
                    /* Ugh. Need to mess with surrogates. Ok; let's inline them
                     * there, then, if there's room: if only room for one,
                     * need to save the surrogate for the rainy day...
                     */
                    c -= 0x10000; // to normalize it starting with 0x0
                    cbuf[outPtr++] = (char) (0xD800 + (c >> 10));
                    // hmmh. can this ever be 0? (not legal, at least?)
                    c = (0xDC00 | (c & 0x03FF));

                    // Room for second part?
                    if (outPtr >= len) { // nope
                        mSurrogate = (char) c;
                        break main_loop;
                    }
                    // sure, let's fall back to normal processing:
                }

                /* 08-Jun-2007, TSa: Not sure if it's really legal
                 * to get surrogate chars here: JSON specs do not
                 * prevent them, which is different from xml. So
                 * for now let's not worry about them. If checks
                 * are needed, can uncomment following:
                 */

                /*
                else {
                    // Otherwise, need to check that 3-byte chars are
                    // legal ones (should not expand to surrogates)
                    if (c >= 0xD800) {
                        // But first, let's check max chars:
                        if (c < 0xE000) {
                            reportInvalid(c, outPtr-start, "(a surrogate character) ");
                        }
                    }
                }
                */
            }
            cbuf[outPtr++] = (char) c;
            if (inPtr >= inBufLen) {
                break main_loop;
            }
        }

        // Write back the local pointer, compute the char count returned:
        mPtr = inPtr;
        len = outPtr - start;
        mCharCount += len;
        return len;
    }

    /*
    ////////////////////////////////////////
    // Internal methods
    ////////////////////////////////////////
    */

    /**
     * Reports an illegal UTF-8 start byte (one that matches no valid
     * 1/2/3/4-byte lead pattern).
     */
    private void reportInvalidInitial(int mask, int offset)
        throws IOException
    {
        // input (byte) ptr has been advanced by one, by now:
        int bytePos = mByteCount + mPtr - 1;
        int charPos = mCharCount + offset + 1;

        throw new CharConversionException("Invalid UTF-8 start byte 0x"
                                          +Integer.toHexString(mask)
                                          +" (at char #"+charPos+", byte #"+bytePos+")");
    }

    /**
     * Reports a byte that should have been a 10xxxxxx continuation
     * byte of a multi-byte sequence but was not.
     */
    private void reportInvalidOther(int mask, int offset)
        throws IOException
    {
        int bytePos = mByteCount + mPtr - 1;
        int charPos = mCharCount + offset;

        throw new CharConversionException("Invalid UTF-8 middle byte 0x"
                                          +Integer.toHexString(mask)
                                          +" (at char #"+charPos+", byte #"+bytePos+")");
    }

    /**
     * Reports EOF encountered in the middle of a multi-byte character.
     */
    private void reportUnexpectedEOF(int gotBytes, int needed)
        throws IOException
    {
        int bytePos = mByteCount + gotBytes;
        int charPos = mCharCount;

        throw new CharConversionException("Unexpected EOF in the middle of a multi-byte char: got "
                                          +gotBytes+", needed "+needed
                                          +", at char #"+charPos+", byte #"+bytePos+")");
    }

    /**
     * Reports a decoded code point that is outside the legal Unicode
     * range (or otherwise invalid, as described by {@code msg}).
     */
    private void reportInvalid(int value, int offset, String msg)
        throws IOException
    {
        int bytePos = mByteCount + mPtr - 1;
        int charPos = mCharCount + offset;

        throw new CharConversionException("Invalid UTF-8 character 0x"
                                          +Integer.toHexString(value)+msg
                                          +" at char #"+charPos+", byte #"+bytePos+")");
    }

    /**
     * Loads enough bytes into the buffer (compacting any leftovers to
     * the start first) that at least one complete character can be
     * decoded; blocks on the underlying stream as needed.
     *
     * @param available Number of "unused" bytes in the input buffer
     *
     * @return True, if enough bytes were read to allow decoding of at least
     *   one full character; false if EOF was encountered instead.
     */
    private boolean loadMore(int available)
        throws IOException
    {
        // Everything before the leftover bytes has been consumed:
        mByteCount += (mLength - available);

        // Bytes that need to be moved to the beginning of buffer?
        if (available > 0) {
            if (mPtr > 0) {
                for (int i = 0; i < available; ++i) {
                    mBuffer[i] = mBuffer[mPtr+i];
                }
                mPtr = 0;
            }
            mLength = available;
        } else {
            /* Ok; here we can actually reasonably expect an EOF,
             * so let's do a separate read right away:
             */
            mPtr = 0;
            int count = mIn.read(mBuffer);
            if (count < 1) {
                mLength = 0;
                if (count < 0) { // -1
                    freeBuffers(); // to help GC?
                    return false;
                }
                // 0 count is no good; let's err out
                reportStrangeStream();
            }
            mLength = count;
        }

        /* We now have at least one byte... and that allows us to
         * calculate exactly how many bytes we need!
         */
        int c = (int) mBuffer[0];
        if (c >= 0) { // single byte (ascii) char... cool, can return
            return true;
        }

        // Ok, a multi-byte char, let's check how many bytes we'll need:
        int needed;
        if ((c & 0xE0) == 0xC0) { // 2 bytes (0x0080 - 0x07FF)
            needed = 2;
        } else if ((c & 0xF0) == 0xE0) { // 3 bytes (0x0800 - 0xFFFF)
            needed = 3;
        } else if ((c & 0xF8) == 0xF0) {
            // 4 bytes; double-char BS, with surrogates and all...
            needed = 4;
        } else {
            reportInvalidInitial(c & 0xFF, 0);
            // never gets here... but compiler whines without this:
            needed = 1;
        }

        /* And then we'll just need to load up to that many bytes;
         * if an EOF is hit, that'll be an error. But we need not do
         * actual decoding here, just load enough bytes.
         */
        while (mLength < needed) {
            int count = mIn.read(mBuffer, mLength, mBuffer.length - mLength);
            if (count < 1) {
                if (count < 0) { // -1, EOF... no good!
                    freeBuffers();
                    reportUnexpectedEOF(mLength, needed);
                }
                // 0 count is no good; let's err out
                reportStrangeStream();
            }
            mLength += count;
        }
        return true;
    }
}