src/java/org/codehaus/jackson/impl/ByteSourceBootstrapper.java - jackson - Git at Google

 package org.codehaus.jackson.impl;

 import java.io.*;

 import org.codehaus.jackson.*;
 import org.codehaus.jackson.format.InputAccessor;
 import org.codehaus.jackson.format.MatchStrength;
 import org.codehaus.jackson.io.*;
 import org.codehaus.jackson.sym.BytesToNameCanonicalizer;
 import org.codehaus.jackson.sym.CharsToNameCanonicalizer;

 /**
  * This class is used to determine the encoding of byte stream
  * that is to contain JSON content. Rules are fairly simple, and
  * defined in JSON specification (RFC-4627 or newer), except
  * for BOM handling, which is a property of underlying
  * streams.
  */
 public final class ByteSourceBootstrapper
 {
     final static byte UTF8_BOM_1 = (byte) 0xEF;
     final static byte UTF8_BOM_2 = (byte) 0xBB;
     final static byte UTF8_BOM_3 = (byte) 0xBF;

     /*
     /**********************************************************
     /* Configuration
     /**********************************************************
      */

     protected final IOContext _context;

     protected final InputStream _in;

     /*
     /**********************************************************
     /* Input buffering
     /**********************************************************
      */

     protected final byte[] _inputBuffer;

     private int _inputPtr;

     private int _inputEnd;

     /**
      * Flag that indicates whether buffer above is to be recycled
      * after being used or not.
      */
     private final boolean _bufferRecyclable;

     /*
     /**********************************************************
     /* Input location
     /**********************************************************
      */

     /**
      * Current number of input units (bytes or chars) that were processed in
      * previous blocks,
      * before contents of current input buffer.
      *<p>
      * Note: includes possible BOMs, if those were part of the input.
      */
     protected int _inputProcessed;

     /*
     /**********************************************************
     /* Data gathered
     /**********************************************************
      */

     protected boolean _bigEndian = true;

     protected int _bytesPerChar = 0; // 0 means "dunno yet"

     /*
     /**********************************************************
     /* Life-cycle
     /**********************************************************
      */

     /**
      * Creates a bootstrapper that pulls its content from a stream.
      * The working buffer is obtained from the context via
      * {@code allocReadIOBuffer()}, starts out empty, and is flagged
      * as recyclable.
      *
      * @param ctxt I/O context; supplies the read buffer
      * @param in Stream to read content from
      */
     public ByteSourceBootstrapper(IOContext ctxt, InputStream in)
     {
         _context = ctxt;
         _in = in;
         _inputBuffer = ctxt.allocReadIOBuffer();
         // Nothing has been read yet, so the buffer window is empty
         _inputPtr = 0;
         _inputEnd = 0;
         _inputProcessed = 0;
         _bufferRecyclable = true;
     }

     /**
      * Creates a bootstrapper over a caller-supplied byte array; no stream
      * is involved, and the buffer is flagged as not recyclable.
      *
      * @param ctxt I/O context
      * @param inputBuffer Array that holds the content
      * @param inputStart Offset of the first content byte within the array
      * @param inputLen Number of content bytes, counted from {@code inputStart}
      */
     public ByteSourceBootstrapper(IOContext ctxt, byte[] inputBuffer, int inputStart, int inputLen)
     {
         _context = ctxt;
         _inputBuffer = inputBuffer;
         _in = null;
         _bufferRecyclable = false;
         _inputPtr = inputStart;
         _inputEnd = inputStart + inputLen;
         // Content does not begin at index 0 of the array: start the processed
         // count at a negative offset so that location info comes out right
         _inputProcessed = -inputStart;
     }

     /*
     /**********************************************************
     /*  Encoding detection during bootstrapping
     /**********************************************************
      */

     /**
      * Method that should be called after constructing an instance.
      * It will figure out encoding that content uses, to allow
      * for instantiating a proper scanner object. The result is also
      * stored in the context via {@code setEncoding}.
      *
      * @return Encoding recognized from a BOM or from the zero-byte pattern
      *   of the first bytes; {@code JsonEncoding.UTF8} if neither matched
      * @throws IOException if reading from the stream fails, or if an
      *   unsupported UCS-4 byte order is detected
      */
     public JsonEncoding detectEncoding()
         throws IOException, JsonParseException
     {
         boolean recognized = false;

         /* BOM handling comes first. We can require 4 bytes to be read,
          * since no combination of BOM + valid JSON content can be shorter
          * (shortest valid JSON content is a single digit char, but BOMs
          * are chosen such that combination is always at least 4 chars long)
          */
         if (ensureLoaded(4)) {
             final int base = _inputPtr;
             final int quad = (_inputBuffer[base] << 24)
                 | ((_inputBuffer[base + 1] & 0xFF) << 16)
                 | ((_inputBuffer[base + 2] & 0xFF) << 8)
                 | (_inputBuffer[base + 3] & 0xFF);

             /* BOM first; without one, auto-detect based on the first char.
              * This works since it must be 7-bit ascii (wrt. unicode
              * compatible encodings, only ones JSON can be transferred
              * over). UTF-32 pattern is tried before UTF-16; evaluation
              * stops at the first check that succeeds.
              */
             recognized = handleBOM(quad)
                 || checkUTF32(quad)
                 || checkUTF16(quad >>> 16);
         } else if (ensureLoaded(2)) {
             // Too short for a quad; only the 16-bit pattern can be checked
             final int i16 = ((_inputBuffer[_inputPtr] & 0xFF) << 8)
                 | (_inputBuffer[_inputPtr + 1] & 0xFF);
             recognized = checkUTF16(i16);
         }

         final JsonEncoding enc;

         // Nothing recognized? As per specs, this means it must be UTF-8.
         if (!recognized || _bytesPerChar == 1) {
             enc = JsonEncoding.UTF8;
         } else if (_bytesPerChar == 2) {
             enc = _bigEndian ? JsonEncoding.UTF16_BE : JsonEncoding.UTF16_LE;
         } else if (_bytesPerChar == 4) {
             enc = _bigEndian ? JsonEncoding.UTF32_BE : JsonEncoding.UTF32_LE;
         } else {
             throw new RuntimeException("Internal error"); // should never get here
         }
         _context.setEncoding(enc);
         return enc;
     }

     /*
     /**********************************************************
     /* Constructing a Reader
     /**********************************************************
      */

     /**
      * Constructs a {@link Reader} that decodes the remaining content using
      * the encoding currently stored in the context (expected to have been
      * set by an earlier {@link #detectEncoding()} call).
      *<p>
      * UTF-32 content is decoded with {@code UTF32Reader}; UTF-16 and UTF-8
      * content goes through a standard {@link InputStreamReader}, fed with
      * any bytes that are already buffered but not yet consumed.
      *
      * @return Reader positioned at the first unconsumed content byte
      * @throws IOException if the reader can not be constructed
      */
     public Reader constructReader()
         throws IOException
     {
         JsonEncoding enc = _context.getEncoding();
         switch (enc) {
         case UTF32_BE:
         case UTF32_LE:
             return new UTF32Reader(_context, _in, _inputBuffer, _inputPtr, _inputEnd,
                                    enc.isBigEndian());

         case UTF16_BE:
         case UTF16_LE:
         case UTF8: // only in non-common case where we don't want to do direct mapping
             {
                 // First: do we have a Stream? If not, need to create one:
                 InputStream in = _in;

                 if (in == null) {
                     /* Third argument of ByteArrayInputStream is a length, not an
                      * end offset: passing _inputEnd would expose bytes beyond
                      * the caller's window whenever _inputPtr is above zero.
                      */
                     in = new ByteArrayInputStream(_inputBuffer, _inputPtr, _inputEnd - _inputPtr);
                 } else if (_inputPtr < _inputEnd) {
                     /* Also, if we have any read but unused input (usually true),
                      * need to merge that input in:
                      */
                     in = new MergedStream(_context, in, _inputBuffer, _inputPtr, _inputEnd);
                 }
                 return new InputStreamReader(in, enc.getJavaName());
             }
         }
         throw new RuntimeException("Internal error"); // should never get here
     }

     /**
      * Detects the encoding of the content and constructs a parser for it:
      * a byte-based {@code Utf8StreamParser} for UTF-8 content when field
      * name canonicalization is enabled, a {@code ReaderBasedParser} on top
      * of {@link #constructReader()} in every other case.
      *
      * @param features Bit set of enabled {@code JsonParser.Feature}s
      * @param codec Object codec handed to the parser
      * @param rootByteSymbols Root symbol table for the byte-based parser
      * @param rootCharSymbols Root symbol table for the reader-based parser
      * @return Newly constructed parser
      * @throws IOException if encoding detection or reader construction fails
      */
     public JsonParser constructParser(int features, ObjectCodec codec, BytesToNameCanonicalizer rootByteSymbols, CharsToNameCanonicalizer rootCharSymbols)
         throws IOException, JsonParseException
     {
         final JsonEncoding enc = detectEncoding();

         // As per [JACKSON-259], may want to fully disable canonicalization:
         final boolean canonicalize = JsonParser.Feature.CANONICALIZE_FIELD_NAMES.enabledIn(features);
         final boolean intern = JsonParser.Feature.INTERN_FIELD_NAMES.enabledIn(features);

         /* Without canonicalization the byte-based approach is not performant,
          * so UTF-8 then falls through to the std reader as well (which is ok
          * for larger input; not so hot for smaller; but not a common case)
          */
         if (canonicalize && enc == JsonEncoding.UTF8) {
             return new Utf8StreamParser(_context, features, _in, codec,
                     rootByteSymbols.makeChild(canonicalize, intern),
                     _inputBuffer, _inputPtr, _inputEnd, _bufferRecyclable);
         }
         final Reader reader = constructReader();
         return new ReaderBasedParser(_context, features, reader, codec,
                 rootCharSymbols.makeChild(canonicalize, intern));
     }

     /*
     /**********************************************************
     /*  Encoding detection for data format auto-detection
     /**********************************************************
      */

     /**
      * Heuristic check of whether content looks like JSON, used for data
      * format auto-detection. Skips an optional UTF-8 BOM and leading white
      * space, then judges by the first significant byte (and in some cases
      * the bytes right after it). Only the UTF-8 BOM is recognized here, so
      * this is not as thorough as the encoding detection done during
      * bootstrapping; but it should work, for now, and can be improved
      * as necessary.
      *
      * @param acc Accessor over the bytes to inspect
      * @return {@code SOLID_MATCH} if content starts like a JSON Object or
      *   Array; {@code WEAK_MATCH} if it starts like a scalar value;
      *   {@code INCONCLUSIVE} if input ran out before a decision was
      *   possible; {@code NO_MATCH} otherwise
      * @throws IOException if the accessor fails to provide bytes
      *
      * @since 1.8
      */
     public static MatchStrength hasJSONFormat(InputAccessor acc) throws IOException
     {
         // Ideally we should see "[" or "{"; but if not, we'll accept double-quote (String)
         // in future could also consider accepting non-standard matches?

         if (!acc.hasMoreBytes()) {
             return MatchStrength.INCONCLUSIVE;
         }
         byte first = acc.nextByte();
         // Very first thing, a UTF-8 BOM? If so, all three bytes must be there
         if (first == UTF8_BOM_1) {
             if (!acc.hasMoreBytes()) {
                 return MatchStrength.INCONCLUSIVE;
             }
             if (acc.nextByte() != UTF8_BOM_2) {
                 return MatchStrength.NO_MATCH;
             }
             if (!acc.hasMoreBytes()) {
                 return MatchStrength.INCONCLUSIVE;
             }
             if (acc.nextByte() != UTF8_BOM_3) {
                 return MatchStrength.NO_MATCH;
             }
             if (!acc.hasMoreBytes()) {
                 return MatchStrength.INCONCLUSIVE;
             }
             first = acc.nextByte();
         }
         // Then possible leading space
         int ch = skipSpace(acc, first);
         if (ch < 0) {
             return MatchStrength.INCONCLUSIVE;
         }

         switch (ch) {
         case '{': // JSON object? Next should be a double-quote or closing bracket
             ch = skipSpace(acc);
             if (ch < 0) {
                 return MatchStrength.INCONCLUSIVE;
             }
             // ... should we allow non-standard? Let's not yet... can add if need be
             return (ch == '"' || ch == '}') ? MatchStrength.SOLID_MATCH : MatchStrength.NO_MATCH;

         case '[': // JSON array: anything at all following it is accepted for now
             return (skipSpace(acc) < 0) ? MatchStrength.INCONCLUSIVE : MatchStrength.SOLID_MATCH;

         // From here on: plain old values, which are not very convincing...
         case '"': // string value
             return MatchStrength.WEAK_MATCH;

         case '-': // negative number; a digit has to follow
             ch = skipSpace(acc);
             if (ch < 0) {
                 return MatchStrength.INCONCLUSIVE;
             }
             return (ch >= '0' && ch <= '9') ? MatchStrength.WEAK_MATCH : MatchStrength.NO_MATCH;

         case 'n': // null
             return tryMatch(acc, "ull", MatchStrength.WEAK_MATCH);
         case 't': // true
             return tryMatch(acc, "rue", MatchStrength.WEAK_MATCH);
         case 'f': // false
             return tryMatch(acc, "alse", MatchStrength.WEAK_MATCH);

         default: // number, or nothing we recognize
             return (ch >= '0' && ch <= '9') ? MatchStrength.WEAK_MATCH : MatchStrength.NO_MATCH;
         }
     }

     /**
      * Checks that the next bytes from the accessor spell out the given
      * (ASCII) text.
      *
      * @param acc Accessor to read bytes from
      * @param matchStr Text that is expected to come next
      * @param fullMatchStrength Value to return when the whole text matched
      * @return {@code fullMatchStrength} on a full match, {@code NO_MATCH}
      *   at the first differing byte, {@code INCONCLUSIVE} if input ran out
      */
     private final static MatchStrength tryMatch(InputAccessor acc, String matchStr, MatchStrength fullMatchStrength)
         throws IOException
     {
         final int end = matchStr.length();
         int ix = 0;
         while (ix < end) {
             if (!acc.hasMoreBytes()) {
                 return MatchStrength.INCONCLUSIVE;
             }
             final byte actual = acc.nextByte();
             if (actual != matchStr.charAt(ix)) {
                 return MatchStrength.NO_MATCH;
             }
             ++ix;
         }
         return fullMatchStrength;
     }

     /**
      * Reads bytes until one is found that is not white space.
      *
      * @param acc Accessor to read bytes from
      * @return First non-space byte as an unsigned value, or -1 if input
      *   ran out first
      */
     private final static int skipSpace(InputAccessor acc) throws IOException
     {
         return acc.hasMoreBytes() ? skipSpace(acc, acc.nextByte()) : -1;
     }

     /**
      * Starting with an already read byte, skips over space, carriage
      * return, line feed and tab.
      *
      * @param acc Accessor to read further bytes from
      * @param b Byte that has already been read
      * @return First non-space byte as an unsigned value (0 - 255), or -1 if
      *   input ran out while still skipping
      */
     private final static int skipSpace(InputAccessor acc, byte b) throws IOException
     {
         byte current = b;
         for (;;) {
             final int ch = current & 0xFF;
             if (ch != ' ' && ch != '\r' && ch != '\n' && ch != '\t') {
                 return ch;
             }
             if (!acc.hasMoreBytes()) {
                 return -1;
             }
             current = acc.nextByte();
         }
     }

     /*
     /**********************************************************
     /* Internal methods, parsing
     /**********************************************************
      */

     /**
      * Checks whether the given four bytes start with a byte order mark and,
      * if so, records character width and byte order and moves the input
      * pointer past the mark.
      *
      * @param quad First four content bytes, first byte in the top 8 bits
      * @return True if a BOM was successfully found, and encoding
      *   thereby recognized.
      * @throws IOException if the mark indicates an unsupported UCS-4 byte order
      */
     private boolean handleBOM(int quad)
         throws IOException
     {
         /* Handling of (usually) optional BOM (required for
          * multi-byte formats); first 32-bit charsets:
          */
         if (quad == 0x0000FEFF) { // UCS-4, BE
             _bytesPerChar = 4;
             _bigEndian = true;
             _inputPtr += 4;
             return true;
         }
         if (quad == 0xFFFE0000) { // UCS-4, LE
             _bytesPerChar = 4;
             _bigEndian = false;
             _inputPtr += 4;
             return true;
         }
         if (quad == 0x0000FFFE) { // UCS-4, unusual byte order
             reportWeirdUCS4("2143"); // throws exception
         }
         if (quad == 0xFEFF0000) { // UCS-4, unusual byte order
             reportWeirdUCS4("3412"); // throws exception
         }

         // Ok, if not, how about 16-bit encoding BOMs?
         final int top = quad >>> 16;
         if (top == 0xFEFF || top == 0xFFFE) {
             _bytesPerChar = 2;
             _bigEndian = (top == 0xFEFF); // 0xFEFF -> UTF-16 BE, 0xFFFE -> UTF-16 LE
             _inputPtr += 2;
             return true;
         }

         // And if not, then UTF-8 BOM?
         if ((quad >>> 8) == 0xEFBBBF) {
             _bytesPerChar = 1;
             _bigEndian = true; // doesn't really matter
             _inputPtr += 3;
             return true;
         }
         return false;
     }

     /**
      * Checks whether the first four content bytes (no BOM) have the shape of
      * a single UTF-32 encoded character: three zero bytes plus one
      * byte that may be non-zero. On a match, records byte order and a
      * width of 4 bytes per char; the input pointer is not moved, as the
      * bytes are regular content.
      *
      * @param quad First four content bytes, first byte in the top 8 bits
      * @return True if content was recognized as UTF-32
      * @throws IOException if the byte pattern indicates an unsupported
      *   UCS-4 byte order
      */
     private boolean checkUTF32(int quad)
         throws IOException
     {
         final boolean bigEndian;

         if ((quad >> 8) == 0) { // 0x000000?? -> UTF32-BE
             bigEndian = true;
         } else if ((quad & 0x00FFFFFF) == 0) { // 0x??000000 -> UTF32-LE
             bigEndian = false;
         } else {
             // Unsupported orderings get reported (by exception)...
             if ((quad & ~0x00FF0000) == 0) { // 0x00??0000
                 reportWeirdUCS4("3412");
             } else if ((quad & ~0x0000FF00) == 0) { // 0x0000??00
                 reportWeirdUCS4("2143");
             }
             // ... and anything else can not be valid UTF-32 encoded JSON
             return false;
         }
         // Not BOM (just regular content), nothing to skip past
         _bigEndian = bigEndian;
         _bytesPerChar = 4;
         return true;
     }

     /**
      * Checks whether the first two content bytes (no BOM) have the shape of
      * a single UTF-16 encoded character, i.e. one of the two bytes is zero.
      * On a match, records byte order and a width of 2 bytes per char; the
      * input pointer is not moved, as the bytes are regular content.
      *
      * @param i16 First two content bytes, first byte in bits 8 - 15
      * @return True if content was recognized as UTF-16
      */
     private boolean checkUTF16(int i16)
     {
         final boolean firstByteZero = (i16 & 0xFF00) == 0; // -> UTF-16BE
         if (!firstByteZero && (i16 & 0x00FF) != 0) {
             // neither byte is zero: nope, not UTF-16
             return false;
         }
         // If it is not the first byte that is zero, it is the second: UTF-16LE
         _bigEndian = firstByteZero;
         _bytesPerChar = 2;
         return true;
     }

     /*
     /**********************************************************
     /* Internal methods, problem reporting
     /**********************************************************
      */

     /**
      * Reports an unsupported UCS-4 byte order; always throws.
      *
      * @param type Label of the byte order detected, included in the message
      * @throws IOException always (a {@link CharConversionException})
      */
     private void reportWeirdUCS4(String type)
         throws IOException
     {
         final String msg = "Unsupported UCS-4 endianness (" + type + ") detected";
         throw new CharConversionException(msg);
     }

     /*
     /**********************************************************
     /* Internal methods, raw input access
     /**********************************************************
      */

     /**
      * Makes sure that at least the given number of unconsumed bytes are in
      * the buffer, reading more from the stream when there is one.
      *
      * @param minimum Number of bytes needed
      * @return True if that many bytes are available; false if there is no
      *   stream to read from, or a read produced no bytes
      * @throws IOException if reading from the stream fails
      */
     protected boolean ensureLoaded(int minimum)
         throws IOException
     {
         int available = _inputEnd - _inputPtr;
         if (available >= minimum) {
             return true;
         }
         if (_in == null) { // block source: what we have is all there is
             return false;
         }
         /* Let's assume here buffer has enough room -- this will always
          * be true for the limited use this method gets
          */
         do {
             final int count = _in.read(_inputBuffer, _inputEnd, _inputBuffer.length - _inputEnd);
             if (count < 1) {
                 return false;
             }
             _inputEnd += count;
             available += count;
         } while (available < minimum);
         return true;
     }
 }
	package org.codehaus.jackson.impl;

	import java.io.*;

	import org.codehaus.jackson.*;
	import org.codehaus.jackson.format.InputAccessor;
	import org.codehaus.jackson.format.MatchStrength;
	import org.codehaus.jackson.io.*;
	import org.codehaus.jackson.sym.BytesToNameCanonicalizer;
	import org.codehaus.jackson.sym.CharsToNameCanonicalizer;

	/**
	* This class is used to determine the encoding of byte stream
	* that is to contain JSON content. Rules are fairly simple, and
	* defined in JSON specification (RFC-4627 or newer), except
	* for BOM handling, which is a property of underlying
	* streams.
	*/
	public final class ByteSourceBootstrapper
	{
	final static byte UTF8_BOM_1 = (byte) 0xEF;
	final static byte UTF8_BOM_2 = (byte) 0xBB;
	final static byte UTF8_BOM_3 = (byte) 0xBF;

	/*
	/**********************************************************
	/* Configuration
	/**********************************************************
	*/

	protected final IOContext _context;

	protected final InputStream _in;

	/*
	/**********************************************************
	/* Input buffering
	/**********************************************************
	*/

	protected final byte[] _inputBuffer;

	private int _inputPtr;

	private int _inputEnd;

	/**
	* Flag that indicates whether buffer above is to be recycled
	* after being used or not.
	*/
	private final boolean _bufferRecyclable;

	/*
	/**********************************************************
	/* Input location
	/**********************************************************
	*/

	/**
	* Current number of input units (bytes or chars) that were processed in
	* previous blocks,
	* before contents of current input buffer.
	*<p>
	* Note: includes possible BOMs, if those were part of the input.
	*/
	protected int _inputProcessed;

	/*
	/**********************************************************
	/* Data gathered
	/**********************************************************
	*/

	protected boolean _bigEndian = true;

	protected int _bytesPerChar = 0; // 0 means "dunno yet"

	/*
	/**********************************************************
	/* Life-cycle
	/**********************************************************
	*/

	/**
	 * Creates a bootstrapper that pulls its content from a stream.
	 * The working buffer is obtained from the context via
	 * {@code allocReadIOBuffer()}, starts out empty, and is flagged
	 * as recyclable.
	 *
	 * @param ctxt I/O context; supplies the read buffer
	 * @param in Stream to read content from
	 */
	public ByteSourceBootstrapper(IOContext ctxt, InputStream in)
	{
	    _context = ctxt;
	    _in = in;
	    _inputBuffer = ctxt.allocReadIOBuffer();
	    // Nothing has been read yet, so the buffer window is empty
	    _inputPtr = 0;
	    _inputEnd = 0;
	    _inputProcessed = 0;
	    _bufferRecyclable = true;
	}

	/**
	 * Creates a bootstrapper over a caller-supplied byte array; no stream
	 * is involved, and the buffer is flagged as not recyclable.
	 *
	 * @param ctxt I/O context
	 * @param inputBuffer Array that holds the content
	 * @param inputStart Offset of the first content byte within the array
	 * @param inputLen Number of content bytes, counted from {@code inputStart}
	 */
	public ByteSourceBootstrapper(IOContext ctxt, byte[] inputBuffer, int inputStart, int inputLen)
	{
	    _context = ctxt;
	    _inputBuffer = inputBuffer;
	    _in = null;
	    _bufferRecyclable = false;
	    _inputPtr = inputStart;
	    _inputEnd = inputStart + inputLen;
	    // Content does not begin at index 0 of the array: start the processed
	    // count at a negative offset so that location info comes out right
	    _inputProcessed = -inputStart;
	}

	/*
	/**********************************************************
	/* Encoding detection during bootstrapping
	/**********************************************************
	*/

	/**
	* Method that should be called after constructing an instace.
	* It will figure out encoding that content uses, to allow
	* for instantiating a proper scanner object.
	*/
	public JsonEncoding detectEncoding()
	throws IOException, JsonParseException
	{
	boolean foundEncoding = false;

	// First things first: BOM handling
	/* Note: we can require 4 bytes to be read, since no
	* combination of BOM + valid JSON content can have
	* shorter length (shortest valid JSON content is single
	* digit char, but BOMs are chosen such that combination
	* is always at least 4 chars long)
	*/
	if (ensureLoaded(4)) {
	int quad = (_inputBuffer[_inputPtr] << 24)
	\| ((_inputBuffer[_inputPtr+1] & 0xFF) << 16)
	\| ((_inputBuffer[_inputPtr+2] & 0xFF) << 8)
	\| (_inputBuffer[_inputPtr+3] & 0xFF);

	if (handleBOM(quad)) {
	foundEncoding = true;
	} else {
	/* If no BOM, need to auto-detect based on first char;
	* this works since it must be 7-bit ascii (wrt. unicode
	* compatible encodings, only ones JSON can be transferred
	* over)
	*/
	// UTF-32?
	if (checkUTF32(quad)) {
	foundEncoding = true;
	} else if (checkUTF16(quad >>> 16)) {
	foundEncoding = true;
	}
	}
	} else if (ensureLoaded(2)) {
	int i16 = ((_inputBuffer[_inputPtr] & 0xFF) << 8)
	\| (_inputBuffer[_inputPtr+1] & 0xFF);
	if (checkUTF16(i16)) {
	foundEncoding = true;
	}
	}

	JsonEncoding enc;

	/* Not found yet? As per specs, this means it must be UTF-8. */
	if (!foundEncoding) {
	enc = JsonEncoding.UTF8;
	} else {
	switch (_bytesPerChar) {
	case 1:
	enc = JsonEncoding.UTF8;
	break;
	case 2:
	enc = _bigEndian ? JsonEncoding.UTF16_BE : JsonEncoding.UTF16_LE;
	break;
	case 4:
	enc = _bigEndian ? JsonEncoding.UTF32_BE : JsonEncoding.UTF32_LE;
	break;
	default:
	throw new RuntimeException("Internal error"); // should never get here
	}
	}
	_context.setEncoding(enc);
	return enc;
	}

	/*
	/**********************************************************
	/* Constructing a Reader
	/**********************************************************
	*/

	/**
	 * Constructs a {@link Reader} that decodes the remaining content using
	 * the encoding currently stored in the context (expected to have been
	 * set by an earlier {@link #detectEncoding()} call).
	 *<p>
	 * UTF-32 content is decoded with {@code UTF32Reader}; UTF-16 and UTF-8
	 * content goes through a standard {@link InputStreamReader}, fed with
	 * any bytes that are already buffered but not yet consumed.
	 *
	 * @return Reader positioned at the first unconsumed content byte
	 * @throws IOException if the reader can not be constructed
	 */
	public Reader constructReader()
	    throws IOException
	{
	    JsonEncoding enc = _context.getEncoding();
	    switch (enc) {
	    case UTF32_BE:
	    case UTF32_LE:
	        return new UTF32Reader(_context, _in, _inputBuffer, _inputPtr, _inputEnd,
	                               enc.isBigEndian());

	    case UTF16_BE:
	    case UTF16_LE:
	    case UTF8: // only in non-common case where we don't want to do direct mapping
	        {
	            // First: do we have a Stream? If not, need to create one:
	            InputStream in = _in;

	            if (in == null) {
	                /* Third argument of ByteArrayInputStream is a length, not an
	                 * end offset: passing _inputEnd would expose bytes beyond
	                 * the caller's window whenever _inputPtr is above zero.
	                 */
	                in = new ByteArrayInputStream(_inputBuffer, _inputPtr, _inputEnd - _inputPtr);
	            } else if (_inputPtr < _inputEnd) {
	                /* Also, if we have any read but unused input (usually true),
	                 * need to merge that input in:
	                 */
	                in = new MergedStream(_context, in, _inputBuffer, _inputPtr, _inputEnd);
	            }
	            return new InputStreamReader(in, enc.getJavaName());
	        }
	    }
	    throw new RuntimeException("Internal error"); // should never get here
	}

	/**
	 * Detects the encoding of the content and constructs a parser for it:
	 * a byte-based {@code Utf8StreamParser} for UTF-8 content when field
	 * name canonicalization is enabled, a {@code ReaderBasedParser} on top
	 * of {@link #constructReader()} in every other case.
	 *
	 * @param features Bit set of enabled {@code JsonParser.Feature}s
	 * @param codec Object codec handed to the parser
	 * @param rootByteSymbols Root symbol table for the byte-based parser
	 * @param rootCharSymbols Root symbol table for the reader-based parser
	 * @return Newly constructed parser
	 * @throws IOException if encoding detection or reader construction fails
	 */
	public JsonParser constructParser(int features, ObjectCodec codec, BytesToNameCanonicalizer rootByteSymbols, CharsToNameCanonicalizer rootCharSymbols)
	    throws IOException, JsonParseException
	{
	    final JsonEncoding enc = detectEncoding();

	    // As per [JACKSON-259], may want to fully disable canonicalization:
	    final boolean canonicalize = JsonParser.Feature.CANONICALIZE_FIELD_NAMES.enabledIn(features);
	    final boolean intern = JsonParser.Feature.INTERN_FIELD_NAMES.enabledIn(features);

	    /* Without canonicalization the byte-based approach is not performant,
	     * so UTF-8 then falls through to the std reader as well (which is ok
	     * for larger input; not so hot for smaller; but not a common case)
	     */
	    if (canonicalize && enc == JsonEncoding.UTF8) {
	        return new Utf8StreamParser(_context, features, _in, codec,
	                rootByteSymbols.makeChild(canonicalize, intern),
	                _inputBuffer, _inputPtr, _inputEnd, _bufferRecyclable);
	    }
	    final Reader reader = constructReader();
	    return new ReaderBasedParser(_context, features, reader, codec,
	            rootCharSymbols.makeChild(canonicalize, intern));
	}

	/*
	/**********************************************************
	/* Encoding detection for data format auto-detection
	/**********************************************************
	*/

	/**
	* Current implementation is not as thorough as other functionality
	* ({@link org.codehaus.jackson.impl.ByteSourceBootstrapper});
	* supports UTF-8, for example. But it should work, for now, and can
	* be improved as necessary.
	*
	* @since 1.8
	*/
	public static MatchStrength hasJSONFormat(InputAccessor acc) throws IOException
	{
	// Ideally we should see "[" or "{"; but if not, we'll accept double-quote (String)
	// in future could also consider accepting non-standard matches?

	if (!acc.hasMoreBytes()) {
	return MatchStrength.INCONCLUSIVE;
	}
	byte b = acc.nextByte();
	// Very first thing, a UTF-8 BOM?
	if (b == UTF8_BOM_1) { // yes, looks like UTF-8 BOM
	if (!acc.hasMoreBytes()) {
	return MatchStrength.INCONCLUSIVE;
	}
	if (acc.nextByte() != UTF8_BOM_2) {
	return MatchStrength.NO_MATCH;
	}
	if (!acc.hasMoreBytes()) {
	return MatchStrength.INCONCLUSIVE;
	}
	if (acc.nextByte() != UTF8_BOM_3) {
	return MatchStrength.NO_MATCH;
	}
	if (!acc.hasMoreBytes()) {
	return MatchStrength.INCONCLUSIVE;
	}
	b = acc.nextByte();
	}
	// Then possible leading space
	int ch = skipSpace(acc, b);
	if (ch < 0) {
	return MatchStrength.INCONCLUSIVE;
	}
	// First, let's see if it looks like a structured type:
	if (ch == '{') { // JSON object?
	// Ideally we need to find either double-quote or closing bracket
	ch = skipSpace(acc);
	if (ch < 0) {
	return MatchStrength.INCONCLUSIVE;
	}
	if (ch == '"' \|\| ch == '}') {
	return MatchStrength.SOLID_MATCH;
	}
	// ... should we allow non-standard? Let's not yet... can add if need be
	return MatchStrength.NO_MATCH;
	}
	MatchStrength strength;

	if (ch == '[') {
	ch = skipSpace(acc);
	if (ch < 0) {
	return MatchStrength.INCONCLUSIVE;
	}
	// closing brackets is easy; but for now, let's also accept opening...
	if (ch == ']' \|\| ch == '[') {
	return MatchStrength.SOLID_MATCH;
	}
	return MatchStrength.SOLID_MATCH;
	} else {
	// plain old value is not very convincing...
	strength = MatchStrength.WEAK_MATCH;
	}

	if (ch == '"') { // string value
	return strength;
	}
	if (ch <= '9' && ch >= '0') { // number
	return strength;
	}
	if (ch == '-') { // negative number
	ch = skipSpace(acc);
	if (ch < 0) {
	return MatchStrength.INCONCLUSIVE;
	}
	return (ch <= '9' && ch >= '0') ? strength : MatchStrength.NO_MATCH;
	}
	// or one of literals
	if (ch == 'n') { // null
	return tryMatch(acc, "ull", strength);
	}
	if (ch == 't') { // true
	return tryMatch(acc, "rue", strength);
	}
	if (ch == 'f') { // false
	return tryMatch(acc, "alse", strength);
	}
	return MatchStrength.NO_MATCH;
	}

	/**
	 * Checks that the next bytes from the accessor spell out the given
	 * (ASCII) text.
	 *
	 * @param acc Accessor to read bytes from
	 * @param matchStr Text that is expected to come next
	 * @param fullMatchStrength Value to return when the whole text matched
	 * @return {@code fullMatchStrength} on a full match, {@code NO_MATCH}
	 *   at the first differing byte, {@code INCONCLUSIVE} if input ran out
	 */
	private final static MatchStrength tryMatch(InputAccessor acc, String matchStr, MatchStrength fullMatchStrength)
	    throws IOException
	{
	    final int end = matchStr.length();
	    int ix = 0;
	    while (ix < end) {
	        if (!acc.hasMoreBytes()) {
	            return MatchStrength.INCONCLUSIVE;
	        }
	        final byte actual = acc.nextByte();
	        if (actual != matchStr.charAt(ix)) {
	            return MatchStrength.NO_MATCH;
	        }
	        ++ix;
	    }
	    return fullMatchStrength;
	}

	/**
	 * Reads bytes until one is found that is not white space.
	 *
	 * @param acc Accessor to read bytes from
	 * @return First non-space byte as an unsigned value, or -1 if input
	 *   ran out first
	 */
	private final static int skipSpace(InputAccessor acc) throws IOException
	{
	    return acc.hasMoreBytes() ? skipSpace(acc, acc.nextByte()) : -1;
	}

	private final static int skipSpace(InputAccessor acc, byte b) throws IOException
	{
	while (true) {
	int ch = (int) b & 0xFF;
	if (!(ch == ' ' \|\| ch == '\r' \|\| ch == '\n' \|\| ch == '\t')) {
	return ch;
	}
	if (!acc.hasMoreBytes()) {
	return -1;
	}
	b = acc.nextByte();
	ch = (int) b & 0xFF;
	}
	}

	/*
	/**********************************************************
	/* Internal methods, parsing
	/**********************************************************
	*/

	/**
	 * Checks whether the given four bytes start with a byte order mark and,
	 * if so, records character width and byte order and moves the input
	 * pointer past the mark.
	 *
	 * @param quad First four content bytes, first byte in the top 8 bits
	 * @return True if a BOM was successfully found, and encoding
	 *   thereby recognized.
	 * @throws IOException if the mark indicates an unsupported UCS-4 byte order
	 */
	private boolean handleBOM(int quad)
	    throws IOException
	{
	    /* Handling of (usually) optional BOM (required for
	     * multi-byte formats); first 32-bit charsets:
	     */
	    if (quad == 0x0000FEFF) { // UCS-4, BE
	        _bytesPerChar = 4;
	        _bigEndian = true;
	        _inputPtr += 4;
	        return true;
	    }
	    if (quad == 0xFFFE0000) { // UCS-4, LE
	        _bytesPerChar = 4;
	        _bigEndian = false;
	        _inputPtr += 4;
	        return true;
	    }
	    if (quad == 0x0000FFFE) { // UCS-4, unusual byte order
	        reportWeirdUCS4("2143"); // throws exception
	    }
	    if (quad == 0xFEFF0000) { // UCS-4, unusual byte order
	        reportWeirdUCS4("3412"); // throws exception
	    }

	    // Ok, if not, how about 16-bit encoding BOMs?
	    final int top = quad >>> 16;
	    if (top == 0xFEFF || top == 0xFFFE) {
	        _bytesPerChar = 2;
	        _bigEndian = (top == 0xFEFF); // 0xFEFF -> UTF-16 BE, 0xFFFE -> UTF-16 LE
	        _inputPtr += 2;
	        return true;
	    }

	    // And if not, then UTF-8 BOM?
	    if ((quad >>> 8) == 0xEFBBBF) {
	        _bytesPerChar = 1;
	        _bigEndian = true; // doesn't really matter
	        _inputPtr += 3;
	        return true;
	    }
	    return false;
	}

	/**
	 * Checks whether the first four content bytes (no BOM) have the shape of
	 * a single UTF-32 encoded character: three zero bytes plus one
	 * byte that may be non-zero. On a match, records byte order and a
	 * width of 4 bytes per char; the input pointer is not moved, as the
	 * bytes are regular content.
	 *
	 * @param quad First four content bytes, first byte in the top 8 bits
	 * @return True if content was recognized as UTF-32
	 * @throws IOException if the byte pattern indicates an unsupported
	 *   UCS-4 byte order
	 */
	private boolean checkUTF32(int quad)
	    throws IOException
	{
	    final boolean bigEndian;

	    if ((quad >> 8) == 0) { // 0x000000?? -> UTF32-BE
	        bigEndian = true;
	    } else if ((quad & 0x00FFFFFF) == 0) { // 0x??000000 -> UTF32-LE
	        bigEndian = false;
	    } else {
	        // Unsupported orderings get reported (by exception)...
	        if ((quad & ~0x00FF0000) == 0) { // 0x00??0000
	            reportWeirdUCS4("3412");
	        } else if ((quad & ~0x0000FF00) == 0) { // 0x0000??00
	            reportWeirdUCS4("2143");
	        }
	        // ... and anything else can not be valid UTF-32 encoded JSON
	        return false;
	    }
	    // Not BOM (just regular content), nothing to skip past
	    _bigEndian = bigEndian;
	    _bytesPerChar = 4;
	    return true;
	}

	/**
	 * Checks whether the first two content bytes (no BOM) have the shape of
	 * a single UTF-16 encoded character, i.e. one of the two bytes is zero.
	 * On a match, records byte order and a width of 2 bytes per char; the
	 * input pointer is not moved, as the bytes are regular content.
	 *
	 * @param i16 First two content bytes, first byte in bits 8 - 15
	 * @return True if content was recognized as UTF-16
	 */
	private boolean checkUTF16(int i16)
	{
	    final boolean firstByteZero = (i16 & 0xFF00) == 0; // -> UTF-16BE
	    if (!firstByteZero && (i16 & 0x00FF) != 0) {
	        // neither byte is zero: nope, not UTF-16
	        return false;
	    }
	    // If it is not the first byte that is zero, it is the second: UTF-16LE
	    _bigEndian = firstByteZero;
	    _bytesPerChar = 2;
	    return true;
	}

	/*
	/**********************************************************
	/* Internal methods, problem reporting
	/**********************************************************
	*/

	/**
	 * Reports an unsupported UCS-4 byte order; always throws.
	 *
	 * @param type Label of the byte order detected, included in the message
	 * @throws IOException always (a {@link CharConversionException})
	 */
	private void reportWeirdUCS4(String type)
	    throws IOException
	{
	    final String msg = "Unsupported UCS-4 endianness (" + type + ") detected";
	    throw new CharConversionException(msg);
	}

	/*
	/**********************************************************
	/* Internal methods, raw input access
	/**********************************************************
	*/

	/**
	 * Makes sure that at least the given number of unconsumed bytes are in
	 * the buffer, reading more from the stream when there is one.
	 *
	 * @param minimum Number of bytes needed
	 * @return True if that many bytes are available; false if there is no
	 *   stream to read from, or a read produced no bytes
	 * @throws IOException if reading from the stream fails
	 */
	protected boolean ensureLoaded(int minimum)
	    throws IOException
	{
	    int available = _inputEnd - _inputPtr;
	    if (available >= minimum) {
	        return true;
	    }
	    if (_in == null) { // block source: what we have is all there is
	        return false;
	    }
	    /* Let's assume here buffer has enough room -- this will always
	     * be true for the limited use this method gets
	     */
	    do {
	        final int count = _in.read(_inputBuffer, _inputEnd, _inputBuffer.length - _inputEnd);
	        if (count < 1) {
	            return false;
	        }
	        _inputEnd += count;
	        available += count;
	    } while (available < minimum);
	    return true;
	}
	}