1.9.10/src/java/org/codehaus/jackson/io/UTF8Writer.java - jackson - Git at Google

 package org.codehaus.jackson.io;

 import java.io.*;


 /**
  * {@link Writer} that encodes the characters it is given as UTF-8 into a
  * byte buffer obtained from an {@link IOContext}, and passes the encoded
  * bytes on to an underlying {@link OutputStream}.
  *<p>
  * A surrogate pair is combined into a single 4-byte sequence. Since the two
  * halves of a pair may arrive in separate <code>write</code> calls, a pending
  * first half is kept in {@link #_surrogate} between calls.
  */
 public final class UTF8Writer
     extends Writer
 {
     /** Lowest value of the first (leading) half of a surrogate pair */
     final static int SURR1_FIRST = 0xD800;
     /** Highest value of the first (leading) half of a surrogate pair */
     final static int SURR1_LAST = 0xDBFF;
     /** Lowest value of the second (trailing) half of a surrogate pair */
     final static int SURR2_FIRST = 0xDC00;
     /** Highest value of the second (trailing) half of a surrogate pair */
     final static int SURR2_LAST = 0xDFFF;

     /** Context the encoding buffer is allocated from and released to */
     final protected IOContext _context;

     /** Underlying stream; set to null once this writer has been closed */
     OutputStream _out;

     /** Buffer holding encoded bytes not yet written; null once closed */
     byte[] _outBuffer;

     /**
      * Buffer length minus 4: as long as {@link #_outPtr} is below this
      * value, a full 4-byte sequence still fits in the buffer.
      */
     final int _outBufferEnd;

     /** Index of the next free byte in {@link #_outBuffer} */
     int _outPtr;

     /**
      * When outputting chars from outside the BMP, the two halves of a
      * surrogate pair need to be coalesced. To do this, both halves must be
      * known first; and since it is possible that a pair is split between
      * calls, we need temporary storage for the first half.
      * Zero means "no pending first half".
      */
     int _surrogate = 0;

     /**
      * @param ctxt Context used to allocate (and later release) the encoding buffer
      * @param out Stream that receives the encoded bytes
      */
     public UTF8Writer(IOContext ctxt, OutputStream out)
     {
         _context = ctxt;
         _out = out;

         _outBuffer = ctxt.allocWriteEncodingBuffer();
         /* Max. expansion for a single char (in unmodified UTF-8) is
          * 4 bytes (or 3 depending on how you view it -- 4 when recombining
          * surrogate pairs)
          */
         _outBufferEnd = _outBuffer.length - 4;
         _outPtr = 0;
     }

     /**
      * Writes a single char; equivalent to {@link #write(int)}.
      *
      * @return This writer
      */
     @Override
     public Writer append(char c)
         throws IOException
     {
         write(c);
         return this;
     }

     /**
      * Writes out any buffered bytes, releases the encoding buffer back to
      * the context and closes the underlying stream. Calling this on an
      * already closed writer does nothing.
      *<p>
      * Buffer release and stream close are attempted even if writing the
      * remaining bytes fails; the first I/O failure encountered is the one
      * reported. After this method returns or throws, the writer is closed.
      *
      * @throws IOException If flushing or closing fails, or (only after
      *   everything else was closed cleanly) if the first half of a
      *   surrogate pair is still pending
      */
     @Override
     public void close()
         throws IOException
     {
         OutputStream out = _out;
         if (out == null) { // already closed
             return;
         }
         // Detach all state up front, so cleanup is done exactly once
         // regardless of which of the steps below fails.
         _out = null;
         byte[] buf = _outBuffer;
         _outBuffer = null;
         int pending = _outPtr;
         _outPtr = 0;
         int code = _surrogate;
         _surrogate = 0;

         IOException failure = null;
         try {
             if (pending > 0) {
                 out.write(buf, 0, pending);
             }
         } catch (IOException e) {
             failure = e;
         } finally {
             try {
                 if (buf != null) {
                     _context.releaseWriteEncodingBuffer(buf);
                 }
             } finally {
                 try {
                     out.close();
                 } catch (IOException e) {
                     if (failure == null) { // do not mask an earlier failure
                         failure = e;
                     }
                 }
             }
         }
         if (failure != null) {
             throw failure;
         }

         /* Let's 'flush' orphan surrogate, no matter what; but only
          * after cleanly closing everything else.
          */
         if (code > 0) {
             throwIllegal(code);
         }
     }

     /**
      * Writes buffered bytes to the underlying stream and flushes it.
      * Does nothing once the writer has been closed. A pending first half
      * of a surrogate pair is not written.
      */
     @Override
     public void flush()
         throws IOException
     {
         if (_out != null) {
             if (_outPtr > 0) {
                 _out.write(_outBuffer, 0, _outPtr);
                 _outPtr = 0;
             }
             _out.flush();
         }
     }

     @Override
     public void write(char[] cbuf)
         throws IOException
     {
         write(cbuf, 0, cbuf.length);
     }

     /**
      * Encodes <code>len</code> chars of <code>cbuf</code>, starting at
      * <code>off</code>. If the input ends with the first half of a
      * surrogate pair, that half is retained until the next write.
      *
      * @throws IOException If the underlying stream fails, or if an
      *   unmatched or broken surrogate pair is encountered
      */
     @Override
     public void write(char[] cbuf, int off, int len)
         throws IOException
     {
         if (len < 2) {
             if (len == 1) {
                 write(cbuf[off]);
             }
             return;
         }

         // First: do we have a leftover surrogate to deal with?
         if (_surrogate > 0) {
             char second = cbuf[off++];
             --len;
             write(convertSurrogate(second));
             // will have at least one more char
         }

         // Work on local copies; _outPtr is synced back on every exit path
         int outPtr = _outPtr;
         byte[] outBuf = _outBuffer;
         int outBufLast = _outBufferEnd; // has 4 'spare' bytes

         // All right; can just loop it nice and easy now:
         len += off; // len will now be the end of input buffer

         output_loop:
         for (; off < len; ) {
             /* First, let's ensure we can output at least 4 bytes
              * (longest UTF-8 encoded codepoint):
              */
             if (outPtr >= outBufLast) {
                 _out.write(outBuf, 0, outPtr);
                 outPtr = 0;
             }

             int c = cbuf[off++];
             // And then see if we have an Ascii char:
             if (c < 0x80) { // If so, can do a tight inner loop:
                 outBuf[outPtr++] = (byte)c;
                 // Let's calc how many ascii chars we can copy at most:
                 int maxInCount = (len - off);
                 int maxOutCount = (outBufLast - outPtr);

                 if (maxInCount > maxOutCount) {
                     maxInCount = maxOutCount;
                 }
                 maxInCount += off;
                 ascii_loop:
                 while (true) {
                     if (off >= maxInCount) { // done with max. ascii seq
                         continue output_loop;
                     }
                     c = cbuf[off++];
                     if (c >= 0x80) {
                         break ascii_loop;
                     }
                     outBuf[outPtr++] = (byte) c;
                 }
             }

             // Nope, multi-byte:
             if (c < 0x800) { // 2-byte
                 outBuf[outPtr++] = (byte) (0xc0 | (c >> 6));
                 outBuf[outPtr++] = (byte) (0x80 | (c & 0x3f));
             } else { // 3 or 4 bytes
                 // Surrogates?
                 if (c < SURR1_FIRST || c > SURR2_LAST) {
                     outBuf[outPtr++] = (byte) (0xe0 | (c >> 12));
                     outBuf[outPtr++] = (byte) (0x80 | ((c >> 6) & 0x3f));
                     outBuf[outPtr++] = (byte) (0x80 | (c & 0x3f));
                     continue;
                 }
                 // Yup, a surrogate:
                 if (c > SURR1_LAST) { // must be from first range
                     _outPtr = outPtr;
                     throwIllegal(c);
                 }
                 _surrogate = c;
                 // and if so, followed by another from next range
                 if (off >= len) { // unless we hit the end?
                     break;
                 }
                 /* convertSurrogate() throws on a broken pair: sync the
                  * buffer pointer first, so that bytes encoded so far are
                  * kept and a later flush/close does not use a stale value.
                  */
                 _outPtr = outPtr;
                 c = convertSurrogate(cbuf[off++]);
                 if (c > 0x10FFFF) { // illegal in JSON as well as in XML
                     throwIllegal(c);
                 }
                 outBuf[outPtr++] = (byte) (0xf0 | (c >> 18));
                 outBuf[outPtr++] = (byte) (0x80 | ((c >> 12) & 0x3f));
                 outBuf[outPtr++] = (byte) (0x80 | ((c >> 6) & 0x3f));
                 outBuf[outPtr++] = (byte) (0x80 | (c & 0x3f));
             }
         }
         _outPtr = outPtr;
     }

     /**
      * Encodes a single character. A first-half surrogate is only stored;
      * it is output once its second half arrives. This method is also used
      * internally with an already combined code point (above 0xFFFF),
      * which is why the 4-byte case is handled here.
      *
      * @throws IOException If the underlying stream fails, or if the value
      *   is an unmatched second-half surrogate, does not complete a pending
      *   pair, or is above 0x10FFFF
      */
     @Override
     public void write(int c) throws IOException
     {
         // First; do we have a left over surrogate?
         if (_surrogate > 0) {
             c = convertSurrogate(c);
             // If not, do we start with a surrogate?
         } else if (c >= SURR1_FIRST && c <= SURR2_LAST) {
             // Illegal to get second part without first:
             if (c > SURR1_LAST) {
                 throwIllegal(c);
             }
             // First part just needs to be held for now
             _surrogate = c;
             return;
         }

         if (_outPtr >= _outBufferEnd) { // let's require enough room, first
             _out.write(_outBuffer, 0, _outPtr);
             _outPtr = 0;
         }

         if (c < 0x80) { // ascii
             _outBuffer[_outPtr++] = (byte) c;
         } else {
             int ptr = _outPtr;
             if (c < 0x800) { // 2-byte
                 _outBuffer[ptr++] = (byte) (0xc0 | (c >> 6));
                 _outBuffer[ptr++] = (byte) (0x80 | (c & 0x3f));
             } else if (c <= 0xFFFF) { // 3 bytes
                 _outBuffer[ptr++] = (byte) (0xe0 | (c >> 12));
                 _outBuffer[ptr++] = (byte) (0x80 | ((c >> 6) & 0x3f));
                 _outBuffer[ptr++] = (byte) (0x80 | (c & 0x3f));
             } else { // 4 bytes
                 if (c > 0x10FFFF) { // illegal
                     throwIllegal(c);
                 }
                 _outBuffer[ptr++] = (byte) (0xf0 | (c >> 18));
                 _outBuffer[ptr++] = (byte) (0x80 | ((c >> 12) & 0x3f));
                 _outBuffer[ptr++] = (byte) (0x80 | ((c >> 6) & 0x3f));
                 _outBuffer[ptr++] = (byte) (0x80 | (c & 0x3f));
             }
             _outPtr = ptr;
         }
     }

     @Override
     public void write(String str) throws IOException
     {
         write(str, 0, str.length());
     }

     /**
      * Encodes <code>len</code> chars of <code>str</code>, starting at
      * <code>off</code>. If the input ends with the first half of a
      * surrogate pair, that half is retained until the next write.
      *
      * @throws IOException If the underlying stream fails, or if an
      *   unmatched or broken surrogate pair is encountered
      */
     @Override
     public void write(String str, int off, int len)  throws IOException
     {
         if (len < 2) {
             if (len == 1) {
                 write(str.charAt(off));
             }
             return;
         }

         // First: do we have a leftover surrogate to deal with?
         if (_surrogate > 0) {
             char second = str.charAt(off++);
             --len;
             write(convertSurrogate(second));
             // will have at least one more char (case of 1 char was checked earlier on)
         }

         // Work on local copies; _outPtr is synced back on every exit path
         int outPtr = _outPtr;
         byte[] outBuf = _outBuffer;
         int outBufLast = _outBufferEnd; // has 4 'spare' bytes

         // All right; can just loop it nice and easy now:
         len += off; // len will now be the end of input buffer

         output_loop:
         for (; off < len; ) {
             /* First, let's ensure we can output at least 4 bytes
              * (longest UTF-8 encoded codepoint):
              */
             if (outPtr >= outBufLast) {
                 _out.write(outBuf, 0, outPtr);
                 outPtr = 0;
             }

             int c = str.charAt(off++);
             // And then see if we have an Ascii char:
             if (c < 0x80) { // If so, can do a tight inner loop:
                 outBuf[outPtr++] = (byte)c;
                 // Let's calc how many ascii chars we can copy at most:
                 int maxInCount = (len - off);
                 int maxOutCount = (outBufLast - outPtr);

                 if (maxInCount > maxOutCount) {
                     maxInCount = maxOutCount;
                 }
                 maxInCount += off;
                 ascii_loop:
                 while (true) {
                     if (off >= maxInCount) { // done with max. ascii seq
                         continue output_loop;
                     }
                     c = str.charAt(off++);
                     if (c >= 0x80) {
                         break ascii_loop;
                     }
                     outBuf[outPtr++] = (byte) c;
                 }
             }

             // Nope, multi-byte:
             if (c < 0x800) { // 2-byte
                 outBuf[outPtr++] = (byte) (0xc0 | (c >> 6));
                 outBuf[outPtr++] = (byte) (0x80 | (c & 0x3f));
             } else { // 3 or 4 bytes
                 // Surrogates?
                 if (c < SURR1_FIRST || c > SURR2_LAST) {
                     outBuf[outPtr++] = (byte) (0xe0 | (c >> 12));
                     outBuf[outPtr++] = (byte) (0x80 | ((c >> 6) & 0x3f));
                     outBuf[outPtr++] = (byte) (0x80 | (c & 0x3f));
                     continue;
                 }
                 // Yup, a surrogate:
                 if (c > SURR1_LAST) { // must be from first range
                     _outPtr = outPtr;
                     throwIllegal(c);
                 }
                 _surrogate = c;
                 // and if so, followed by another from next range
                 if (off >= len) { // unless we hit the end?
                     break;
                 }
                 /* convertSurrogate() throws on a broken pair: sync the
                  * buffer pointer first, so that bytes encoded so far are
                  * kept and a later flush/close does not use a stale value.
                  */
                 _outPtr = outPtr;
                 c = convertSurrogate(str.charAt(off++));
                 if (c > 0x10FFFF) { // illegal, as per RFC 4627
                     throwIllegal(c);
                 }
                 outBuf[outPtr++] = (byte) (0xf0 | (c >> 18));
                 outBuf[outPtr++] = (byte) (0x80 | ((c >> 12) & 0x3f));
                 outBuf[outPtr++] = (byte) (0x80 | ((c >> 6) & 0x3f));
                 outBuf[outPtr++] = (byte) (0x80 | (c & 0x3f));
             }
         }
         _outPtr = outPtr;
     }

     /*
     /**********************************************************
     /* Internal methods
     /**********************************************************
      */

     /**
      * Method called to calculate UTF codepoint, from a surrogate pair.
      * The pending first half is taken from {@link #_surrogate}, which is
      * cleared whether or not the second half turns out to be valid.
      *
      * @param secondPart Candidate second half of the pair
      * @return Combined code point (0x10000 or above)
      * @throws IOException If <code>secondPart</code> is not in the
      *   second-half surrogate range
      */
     private int convertSurrogate(int secondPart)
         throws IOException
     {
         int firstPart = _surrogate;
         _surrogate = 0;

         // Ok, then, is the second part valid?
         if (secondPart < SURR2_FIRST || secondPart > SURR2_LAST) {
             throw new IOException("Broken surrogate pair: first char 0x"+Integer.toHexString(firstPart)+", second 0x"+Integer.toHexString(secondPart)+"; illegal combination");
         }
         return 0x10000 + ((firstPart - SURR1_FIRST) << 10) + (secondPart - SURR2_FIRST);
     }

     /**
      * Always throws an {@link IOException}, with a message chosen by the
      * kind of problem the given value represents (above the maximum code
      * point, unmatched first or second surrogate half, or other).
      */
     private void throwIllegal(int code)
         throws IOException
     {
         if (code > 0x10FFFF) { // over max?
             throw new IOException("Illegal character point (0x"+Integer.toHexString(code)+") to output; max is 0x10FFFF as per RFC 4627");
         }
         if (code >= SURR1_FIRST) {
             if (code <= SURR1_LAST) { // Unmatched first part (closing without second part?)
                 throw new IOException("Unmatched first part of surrogate pair (0x"+Integer.toHexString(code)+")");
             }
             throw new IOException("Unmatched second part of surrogate pair (0x"+Integer.toHexString(code)+")");
         }

         // should we ever get this?
         throw new IOException("Illegal character point (0x"+Integer.toHexString(code)+") to output");
     }
 }
	package org.codehaus.jackson.io;

	import java.io.*;


	public final class UTF8Writer
	extends Writer
	{
	final static int SURR1_FIRST = 0xD800;
	final static int SURR1_LAST = 0xDBFF;
	final static int SURR2_FIRST = 0xDC00;
	final static int SURR2_LAST = 0xDFFF;

	final protected IOContext _context;

	OutputStream _out;

	byte[] _outBuffer;

	final int _outBufferEnd;

	int _outPtr;

	/**
	* When outputting chars from BMP, surrogate pairs need to be coalesced.
	* To do this, both pairs must be known first; and since it is possible
	* pairs may be split, we need temporary storage for the first half
	*/
	int _surrogate = 0;

	public UTF8Writer(IOContext ctxt, OutputStream out)
	{
	_context = ctxt;
	_out = out;

	_outBuffer = ctxt.allocWriteEncodingBuffer();
	/* Max. expansion for a single char (in unmodified UTF-8) is
	* 4 bytes (or 3 depending on how you view it -- 4 when recombining
	* surrogate pairs)
	*/
	_outBufferEnd = _outBuffer.length - 4;
	_outPtr = 0;
	}

	@Override
	public Writer append(char c)
	throws IOException
	{
	write(c);
	return this;
	}

	@Override
	public void close()
	throws IOException
	{
	if (_out != null) {
	if (_outPtr > 0) {
	_out.write(_outBuffer, 0, _outPtr);
	_outPtr = 0;
	}
	OutputStream out = _out;
	_out = null;

	byte[] buf = _outBuffer;
	if (buf != null) {
	_outBuffer = null;
	_context.releaseWriteEncodingBuffer(buf);
	}

	out.close();

	/* Let's 'flush' orphan surrogate, no matter what; but only
	* after cleanly closing everything else.
	*/
	int code = _surrogate;
	_surrogate = 0;
	if (code > 0) {
	throwIllegal(code);
	}
	}
	}

	@Override
	public void flush()
	throws IOException
	{
	if (_out != null) {
	if (_outPtr > 0) {
	_out.write(_outBuffer, 0, _outPtr);
	_outPtr = 0;
	}
	_out.flush();
	}
	}

	@Override
	public void write(char[] cbuf)
	throws IOException
	{
	write(cbuf, 0, cbuf.length);
	}

	@Override
	public void write(char[] cbuf, int off, int len)
	throws IOException
	{
	if (len < 2) {
	if (len == 1) {
	write(cbuf[off]);
	}
	return;
	}

	// First: do we have a leftover surrogate to deal with?
	if (_surrogate > 0) {
	char second = cbuf[off++];
	--len;
	write(convertSurrogate(second));
	// will have at least one more char
	}

	int outPtr = _outPtr;
	byte[] outBuf = _outBuffer;
	int outBufLast = _outBufferEnd; // has 4 'spare' bytes

	// All right; can just loop it nice and easy now:
	len += off; // len will now be the end of input buffer

	output_loop:
	for (; off < len; ) {
	/* First, let's ensure we can output at least 4 bytes
	* (longest UTF-8 encoded codepoint):
	*/
	if (outPtr >= outBufLast) {
	_out.write(outBuf, 0, outPtr);
	outPtr = 0;
	}

	int c = cbuf[off++];
	// And then see if we have an Ascii char:
	if (c < 0x80) { // If so, can do a tight inner loop:
	outBuf[outPtr++] = (byte)c;
	// Let's calc how many ascii chars we can copy at most:
	int maxInCount = (len - off);
	int maxOutCount = (outBufLast - outPtr);

	if (maxInCount > maxOutCount) {
	maxInCount = maxOutCount;
	}
	maxInCount += off;
	ascii_loop:
	while (true) {
	if (off >= maxInCount) { // done with max. ascii seq
	continue output_loop;
	}
	c = cbuf[off++];
	if (c >= 0x80) {
	break ascii_loop;
	}
	outBuf[outPtr++] = (byte) c;
	}
	}

	// Nope, multi-byte:
	if (c < 0x800) { // 2-byte
	outBuf[outPtr++] = (byte) (0xc0 \| (c >> 6));
	outBuf[outPtr++] = (byte) (0x80 \| (c & 0x3f));
	} else { // 3 or 4 bytes
	// Surrogates?
	if (c < SURR1_FIRST \|\| c > SURR2_LAST) {
	outBuf[outPtr++] = (byte) (0xe0 \| (c >> 12));
	outBuf[outPtr++] = (byte) (0x80 \| ((c >> 6) & 0x3f));
	outBuf[outPtr++] = (byte) (0x80 \| (c & 0x3f));
	continue;
	}
	// Yup, a surrogate:
	if (c > SURR1_LAST) { // must be from first range
	_outPtr = outPtr;
	throwIllegal(c);
	}
	_surrogate = c;
	// and if so, followed by another from next range
	if (off >= len) { // unless we hit the end?
	break;
	}
	c = convertSurrogate(cbuf[off++]);
	if (c > 0x10FFFF) { // illegal in JSON as well as in XML
	_outPtr = outPtr;
	throwIllegal(c);
	}
	outBuf[outPtr++] = (byte) (0xf0 \| (c >> 18));
	outBuf[outPtr++] = (byte) (0x80 \| ((c >> 12) & 0x3f));
	outBuf[outPtr++] = (byte) (0x80 \| ((c >> 6) & 0x3f));
	outBuf[outPtr++] = (byte) (0x80 \| (c & 0x3f));
	}
	}
	_outPtr = outPtr;
	}

	@Override
	public void write(int c) throws IOException
	{
	// First; do we have a left over surrogate?
	if (_surrogate > 0) {
	c = convertSurrogate(c);
	// If not, do we start with a surrogate?
	} else if (c >= SURR1_FIRST && c <= SURR2_LAST) {
	// Illegal to get second part without first:
	if (c > SURR1_LAST) {
	throwIllegal(c);
	}
	// First part just needs to be held for now
	_surrogate = c;
	return;
	}

	if (_outPtr >= _outBufferEnd) { // let's require enough room, first
	_out.write(_outBuffer, 0, _outPtr);
	_outPtr = 0;
	}

	if (c < 0x80) { // ascii
	_outBuffer[_outPtr++] = (byte) c;
	} else {
	int ptr = _outPtr;
	if (c < 0x800) { // 2-byte
	_outBuffer[ptr++] = (byte) (0xc0 \| (c >> 6));
	_outBuffer[ptr++] = (byte) (0x80 \| (c & 0x3f));
	} else if (c <= 0xFFFF) { // 3 bytes
	_outBuffer[ptr++] = (byte) (0xe0 \| (c >> 12));
	_outBuffer[ptr++] = (byte) (0x80 \| ((c >> 6) & 0x3f));
	_outBuffer[ptr++] = (byte) (0x80 \| (c & 0x3f));
	} else { // 4 bytes
	if (c > 0x10FFFF) { // illegal
	throwIllegal(c);
	}
	_outBuffer[ptr++] = (byte) (0xf0 \| (c >> 18));
	_outBuffer[ptr++] = (byte) (0x80 \| ((c >> 12) & 0x3f));
	_outBuffer[ptr++] = (byte) (0x80 \| ((c >> 6) & 0x3f));
	_outBuffer[ptr++] = (byte) (0x80 \| (c & 0x3f));
	}
	_outPtr = ptr;
	}
	}

	@Override
	public void write(String str) throws IOException
	{
	write(str, 0, str.length());
	}

	@Override
	public void write(String str, int off, int len) throws IOException
	{
	if (len < 2) {
	if (len == 1) {
	write(str.charAt(off));
	}
	return;
	}

	// First: do we have a leftover surrogate to deal with?
	if (_surrogate > 0) {
	char second = str.charAt(off++);
	--len;
	write(convertSurrogate(second));
	// will have at least one more char (case of 1 char was checked earlier on)
	}

	int outPtr = _outPtr;
	byte[] outBuf = _outBuffer;
	int outBufLast = _outBufferEnd; // has 4 'spare' bytes

	// All right; can just loop it nice and easy now:
	len += off; // len will now be the end of input buffer

	output_loop:
	for (; off < len; ) {
	/* First, let's ensure we can output at least 4 bytes
	* (longest UTF-8 encoded codepoint):
	*/
	if (outPtr >= outBufLast) {
	_out.write(outBuf, 0, outPtr);
	outPtr = 0;
	}

	int c = str.charAt(off++);
	// And then see if we have an Ascii char:
	if (c < 0x80) { // If so, can do a tight inner loop:
	outBuf[outPtr++] = (byte)c;
	// Let's calc how many ascii chars we can copy at most:
	int maxInCount = (len - off);
	int maxOutCount = (outBufLast - outPtr);

	if (maxInCount > maxOutCount) {
	maxInCount = maxOutCount;
	}
	maxInCount += off;
	ascii_loop:
	while (true) {
	if (off >= maxInCount) { // done with max. ascii seq
	continue output_loop;
	}
	c = str.charAt(off++);
	if (c >= 0x80) {
	break ascii_loop;
	}
	outBuf[outPtr++] = (byte) c;
	}
	}

	// Nope, multi-byte:
	if (c < 0x800) { // 2-byte
	outBuf[outPtr++] = (byte) (0xc0 \| (c >> 6));
	outBuf[outPtr++] = (byte) (0x80 \| (c & 0x3f));
	} else { // 3 or 4 bytes
	// Surrogates?
	if (c < SURR1_FIRST \|\| c > SURR2_LAST) {
	outBuf[outPtr++] = (byte) (0xe0 \| (c >> 12));
	outBuf[outPtr++] = (byte) (0x80 \| ((c >> 6) & 0x3f));
	outBuf[outPtr++] = (byte) (0x80 \| (c & 0x3f));
	continue;
	}
	// Yup, a surrogate:
	if (c > SURR1_LAST) { // must be from first range
	_outPtr = outPtr;
	throwIllegal(c);
	}
	_surrogate = c;
	// and if so, followed by another from next range
	if (off >= len) { // unless we hit the end?
	break;
	}
	c = convertSurrogate(str.charAt(off++));
	if (c > 0x10FFFF) { // illegal, as per RFC 4627
	_outPtr = outPtr;
	throwIllegal(c);
	}
	outBuf[outPtr++] = (byte) (0xf0 \| (c >> 18));
	outBuf[outPtr++] = (byte) (0x80 \| ((c >> 12) & 0x3f));
	outBuf[outPtr++] = (byte) (0x80 \| ((c >> 6) & 0x3f));
	outBuf[outPtr++] = (byte) (0x80 \| (c & 0x3f));
	}
	}
	_outPtr = outPtr;
	}

	/*
	/**********************************************************
	/* Internal methods
	/**********************************************************
	*/

	/**
	* Method called to calculate UTF codepoint, from a surrogate pair.
	*/
	private int convertSurrogate(int secondPart)
	throws IOException
	{
	int firstPart = _surrogate;
	_surrogate = 0;

	// Ok, then, is the second part valid?
	if (secondPart < SURR2_FIRST \|\| secondPart > SURR2_LAST) {
	throw new IOException("Broken surrogate pair: first char 0x"+Integer.toHexString(firstPart)+", second 0x"+Integer.toHexString(secondPart)+"; illegal combination");
	}
	return 0x10000 + ((firstPart - SURR1_FIRST) << 10) + (secondPart - SURR2_FIRST);
	}

	private void throwIllegal(int code)
	throws IOException
	{
	if (code > 0x10FFFF) { // over max?
	throw new IOException("Illegal character point (0x"+Integer.toHexString(code)+") to output; max is 0x10FFFF as per RFC 4627");
	}
	if (code >= SURR1_FIRST) {
	if (code <= SURR1_LAST) { // Unmatched first part (closing without second part?)
	throw new IOException("Unmatched first part of surrogate pair (0x"+Integer.toHexString(code)+")");
	}
	throw new IOException("Unmatched second part of surrogate pair (0x"+Integer.toHexString(code)+")");
	}

	// should we ever get this?
	throw new IOException("Illegal character point (0x"+Integer.toHexString(code)+") to output");
	}
	}