| package org.codehaus.jackson.impl; |
| |
| import java.io.*; |
| |
| import org.codehaus.jackson.*; |
| import org.codehaus.jackson.io.IOContext; |
| import org.codehaus.jackson.sym.CharsToNameCanonicalizer; |
| import org.codehaus.jackson.util.*; |
| |
| /** |
| * This is a concrete implementation of {@link JsonParser}, which is |
| * based on a {@link java.io.Reader} to handle low-level character |
| * conversion tasks. |
| */ |
| public final class ReaderBasedParser |
| extends ReaderBasedNumericParser |
| { |
| /* |
| /********************************************************** |
| /* Configuration, state |
| /********************************************************** |
| */ |
| |
| protected ObjectCodec _objectCodec; |
| |
| final protected CharsToNameCanonicalizer _symbols; |
| |
| /* |
| /********************************************************** |
| /* Life-cycle |
| /********************************************************** |
| */ |
| |
/**
 * Creates a parser reading its input from the given reader.
 *
 * @param ioCtxt I/O context, handed to the superclass
 * @param features parser feature flags, handed to the superclass
 * @param r reader to get character input from, handed to the superclass
 * @param codec codec to expose through {@link #getCodec()}
 * @param st symbol table used for field name lookups
 */
public ReaderBasedParser(IOContext ioCtxt, int features, Reader r,
        ObjectCodec codec, CharsToNameCanonicalizer st)
{
    super(ioCtxt, features, r);
    this._symbols = st;
    this._objectCodec = codec;
}
| |
| public ObjectCodec getCodec() { |
| return _objectCodec; |
| } |
| |
| public void setCodec(ObjectCodec c) { |
| _objectCodec = c; |
| } |
| |
| /* |
| /********************************************************** |
| /* Public API, traversal |
| /********************************************************** |
| */ |
| |
| /** |
| * @return Next token from the stream, if any found, or null |
| * to indicate end-of-input |
| */ |
| @Override |
| public JsonToken nextToken() |
| throws IOException, JsonParseException |
| { |
| /* First: field names are special -- we will always tokenize |
| * (part of) value along with field name to simplify |
| * state handling. If so, can and need to use secondary token: |
| */ |
| if (_currToken == JsonToken.FIELD_NAME) { |
| return _nextAfterName(); |
| } |
| if (_tokenIncomplete) { |
| _skipString(); // only strings can be partial |
| } |
| |
| int i = _skipWSOrEnd(); |
| if (i < 0) { // end-of-input |
| /* 19-Feb-2009, tatu: Should actually close/release things |
| * like input source, symbol table and recyclable buffers now. |
| */ |
| close(); |
| return (_currToken = null); |
| } |
| |
| /* First, need to ensure we know the starting location of token |
| * after skipping leading white space |
| */ |
| _tokenInputTotal = _currInputProcessed + _inputPtr - 1; |
| _tokenInputRow = _currInputRow; |
| _tokenInputCol = _inputPtr - _currInputRowStart - 1; |
| |
| // finally: clear any data retained so far |
| _binaryValue = null; |
| |
| // Closing scope? |
| if (i == INT_RBRACKET) { |
| if (!_parsingContext.inArray()) { |
| _reportMismatchedEndMarker(i, '}'); |
| } |
| _parsingContext = _parsingContext.getParent(); |
| return (_currToken = JsonToken.END_ARRAY); |
| } |
| if (i == INT_RCURLY) { |
| if (!_parsingContext.inObject()) { |
| _reportMismatchedEndMarker(i, ']'); |
| } |
| _parsingContext = _parsingContext.getParent(); |
| return (_currToken = JsonToken.END_OBJECT); |
| } |
| |
| // Nope: do we then expect a comma? |
| if (_parsingContext.expectComma()) { |
| if (i != INT_COMMA) { |
| _reportUnexpectedChar(i, "was expecting comma to separate "+_parsingContext.getTypeDesc()+" entries"); |
| } |
| i = _skipWS(); |
| } |
| |
| /* And should we now have a name? Always true for |
| * Object contexts, since the intermediate 'expect-value' |
| * state is never retained. |
| */ |
| boolean inObject = _parsingContext.inObject(); |
| if (inObject) { |
| // First, field name itself: |
| String name = _parseFieldName(i); |
| _parsingContext.setCurrentName(name); |
| _currToken = JsonToken.FIELD_NAME; |
| i = _skipWS(); |
| if (i != INT_COLON) { |
| _reportUnexpectedChar(i, "was expecting a colon to separate field name and value"); |
| } |
| i = _skipWS(); |
| } |
| |
| // Ok: we must have a value... what is it? |
| |
| JsonToken t; |
| |
| switch (i) { |
| case INT_QUOTE: |
| _tokenIncomplete = true; |
| t = JsonToken.VALUE_STRING; |
| break; |
| case INT_LBRACKET: |
| if (!inObject) { |
| _parsingContext = _parsingContext.createChildArrayContext(_tokenInputRow, _tokenInputCol); |
| } |
| t = JsonToken.START_ARRAY; |
| break; |
| case INT_LCURLY: |
| if (!inObject) { |
| _parsingContext = _parsingContext.createChildObjectContext(_tokenInputRow, _tokenInputCol); |
| } |
| t = JsonToken.START_OBJECT; |
| break; |
| case INT_RBRACKET: |
| case INT_RCURLY: |
| // Error: neither is valid at this point; valid closers have |
| // been handled earlier |
| _reportUnexpectedChar(i, "expected a value"); |
| case INT_t: |
| _matchToken(JsonToken.VALUE_TRUE); |
| t = JsonToken.VALUE_TRUE; |
| break; |
| case INT_f: |
| _matchToken(JsonToken.VALUE_FALSE); |
| t = JsonToken.VALUE_FALSE; |
| break; |
| case INT_n: |
| _matchToken(JsonToken.VALUE_NULL); |
| t = JsonToken.VALUE_NULL; |
| break; |
| |
| case INT_MINUS: |
| /* Should we have separate handling for plus? Although |
| * it is not allowed per se, it may be erroneously used, |
| * and could be indicate by a more specific error message. |
| */ |
| case INT_0: |
| case INT_1: |
| case INT_2: |
| case INT_3: |
| case INT_4: |
| case INT_5: |
| case INT_6: |
| case INT_7: |
| case INT_8: |
| case INT_9: |
| t = parseNumberText(i); |
| break; |
| default: |
| t = _handleUnexpectedValue(i); |
| break; |
| } |
| |
| if (inObject) { |
| _nextToken = t; |
| return _currToken; |
| } |
| _currToken = t; |
| return t; |
| } |
| |
| private final JsonToken _nextAfterName() |
| { |
| _nameCopied = false; // need to invalidate if it was copied |
| JsonToken t = _nextToken; |
| _nextToken = null; |
| // Also: may need to start new context? |
| if (t == JsonToken.START_ARRAY) { |
| _parsingContext = _parsingContext.createChildArrayContext(_tokenInputRow, _tokenInputCol); |
| } else if (t == JsonToken.START_OBJECT) { |
| _parsingContext = _parsingContext.createChildObjectContext(_tokenInputRow, _tokenInputCol); |
| } |
| return (_currToken = t); |
| } |
| |
| @Override |
| public void close() throws IOException |
| { |
| super.close(); |
| _symbols.release(); |
| } |
| |
| /* |
| /********************************************************** |
| /* Internal methods, secondary parsing |
| /********************************************************** |
| */ |
| |
| protected final String _parseFieldName(int i) |
| throws IOException, JsonParseException |
| { |
| if (i != INT_QUOTE) { |
| return _handleUnusualFieldName(i); |
| } |
| /* First: let's try to see if we have a simple name: one that does |
| * not cross input buffer boundary, and does not contain escape |
| * sequences. |
| */ |
| int ptr = _inputPtr; |
| int hash = 0; |
| final int inputLen = _inputEnd; |
| |
| if (ptr < inputLen) { |
| final int[] codes = CharTypes.getInputCodeLatin1(); |
| final int maxCode = codes.length; |
| |
| do { |
| int ch = _inputBuffer[ptr]; |
| if (ch < maxCode && codes[ch] != 0) { |
| if (ch == '"') { |
| int start = _inputPtr; |
| _inputPtr = ptr+1; // to skip the quote |
| return _symbols.findSymbol(_inputBuffer, start, ptr - start, hash); |
| } |
| break; |
| } |
| hash = (hash * 31) + ch; |
| ++ptr; |
| } while (ptr < inputLen); |
| } |
| |
| int start = _inputPtr; |
| _inputPtr = ptr; |
| return _parseFieldName2(start, hash, INT_QUOTE); |
| } |
| |
| private String _parseFieldName2(int startPtr, int hash, int endChar) |
| throws IOException, JsonParseException |
| { |
| _textBuffer.resetWithShared(_inputBuffer, startPtr, (_inputPtr - startPtr)); |
| |
| /* Output pointers; calls will also ensure that the buffer is |
| * not shared and has room for at least one more char. |
| */ |
| char[] outBuf = _textBuffer.getCurrentSegment(); |
| int outPtr = _textBuffer.getCurrentSegmentSize(); |
| |
| while (true) { |
| if (_inputPtr >= _inputEnd) { |
| if (!loadMore()) { |
| _reportInvalidEOF(": was expecting closing '"+((char) endChar)+"' for name"); |
| } |
| } |
| char c = _inputBuffer[_inputPtr++]; |
| int i = (int) c; |
| if (i <= INT_BACKSLASH) { |
| if (i == INT_BACKSLASH) { |
| /* Although chars outside of BMP are to be escaped as |
| * an UTF-16 surrogate pair, does that affect decoding? |
| * For now let's assume it does not. |
| */ |
| c = _decodeEscaped(); |
| } else if (i <= endChar) { |
| if (i == endChar) { |
| break; |
| } |
| if (i < INT_SPACE) { |
| _throwUnquotedSpace(i, "name"); |
| } |
| } |
| } |
| hash = (hash * 31) + i; |
| // Ok, let's add char to output: |
| outBuf[outPtr++] = c; |
| |
| // Need more room? |
| if (outPtr >= outBuf.length) { |
| outBuf = _textBuffer.finishCurrentSegment(); |
| outPtr = 0; |
| } |
| } |
| _textBuffer.setCurrentLength(outPtr); |
| { |
| TextBuffer tb = _textBuffer; |
| char[] buf = tb.getTextBuffer(); |
| int start = tb.getTextOffset(); |
| int len = tb.size(); |
| |
| return _symbols.findSymbol(buf, start, len, hash); |
| } |
| } |
| |
| /** |
| * Method called when we see non-white space character other |
| * than double quote, when expecting a field name. |
| * In standard mode will just throw an expection; but |
| * in non-standard modes may be able to parse name. |
| * |
| * @since 1.2 |
| */ |
| protected final String _handleUnusualFieldName(int i) |
| throws IOException, JsonParseException |
| { |
| // [JACKSON-173]: allow single quotes |
| if (i == INT_APOSTROPHE && isEnabled(Feature.ALLOW_SINGLE_QUOTES)) { |
| return _parseApostropheFieldName(); |
| } |
| // [JACKSON-69]: allow unquoted names if feature enabled: |
| if (!isEnabled(Feature.ALLOW_UNQUOTED_FIELD_NAMES)) { |
| _reportUnexpectedChar(i, "was expecting double-quote to start field name"); |
| } |
| final int[] codes = CharTypes.getInputCodeLatin1JsNames(); |
| final int maxCode = codes.length; |
| |
| // Also: first char must be a valid name char, but NOT be number |
| boolean firstOk; |
| |
| if (i < maxCode) { // identifier, and not a number |
| firstOk = (codes[i] == 0) && (i < INT_0 || i > INT_9); |
| } else { |
| firstOk = Character.isJavaIdentifierPart((char) i); |
| } |
| if (!firstOk) { |
| _reportUnexpectedChar(i, "was expecting either valid name character (for unquoted name) or double-quote (for quoted) to start field name"); |
| } |
| int ptr = _inputPtr; |
| int hash = 0; |
| final int inputLen = _inputEnd; |
| |
| if (ptr < inputLen) { |
| do { |
| int ch = _inputBuffer[ptr]; |
| if (ch < maxCode) { |
| if (codes[ch] != 0) { |
| int start = _inputPtr-1; // -1 to bring back first char |
| _inputPtr = ptr; |
| return _symbols.findSymbol(_inputBuffer, start, ptr - start, hash); |
| } |
| } else if (!Character.isJavaIdentifierPart((char) ch)) { |
| int start = _inputPtr-1; // -1 to bring back first char |
| _inputPtr = ptr; |
| return _symbols.findSymbol(_inputBuffer, start, ptr - start, hash); |
| } |
| hash = (hash * 31) + ch; |
| ++ptr; |
| } while (ptr < inputLen); |
| } |
| int start = _inputPtr-1; |
| _inputPtr = ptr; |
| return _parseUnusualFieldName2(start, hash, codes); |
| } |
| |
| protected final String _parseApostropheFieldName() |
| throws IOException, JsonParseException |
| { |
| // Note: mostly copy of_parseFieldName |
| int ptr = _inputPtr; |
| int hash = 0; |
| final int inputLen = _inputEnd; |
| |
| if (ptr < inputLen) { |
| final int[] codes = CharTypes.getInputCodeLatin1(); |
| final int maxCode = codes.length; |
| |
| do { |
| int ch = _inputBuffer[ptr]; |
| if (ch == '\'') { |
| int start = _inputPtr; |
| _inputPtr = ptr+1; // to skip the quote |
| return _symbols.findSymbol(_inputBuffer, start, ptr - start, hash); |
| } |
| if (ch < maxCode && codes[ch] != 0) { |
| break; |
| } |
| hash = (hash * 31) + ch; |
| ++ptr; |
| } while (ptr < inputLen); |
| } |
| |
| int start = _inputPtr; |
| _inputPtr = ptr; |
| |
| return _parseFieldName2(start, hash, INT_APOSTROPHE); |
| } |
| |
| /** |
| * Method for handling cases where first non-space character |
| * of an expected value token is not legal for standard JSON content. |
| * |
| * @since 1.3 |
| */ |
| protected final JsonToken _handleUnexpectedValue(int i) |
| throws IOException, JsonParseException |
| { |
| // Most likely an error, unless we are to allow single-quote-strings |
| if (i != INT_APOSTROPHE || !isEnabled(Feature.ALLOW_SINGLE_QUOTES)) { |
| _reportUnexpectedChar(i, "expected a valid value (number, String, array, object, 'true', 'false' or 'null')"); |
| } |
| |
| /* [JACKSON-173]: allow single quotes. Unlike with regular |
| * Strings, we'll eagerly parse contents; this so that there's |
| * no need to store information on quote char used. |
| * |
| * Also, no separation to fast/slow parsing; we'll just do |
| * one regular (~= slow) parsing, to keep code simple |
| */ |
| char[] outBuf = _textBuffer.emptyAndGetCurrentSegment(); |
| int outPtr = _textBuffer.getCurrentSegmentSize(); |
| |
| while (true) { |
| if (_inputPtr >= _inputEnd) { |
| if (!loadMore()) { |
| _reportInvalidEOF(": was expecting closing quote for a string value"); |
| } |
| } |
| char c = _inputBuffer[_inputPtr++]; |
| i = (int) c; |
| if (i <= INT_BACKSLASH) { |
| if (i == INT_BACKSLASH) { |
| /* Although chars outside of BMP are to be escaped as |
| * an UTF-16 surrogate pair, does that affect decoding? |
| * For now let's assume it does not. |
| */ |
| c = _decodeEscaped(); |
| } else if (i <= INT_APOSTROPHE) { |
| if (i == INT_APOSTROPHE) { |
| break; |
| } |
| if (i < INT_SPACE) { |
| _throwUnquotedSpace(i, "string value"); |
| } |
| } |
| } |
| // Need more room? |
| if (outPtr >= outBuf.length) { |
| outBuf = _textBuffer.finishCurrentSegment(); |
| outPtr = 0; |
| } |
| // Ok, let's add char to output: |
| outBuf[outPtr++] = c; |
| } |
| _textBuffer.setCurrentLength(outPtr); |
| return JsonToken.VALUE_STRING; |
| } |
| |
| /** |
| * @since 1.2 |
| */ |
| private String _parseUnusualFieldName2(int startPtr, int hash, int[] codes) |
| throws IOException, JsonParseException |
| { |
| _textBuffer.resetWithShared(_inputBuffer, startPtr, (_inputPtr - startPtr)); |
| char[] outBuf = _textBuffer.getCurrentSegment(); |
| int outPtr = _textBuffer.getCurrentSegmentSize(); |
| final int maxCode = codes.length; |
| |
| while (true) { |
| if (_inputPtr >= _inputEnd) { |
| if (!loadMore()) { // acceptable for now (will error out later) |
| break; |
| } |
| } |
| char c = _inputBuffer[_inputPtr]; |
| int i = (int) c; |
| if (i <= maxCode) { |
| if (codes[i] != 0) { |
| break; |
| } |
| } else if (!Character.isJavaIdentifierPart(c)) { |
| break; |
| } |
| ++_inputPtr; |
| hash = (hash * 31) + i; |
| // Ok, let's add char to output: |
| outBuf[outPtr++] = c; |
| |
| // Need more room? |
| if (outPtr >= outBuf.length) { |
| outBuf = _textBuffer.finishCurrentSegment(); |
| outPtr = 0; |
| } |
| } |
| _textBuffer.setCurrentLength(outPtr); |
| { |
| TextBuffer tb = _textBuffer; |
| char[] buf = tb.getTextBuffer(); |
| int start = tb.getTextOffset(); |
| int len = tb.size(); |
| |
| return _symbols.findSymbol(buf, start, len, hash); |
| } |
| } |
| |
| @Override |
| protected void _finishString() |
| throws IOException, JsonParseException |
| { |
| /* First: let's try to see if we have simple String value: one |
| * that does not cross input buffer boundary, and does not |
| * contain escape sequences. |
| */ |
| int ptr = _inputPtr; |
| final int inputLen = _inputEnd; |
| |
| if (ptr < inputLen) { |
| final int[] codes = CharTypes.getInputCodeLatin1(); |
| final int maxCode = codes.length; |
| |
| do { |
| int ch = _inputBuffer[ptr]; |
| if (ch < maxCode && codes[ch] != 0) { |
| if (ch == '"') { |
| _textBuffer.resetWithShared(_inputBuffer, _inputPtr, (ptr-_inputPtr)); |
| _inputPtr = ptr+1; |
| // Yes, we got it all |
| return; |
| } |
| break; |
| } |
| ++ptr; |
| } while (ptr < inputLen); |
| } |
| |
| /* Either ran out of input, or bumped into an escape |
| * sequence... |
| */ |
| _textBuffer.resetWithCopy(_inputBuffer, _inputPtr, (ptr-_inputPtr)); |
| _inputPtr = ptr; |
| _finishString2(); |
| } |
| |
| protected void _finishString2() |
| throws IOException, JsonParseException |
| { |
| char[] outBuf = _textBuffer.getCurrentSegment(); |
| int outPtr = _textBuffer.getCurrentSegmentSize(); |
| |
| while (true) { |
| if (_inputPtr >= _inputEnd) { |
| if (!loadMore()) { |
| _reportInvalidEOF(": was expecting closing quote for a string value"); |
| } |
| } |
| char c = _inputBuffer[_inputPtr++]; |
| int i = (int) c; |
| if (i <= INT_BACKSLASH) { |
| if (i == INT_BACKSLASH) { |
| /* Although chars outside of BMP are to be escaped as |
| * an UTF-16 surrogate pair, does that affect decoding? |
| * For now let's assume it does not. |
| */ |
| c = _decodeEscaped(); |
| } else if (i <= INT_QUOTE) { |
| if (i == INT_QUOTE) { |
| break; |
| } |
| if (i < INT_SPACE) { |
| _throwUnquotedSpace(i, "string value"); |
| } |
| } |
| } |
| // Need more room? |
| if (outPtr >= outBuf.length) { |
| outBuf = _textBuffer.finishCurrentSegment(); |
| outPtr = 0; |
| } |
| // Ok, let's add char to output: |
| outBuf[outPtr++] = c; |
| } |
| _textBuffer.setCurrentLength(outPtr); |
| } |
| |
| /** |
| * Method called to skim through rest of unparsed String value, |
| * if it is not needed. This can be done bit faster if contents |
| * need not be stored for future access. |
| */ |
| protected void _skipString() |
| throws IOException, JsonParseException |
| { |
| _tokenIncomplete = false; |
| |
| int inputPtr = _inputPtr; |
| int inputLen = _inputEnd; |
| char[] inputBuffer = _inputBuffer; |
| |
| while (true) { |
| if (inputPtr >= inputLen) { |
| _inputPtr = inputPtr; |
| if (!loadMore()) { |
| _reportInvalidEOF(": was expecting closing quote for a string value"); |
| } |
| inputPtr = _inputPtr; |
| inputLen = _inputEnd; |
| } |
| char c = inputBuffer[inputPtr++]; |
| int i = (int) c; |
| if (i <= INT_BACKSLASH) { |
| if (i == INT_BACKSLASH) { |
| /* Although chars outside of BMP are to be escaped as |
| * an UTF-16 surrogate pair, does that affect decoding? |
| * For now let's assume it does not. |
| */ |
| _inputPtr = inputPtr; |
| c = _decodeEscaped(); |
| inputPtr = _inputPtr; |
| inputLen = _inputEnd; |
| } else if (i <= INT_QUOTE) { |
| if (i == INT_QUOTE) { |
| _inputPtr = inputPtr; |
| break; |
| } |
| if (i < INT_SPACE) { |
| _inputPtr = inputPtr; |
| _throwUnquotedSpace(i, "string value"); |
| } |
| } |
| } |
| } |
| } |
| |
| /** |
| * Method called to much one of literal tokens we may expect |
| */ |
| protected void _matchToken(JsonToken token) |
| throws IOException, JsonParseException |
| { |
| // First char is already matched, need to check the rest |
| String matchStr = token.asString(); |
| int i = 1; |
| |
| for (int len = matchStr.length(); i < len; ++i) { |
| if (_inputPtr >= _inputEnd) { |
| if (!loadMore()) { |
| _reportInvalidEOF(" in a value"); |
| } |
| } |
| char c = _inputBuffer[_inputPtr]; |
| if (c != matchStr.charAt(i)) { |
| _reportInvalidToken(matchStr.substring(0, i)); |
| } |
| ++_inputPtr; |
| } |
| /* Ok, fine; let's not bother checking anything beyond keyword. |
| * If there's something wrong there, it'll cause a parsing |
| * error later on. |
| */ |
| return; |
| } |
| |
| private void _reportInvalidToken(String matchedPart) |
| throws IOException, JsonParseException |
| { |
| StringBuilder sb = new StringBuilder(matchedPart); |
| /* Let's just try to find what appears to be the token, using |
| * regular Java identifier character rules. It's just a heuristic, |
| * nothing fancy here. |
| */ |
| while (true) { |
| if (_inputPtr >= _inputEnd) { |
| if (!loadMore()) { |
| break; |
| } |
| } |
| char c = _inputBuffer[_inputPtr]; |
| if (!Character.isJavaIdentifierPart(c)) { |
| break; |
| } |
| ++_inputPtr; |
| sb.append(c); |
| } |
| |
| _reportError("Unrecognized token '"+sb.toString()+"': was expecting 'null', 'true' or 'false'"); |
| } |
| |
| /* |
| /********************************************************** |
| /* Internal methods, other parsing |
| /********************************************************** |
| */ |
| |
| /** |
| * We actually need to check the character value here |
| * (to see if we have \n following \r). |
| */ |
| protected final void _skipCR() throws IOException |
| { |
| if (_inputPtr < _inputEnd || loadMore()) { |
| if (_inputBuffer[_inputPtr] == '\n') { |
| ++_inputPtr; |
| } |
| } |
| ++_currInputRow; |
| _currInputRowStart = _inputPtr; |
| } |
| |
| protected final void _skipLF() throws IOException |
| { |
| ++_currInputRow; |
| _currInputRowStart = _inputPtr; |
| } |
| |
| private final int _skipWS() |
| throws IOException, JsonParseException |
| { |
| while (_inputPtr < _inputEnd || loadMore()) { |
| int i = (int) _inputBuffer[_inputPtr++]; |
| if (i > INT_SPACE) { |
| if (i != INT_SLASH) { |
| return i; |
| } |
| _skipComment(); |
| } else if (i != INT_SPACE) { |
| if (i == INT_LF) { |
| _skipLF(); |
| } else if (i == INT_CR) { |
| _skipCR(); |
| } else if (i != INT_TAB) { |
| _throwInvalidSpace(i); |
| } |
| } |
| } |
| throw _constructError("Unexpected end-of-input within/between "+_parsingContext.getTypeDesc()+" entries"); |
| } |
| |
| private final int _skipWSOrEnd() |
| throws IOException, JsonParseException |
| { |
| while ((_inputPtr < _inputEnd) || loadMore()) { |
| int i = (int) _inputBuffer[_inputPtr++]; |
| if (i > INT_SPACE) { |
| if (i != INT_SLASH) { |
| return i; |
| } |
| _skipComment(); |
| } else if (i != INT_SPACE) { |
| if (i == INT_LF) { |
| _skipLF(); |
| } else if (i == INT_CR) { |
| _skipCR(); |
| } else if (i != INT_TAB) { |
| _throwInvalidSpace(i); |
| } |
| } |
| } |
| // We ran out of input... |
| _handleEOF(); |
| return -1; |
| } |
| |
| private final void _skipComment() |
| throws IOException, JsonParseException |
| { |
| if (!isEnabled(Feature.ALLOW_COMMENTS)) { |
| _reportUnexpectedChar('/', "maybe a (non-standard) comment? (not recognized as one since Feature 'ALLOW_COMMENTS' not enabled for parser)"); |
| } |
| // First: check which comment (if either) it is: |
| if (_inputPtr >= _inputEnd && !loadMore()) { |
| _reportInvalidEOF(" in a comment"); |
| } |
| char c = _inputBuffer[_inputPtr++]; |
| if (c == '/') { |
| _skipCppComment(); |
| } else if (c == '*') { |
| _skipCComment(); |
| } else { |
| _reportUnexpectedChar(c, "was expecting either '*' or '/' for a comment"); |
| } |
| } |
| |
| private final void _skipCComment() |
| throws IOException, JsonParseException |
| { |
| // Ok: need the matching '*/' |
| main_loop: |
| while ((_inputPtr < _inputEnd) || loadMore()) { |
| int i = (int) _inputBuffer[_inputPtr++]; |
| if (i <= INT_ASTERISK) { |
| if (i == INT_ASTERISK) { // end? |
| if ((_inputPtr >= _inputEnd) && !loadMore()) { |
| break main_loop; |
| } |
| if (_inputBuffer[_inputPtr] == INT_SLASH) { |
| ++_inputPtr; |
| return; |
| } |
| continue; |
| } |
| if (i < INT_SPACE) { |
| if (i == INT_LF) { |
| _skipLF(); |
| } else if (i == INT_CR) { |
| _skipCR(); |
| } else if (i != INT_TAB) { |
| _throwInvalidSpace(i); |
| } |
| } |
| } |
| } |
| _reportInvalidEOF(" in a comment"); |
| } |
| |
| private final void _skipCppComment() |
| throws IOException, JsonParseException |
| { |
| // Ok: need to find EOF or linefeed |
| while ((_inputPtr < _inputEnd) || loadMore()) { |
| int i = (int) _inputBuffer[_inputPtr++]; |
| if (i < INT_SPACE) { |
| if (i == INT_LF) { |
| _skipLF(); |
| break; |
| } else if (i == INT_CR) { |
| _skipCR(); |
| break; |
| } else if (i != INT_TAB) { |
| _throwInvalidSpace(i); |
| } |
| } |
| } |
| } |
| |
| protected final char _decodeEscaped() |
| throws IOException, JsonParseException |
| { |
| if (_inputPtr >= _inputEnd) { |
| if (!loadMore()) { |
| _reportInvalidEOF(" in character escape sequence"); |
| } |
| } |
| char c = _inputBuffer[_inputPtr++]; |
| |
| switch ((int) c) { |
| // First, ones that are mapped |
| case INT_b: |
| return '\b'; |
| case INT_t: |
| return '\t'; |
| case INT_n: |
| return '\n'; |
| case INT_f: |
| return '\f'; |
| case INT_r: |
| return '\r'; |
| |
| // And these are to be returned as they are |
| case INT_QUOTE: |
| case INT_SLASH: |
| case INT_BACKSLASH: |
| return c; |
| |
| case INT_u: // and finally hex-escaped |
| break; |
| |
| default: |
| return _handleUnrecognizedCharacterEscape(c); |
| } |
| |
| // Ok, a hex escape. Need 4 characters |
| int value = 0; |
| for (int i = 0; i < 4; ++i) { |
| if (_inputPtr >= _inputEnd) { |
| if (!loadMore()) { |
| _reportInvalidEOF(" in character escape sequence"); |
| } |
| } |
| int ch = (int) _inputBuffer[_inputPtr++]; |
| int digit = CharTypes.charToHex(ch); |
| if (digit < 0) { |
| _reportUnexpectedChar(ch, "expected a hex-digit for character escape sequence"); |
| } |
| value = (value << 4) | digit; |
| } |
| return (char) value; |
| } |
| |
| /* |
| /********************************************************** |
| /* Binary access |
| /********************************************************** |
| */ |
| |
| @Override |
| protected byte[] _decodeBase64(Base64Variant b64variant) |
| throws IOException, JsonParseException |
| { |
| ByteArrayBuilder builder = _getByteArrayBuilder(); |
| |
| /* !!! 23-Jan-2009, tatu: There are some potential problems |
| * with this: |
| * |
| * - Escaped chars are not handled. Should they? |
| */ |
| |
| //main_loop: |
| while (true) { |
| // first, we'll skip preceding white space, if any |
| char ch; |
| do { |
| if (_inputPtr >= _inputEnd) { |
| loadMoreGuaranteed(); |
| } |
| ch = _inputBuffer[_inputPtr++]; |
| } while (ch <= INT_SPACE); |
| int bits = b64variant.decodeBase64Char(ch); |
| if (bits < 0) { // reached the end, fair and square? |
| if (ch == '"') { |
| return builder.toByteArray(); |
| } |
| throw reportInvalidChar(b64variant, ch, 0); |
| } |
| int decodedData = bits; |
| |
| // then second base64 char; can't get padding yet, nor ws |
| |
| if (_inputPtr >= _inputEnd) { |
| loadMoreGuaranteed(); |
| } |
| ch = _inputBuffer[_inputPtr++]; |
| bits = b64variant.decodeBase64Char(ch); |
| if (bits < 0) { |
| throw reportInvalidChar(b64variant, ch, 1); |
| } |
| decodedData = (decodedData << 6) | bits; |
| |
| // third base64 char; can be padding, but not ws |
| if (_inputPtr >= _inputEnd) { |
| loadMoreGuaranteed(); |
| } |
| ch = _inputBuffer[_inputPtr++]; |
| bits = b64variant.decodeBase64Char(ch); |
| |
| // First branch: can get padding (-> 1 byte) |
| if (bits < 0) { |
| if (bits != Base64Variant.BASE64_VALUE_PADDING) { |
| throw reportInvalidChar(b64variant, ch, 2); |
| } |
| // Ok, must get padding |
| if (_inputPtr >= _inputEnd) { |
| loadMoreGuaranteed(); |
| } |
| ch = _inputBuffer[_inputPtr++]; |
| if (!b64variant.usesPaddingChar(ch)) { |
| throw reportInvalidChar(b64variant, ch, 3, "expected padding character '"+b64variant.getPaddingChar()+"'"); |
| } |
| // Got 12 bits, only need 8, need to shift |
| decodedData >>= 4; |
| builder.append(decodedData); |
| continue; |
| } |
| // Nope, 2 or 3 bytes |
| decodedData = (decodedData << 6) | bits; |
| // fourth and last base64 char; can be padding, but not ws |
| if (_inputPtr >= _inputEnd) { |
| loadMoreGuaranteed(); |
| } |
| ch = _inputBuffer[_inputPtr++]; |
| bits = b64variant.decodeBase64Char(ch); |
| if (bits < 0) { |
| if (bits != Base64Variant.BASE64_VALUE_PADDING) { |
| throw reportInvalidChar(b64variant, ch, 3); |
| } |
| /* With padding we only get 2 bytes; but we have |
| * to shift it a bit so it is identical to triplet |
| * case with partial output. |
| * 3 chars gives 3x6 == 18 bits, of which 2 are |
| * dummies, need to discard: |
| */ |
| decodedData >>= 2; |
| builder.appendTwoBytes(decodedData); |
| } else { |
| // otherwise, our triple is now complete |
| decodedData = (decodedData << 6) | bits; |
| builder.appendThreeBytes(decodedData); |
| } |
| } |
| } |
| |
| protected IllegalArgumentException reportInvalidChar(Base64Variant b64variant, char ch, int bindex) |
| throws IllegalArgumentException |
| { |
| return reportInvalidChar(b64variant, ch, bindex, null); |
| } |
| |
| /** |
| * @param bindex Relative index within base64 character unit; between 0 |
| * and 3 (as unit has exactly 4 characters) |
| */ |
| protected IllegalArgumentException reportInvalidChar(Base64Variant b64variant, char ch, int bindex, String msg) |
| throws IllegalArgumentException |
| { |
| String base; |
| if (ch <= INT_SPACE) { |
| base = "Illegal white space character (code 0x"+Integer.toHexString(ch)+") as character #"+(bindex+1)+" of 4-char base64 unit: can only used between units"; |
| } else if (b64variant.usesPaddingChar(ch)) { |
| base = "Unexpected padding character ('"+b64variant.getPaddingChar()+"') as character #"+(bindex+1)+" of 4-char base64 unit: padding only legal as 3rd or 4th character"; |
| } else if (!Character.isDefined(ch) || Character.isISOControl(ch)) { |
| // Not sure if we can really get here... ? (most illegal xml chars are caught at lower level) |
| base = "Illegal character (code 0x"+Integer.toHexString(ch)+") in base64 content"; |
| } else { |
| base = "Illegal character '"+ch+"' (code 0x"+Integer.toHexString(ch)+") in base64 content"; |
| } |
| if (msg != null) { |
| base = base + ": " + msg; |
| } |
| return new IllegalArgumentException(base); |
| } |
| } |