| /* |
| * The Original Code is Mozilla Universal charset detector code. |
| * |
| * The Initial Developer of the Original Code is |
| * Netscape Communications Corporation. |
| * Portions created by the Initial Developer are Copyright (C) 2001 |
| * the Initial Developer. All Rights Reserved. |
| * |
| * Contributor(s): |
| * António Afonso (antonio.afonso gmail.com) - port to JavaScript |
| * Mark Pilgrim - port to Python |
| * Shy Shalom - original C code |
| * |
| * This library is free software; you can redistribute it and/or |
| * modify it under the terms of the GNU Lesser General Public |
| * License as published by the Free Software Foundation; either |
| * version 2.1 of the License, or (at your option) any later version. |
| * |
| * This library is distributed in the hope that it will be useful, |
| * but WITHOUT ANY WARRANTY; without even the implied warranty of |
| * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
| * Lesser General Public License for more details. |
| * |
| * You should have received a copy of the GNU Lesser General Public |
| * License along with this library; if not, write to the Free Software |
| * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA |
| * 02110-1301 USA |
| */ |
| |
| var CharSetProber = require('./charsetprober'); |
| var constants = require('./constants'); |
| var logger = require('./logger'); |
| |
/**
 * Prober that feeds input through a coding state machine and a character
 * distribution analyzer.
 *
 * This constructor only initializes `_mCodingSM` and `_mDistributionAnalyzer`
 * to `null`; concrete instances of both must be assigned by other code before
 * `feed()` or `getConfidence()` is used (presumably by derived probers —
 * they are not visible in this file).
 *
 * Methods are assigned per instance (not on the prototype) so that
 * `SomeProber.prototype.<method>.apply(this)` style chaining keeps working.
 *
 * @constructor
 */
function MultiByteCharSetProber() {
    CharSetProber.apply(this);

    var self = this;

    /**
     * Put the instance fields into their initial state: no analyzer, no
     * state machine, and a two-slot "last character" window of NUL bytes.
     */
    function init() {
        self._mDistributionAnalyzer = null;
        self._mCodingSM = null;
        self._mLastChar = ["\x00", "\x00"];
    }

    /**
     * Reset the inherited prober state, then the state machine and the
     * distribution analyzer (when present), and clear the carry-over window.
     */
    this.reset = function() {
        MultiByteCharSetProber.prototype.reset.apply(this);
        if( this._mCodingSM ) {
            this._mCodingSM.reset();
        }
        if( this._mDistributionAnalyzer ) {
            this._mDistributionAnalyzer.reset();
        }
        this._mLastChar = ["\x00", "\x00"];
    };

    /**
     * Placeholder that returns `undefined`; presumably overridden elsewhere
     * with the actual charset name.
     */
    this.getCharsetName = function() {
    };

    /**
     * Run a buffer through the coding state machine and, every time the
     * machine is back in its start state, hand the two-element window ending
     * at the current position to the distribution analyzer.
     *
     * @param {string} aBuf Input to analyze. Assumed to be a string (the
     *     window is built with `join('')` at index 0 and with `slice`
     *     elsewhere) — confirm against callers.
     * @returns {*} The prober state as reported by `this.getState()`.
     */
    this.feed = function(aBuf) {
        var aLen = aBuf.length;
        for( var i = 0; i < aLen; i++ ) {
            var codingState = this._mCodingSM.nextState(aBuf[i]);
            if( codingState == constants.error ) {
                logger.log(this.getCharsetName() + " prober hit error at byte " + i + "\n");
                this._mState = constants.notMe;
                break;
            } else if( codingState == constants.itsMe ) {
                this._mState = constants.foundIt;
                break;
            } else if( codingState == constants.start ) {
                var charLen = this._mCodingSM.getCurrentCharLen();
                if( i == 0 ) {
                    // The window straddles the buffer boundary: slot 0 holds
                    // the last element of the previous buffer.
                    this._mLastChar[1] = aBuf[0];
                    this._mDistributionAnalyzer.feed(this._mLastChar.join(''), charLen);
                } else {
                    this._mDistributionAnalyzer.feed(aBuf.slice(i-1,i+1), charLen);
                }
            }
        }

        // Remember the last element for the next call. An empty buffer must
        // not overwrite it: aBuf[-1] is undefined, which join('') would turn
        // into a one-character window on the next feed().
        if( aLen > 0 ) {
            this._mLastChar[0] = aBuf[aLen - 1];
        }

        if( this.getState() == constants.detecting ) {
            // Shortcut: stop detecting once the analyzer has enough data and
            // the confidence exceeds the configured threshold.
            if( this._mDistributionAnalyzer.gotEnoughData() &&
                this.getConfidence() > constants.SHORTCUT_THRESHOLD ) {
                this._mState = constants.foundIt;
            }
        }

        return this.getState();
    };

    /**
     * @returns {*} The confidence reported by the distribution analyzer.
     */
    this.getConfidence = function() {
        return this._mDistributionAnalyzer.getConfidence();
    };

    // Without this call the fields above stay undefined until reset() runs,
    // and feed() would throw when writing into _mLastChar.
    init();
}
MultiByteCharSetProber.prototype = new CharSetProber();
| |
| module.exports = MultiByteCharSetProber |