Port of Python's chardet (https://github.com/chardet/chardet).
LGPL
npm install jschardet
var jschardet = require("jschardet")
// "àíàçã" in UTF-8
jschardet.detect("\xc3\xa0\xc3\xad\xc3\xa0\xc3\xa7\xc3\xa3")
// { encoding: "UTF-8", confidence: 0.9690625 }
// "次常用國字標準字體表" in Big5
jschardet.detect("\xa6\xb8\xb1\x60\xa5\xce\xb0\xea\xa6\x72\xbc\xd0\xb7\xc7\xa6\x72\xc5\xe9\xaa\xed")
// { encoding: "Big5", confidence: 0.99 }
// "<string>Martin Kühl</string>" in windows-1252
// jschardet.detectAll("\x3c\x73\x74\x72\x69\x6e\x67\x3e\x4d\x61\x72\x74\x69\x6e\x20\x4b\xfc\x68\x6c\x3c\x2f\x73\x74\x72\x69\x6e\x67\x3e")
// [
// {encoding: "windows-1252", confidence: 0.95},
// {encoding: "ISO-8859-2", confidence: 0.8796300205763055},
// {encoding: "SHIFT_JIS", confidence: 0.01}
// ]
Copy and include jschardet.min.js in your web page.
This library is also available on cdnjs at https://cdnjs.cloudflare.com/ajax/libs/jschardet/1.4.1/jschardet.min.js
// See all information related to the confidence levels of each encoding.
// This is useful to see why you're not getting the expected encoding.
jschardet.enableDebug();

// The default minimum accepted confidence level is 0.20, but sometimes this is
// not enough, especially when dealing with files consisting mostly of numbers.
// Change this to 0 to always get a result, or to any other value that
// works for you.
jschardet.detect(str, { minimumThreshold: 0 });

// Lock down which encodings to detect. This can be useful in situations where
// jschardet is giving a higher probability to encodings that you never use.
jschardet.detect(str, { detectEncodings: ["UTF-8", "windows-1252"] });
I haven't been able to create tests to correctly detect:
Use npm run dist to update the distribution files. They're available at https://github.com/aadsm/jschardet/tree/master/dist.
Ported from Python to JavaScript by António Afonso (https://github.com/aadsm/jschardet)
Transformed into an npm package by Markus Ast (https://github.com/brainafk)