| /**************************************************************************** |
| ** |
| ** Copyright (C) 2016 The Qt Company Ltd. |
| ** Contact: https://www.qt.io/licensing/ |
| ** |
| ** This file is part of the QtCore module of the Qt Toolkit. |
| ** |
| ** $QT_BEGIN_LICENSE:LGPL$ |
| ** Commercial License Usage |
| ** Licensees holding valid commercial Qt licenses may use this file in |
| ** accordance with the commercial license agreement provided with the |
| ** Software or, alternatively, in accordance with the terms contained in |
| ** a written agreement between you and The Qt Company. For licensing terms |
| ** and conditions see https://www.qt.io/terms-conditions. For further |
| ** information use the contact form at https://www.qt.io/contact-us. |
| ** |
| ** GNU Lesser General Public License Usage |
| ** Alternatively, this file may be used under the terms of the GNU Lesser |
| ** General Public License version 3 as published by the Free Software |
| ** Foundation and appearing in the file LICENSE.LGPL3 included in the |
| ** packaging of this file. Please review the following information to |
| ** ensure the GNU Lesser General Public License version 3 requirements |
| ** will be met: https://www.gnu.org/licenses/lgpl-3.0.html. |
| ** |
| ** GNU General Public License Usage |
| ** Alternatively, this file may be used under the terms of the GNU |
| ** General Public License version 2.0 or (at your option) the GNU General |
| ** Public license version 3 or any later version approved by the KDE Free |
| ** Qt Foundation. The licenses are as published by the Free Software |
| ** Foundation and appearing in the file LICENSE.GPL2 and LICENSE.GPL3 |
| ** included in the packaging of this file. Please review the following |
| ** information to ensure the GNU General Public License requirements will |
| ** be met: https://www.gnu.org/licenses/gpl-2.0.html and |
| ** https://www.gnu.org/licenses/gpl-3.0.html. |
| ** |
| ** $QT_END_LICENSE$ |
| ** |
| ****************************************************************************/ |
| |
| // Most of the code here was originally written by Serika Kurusugawa, |
| // a.k.a. Junji Takagi, and is included in Qt with the author's permission |
| // and the grateful thanks of the Qt team. |
| |
| /*! \class QJisCodec |
| \inmodule QtCore |
| \reentrant |
| \internal |
| */ |
| |
| #include "qjiscodec_p.h" |
| #include "qlist.h" |
| |
| QT_BEGIN_NAMESPACE |
| |
// Control characters and the code points that need special treatment when
// switching between ASCII and JIS X 0201 Latin. YenSign and Overline
// intentionally share their value with ReverseSolidus and Tilde: the two
// character sets assign different characters to the same byte.
enum {
    Esc = 0x1b,                 // Escape, starts a designation sequence
    So = 0x0e,                  // Shift Out
    Si = 0x0f,                  // Shift In

    ReverseSolidus = 0x5c,      // backslash position in ASCII
    YenSign = ReverseSolidus,   // same byte in JIS X 0201 Latin
    Tilde = 0x7e,               // tilde position in ASCII
    Overline = Tilde            // same byte in JIS X 0201 Latin
};
| |
// True when c lies in 0xa1..0xdf, the byte range of 8-bit JIS X 0201 katakana.
#define IsKana(c) ((0xa1 <= (c)) && ((c) <= 0xdf))
// True when c lies in 0x21..0x7e.
#define IsJisChar(c) ((0x21 <= (c)) && ((c) <= 0x7e))

// Turns a conversion result into a QChar. A result of 0 is treated as
// "no mapping" and becomes QChar::ReplacementCharacter.
#define QValidChar(u) (((u) != 0) ? QChar((ushort)(u)) : QChar(QChar::ReplacementCharacter))
| |
// Character set currently selected in an ISO-2022-JP stream.
// MinState..MaxState are the states that have an entry in Esc_SEQ, indexed
// by (state - MinState).
enum Iso2022State {
    Ascii,
    MinState = Ascii,
    JISX0201_Latin,
    JISX0201_Kana,
    JISX0208_1978,
    JISX0208_1983,
    JISX0212,
    MaxState = JISX0212,
    UnknownState                // no supported character set is selected
};
| |
// Bytes the decoder accepts as an intermediate byte of an escape sequence
// (directly after ESC, or after "ESC $").
static const char Esc_CHARS[] = "()*+-./";

// Designation sequences, each starting with ESC (0x1b) and NUL-terminated.
static const char Esc_Ascii[] = "\x1b(B";
static const char Esc_JISX0201_Latin[] = "\x1b(J";
static const char Esc_JISX0201_Kana[] = "\x1b(I";
static const char Esc_JISX0208_1978[] = "\x1b$@";
static const char Esc_JISX0208_1983[] = "\x1b$B";
static const char Esc_JISX0212[] = "\x1b$(D";

// Escape sequence for each Iso2022State from MinState to MaxState, in
// enumerator order.
static const char * const Esc_SEQ[] = {
    Esc_Ascii,
    Esc_JISX0201_Latin,
    Esc_JISX0201_Kana,
    Esc_JISX0208_1978,
    Esc_JISX0208_1983,
    Esc_JISX0212
};
| |
| /*! |
| \internal |
| */ |
| QJisCodec::QJisCodec() : conv(QJpUnicodeConv::newConverter(QJpUnicodeConv::Default)) |
| { |
| } |
| |
| |
| /*! |
| \internal |
| */ |
| QJisCodec::~QJisCodec() |
| { |
| delete (const QJpUnicodeConv*)conv; |
| conv = 0; |
| } |
| |
| QByteArray QJisCodec::convertFromUnicode(const QChar *uc, int len, ConverterState *cs) const |
| { |
| char replacement = '?'; |
| if (cs) { |
| if (cs->flags & ConvertInvalidToNull) |
| replacement = 0; |
| } |
| int invalid = 0; |
| |
| QByteArray result; |
| Iso2022State state = Ascii; |
| Iso2022State prev = Ascii; |
| for (int i = 0; i < len; i++) { |
| QChar ch = uc[i]; |
| uint j; |
| if (ch.row() == 0x00 && ch.cell() < 0x80) { |
| // Ascii |
| if (state != JISX0201_Latin || |
| ch.cell() == ReverseSolidus || ch.cell() == Tilde) { |
| state = Ascii; |
| } |
| j = ch.cell(); |
| } else if ((j = conv->unicodeToJisx0201(ch.row(), ch.cell())) != 0) { |
| if (j < 0x80) { |
| // JIS X 0201 Latin |
| if (state != Ascii || |
| ch.cell() == YenSign || ch.cell() == Overline) { |
| state = JISX0201_Latin; |
| } |
| } else { |
| // JIS X 0201 Kana |
| state = JISX0201_Kana; |
| j &= 0x7f; |
| } |
| } else if ((j = conv->unicodeToJisx0208(ch.row(), ch.cell())) != 0) { |
| // JIS X 0208 |
| state = JISX0208_1983; |
| } else if ((j = conv->unicodeToJisx0212(ch.row(), ch.cell())) != 0) { |
| // JIS X 0212 |
| state = JISX0212; |
| } else { |
| // Invalid |
| state = UnknownState; |
| j = replacement; |
| ++invalid; |
| } |
| if (state != prev) { |
| if (state == UnknownState) { |
| result += Esc_Ascii; |
| } else { |
| result += Esc_SEQ[state - MinState]; |
| } |
| prev = state; |
| } |
| if (j < 0x0100) { |
| result += j & 0xff; |
| } else { |
| result += (j >> 8) & 0xff; |
| result += j & 0xff; |
| } |
| } |
| if (prev != Ascii) { |
| result += Esc_Ascii; |
| } |
| |
| if (cs) { |
| cs->invalidChars += invalid; |
| } |
| return result; |
| } |
| |
| QString QJisCodec::convertToUnicode(const char* chars, int len, ConverterState *cs) const |
| { |
| uchar buf[4] = {0, 0, 0, 0}; |
| int nbuf = 0; |
| Iso2022State state = Ascii, prev = Ascii; |
| bool esc = false; |
| QChar replacement = QChar::ReplacementCharacter; |
| if (cs) { |
| if (cs->flags & ConvertInvalidToNull) |
| replacement = QChar::Null; |
| nbuf = cs->remainingChars; |
| buf[0] = (cs->state_data[0] >> 24) & 0xff; |
| buf[1] = (cs->state_data[0] >> 16) & 0xff; |
| buf[2] = (cs->state_data[0] >> 8) & 0xff; |
| buf[3] = (cs->state_data[0] >> 0) & 0xff; |
| state = (Iso2022State)((cs->state_data[1] >> 0) & 0xff); |
| prev = (Iso2022State)((cs->state_data[1] >> 8) & 0xff); |
| esc = cs->state_data[2]; |
| } |
| int invalid = 0; |
| |
| QString result; |
| for (int i=0; i<len; i++) { |
| uchar ch = chars[i]; |
| if (esc) { |
| // Escape sequence |
| state = UnknownState; |
| switch (nbuf) { |
| case 0: |
| if (ch == '$' || strchr(Esc_CHARS, ch)) { |
| buf[nbuf++] = ch; |
| } else { |
| nbuf = 0; |
| esc = false; |
| } |
| break; |
| case 1: |
| if (buf[0] == '$') { |
| if (strchr(Esc_CHARS, ch)) { |
| buf[nbuf++] = ch; |
| } else { |
| switch (ch) { |
| case '@': |
| state = JISX0208_1978; // Esc $ @ |
| break; |
| case 'B': |
| state = JISX0208_1983; // Esc $ B |
| break; |
| } |
| nbuf = 0; |
| esc = false; |
| } |
| } else { |
| if (buf[0] == '(') { |
| switch (ch) { |
| case 'B': |
| state = Ascii; // Esc (B |
| break; |
| case 'I': |
| state = JISX0201_Kana; // Esc (I |
| break; |
| case 'J': |
| state = JISX0201_Latin; // Esc (J |
| break; |
| } |
| } |
| nbuf = 0; |
| esc = false; |
| } |
| break; |
| case 2: |
| if (buf[1] == '(') { |
| switch (ch) { |
| case 'D': |
| state = JISX0212; // Esc $ (D |
| break; |
| } |
| } |
| nbuf = 0; |
| esc = false; |
| break; |
| } |
| } else { |
| if (ch == Esc) { |
| // Escape sequence |
| nbuf = 0; |
| esc = true; |
| } else if (ch == So) { |
| // Shift out |
| prev = state; |
| state = JISX0201_Kana; |
| nbuf = 0; |
| } else if (ch == Si) { |
| // Shift in |
| if (prev == Ascii || prev == JISX0201_Latin) { |
| state = prev; |
| } else { |
| state = Ascii; |
| } |
| nbuf = 0; |
| } else { |
| uint u; |
| switch (nbuf) { |
| case 0: |
| switch (state) { |
| case Ascii: |
| if (ch < 0x80) { |
| result += QLatin1Char(ch); |
| break; |
| } |
| Q_FALLTHROUGH(); |
| case JISX0201_Latin: |
| u = conv->jisx0201ToUnicode(ch); |
| result += QValidChar(u); |
| break; |
| case JISX0201_Kana: |
| u = conv->jisx0201ToUnicode(ch | 0x80); |
| result += QValidChar(u); |
| break; |
| case JISX0208_1978: |
| case JISX0208_1983: |
| case JISX0212: |
| buf[nbuf++] = ch; |
| break; |
| default: |
| result += QChar::ReplacementCharacter; |
| break; |
| } |
| break; |
| case 1: |
| switch (state) { |
| case JISX0208_1978: |
| case JISX0208_1983: |
| u = conv->jisx0208ToUnicode(buf[0] & 0x7f, ch & 0x7f); |
| result += QValidChar(u); |
| break; |
| case JISX0212: |
| u = conv->jisx0212ToUnicode(buf[0] & 0x7f, ch & 0x7f); |
| result += QValidChar(u); |
| break; |
| default: |
| result += replacement; |
| ++invalid; |
| break; |
| } |
| nbuf = 0; |
| break; |
| } |
| } |
| } |
| } |
| |
| if (cs) { |
| cs->remainingChars = nbuf; |
| cs->invalidChars += invalid; |
| cs->state_data[0] = (buf[0] << 24) + (buf[1] << 16) + (buf[2] << 8) + buf[3]; |
| cs->state_data[1] = (prev << 8) + state; |
| cs->state_data[2] = esc; |
| } |
| |
| return result; |
| } |
| |
| |
| |
| /*! |
| \internal |
| */ |
| int QJisCodec::_mibEnum() |
| { |
| return 39; |
| } |
| |
| /*! |
| \internal |
| */ |
| QByteArray QJisCodec::_name() |
| { |
| return "ISO-2022-JP"; |
| } |
| |
| /*! |
| Returns the codec's mime name. |
| */ |
| QList<QByteArray> QJisCodec::_aliases() |
| { |
| QList<QByteArray> list; |
| list << "JIS7"; // Qt 3 compat |
| return list; |
| } |
| |
| QT_END_NAMESPACE |