Qt 4.8
qsjiscodec.cpp
Go to the documentation of this file.
1 /****************************************************************************
2 **
3 ** Copyright (C) 2014 Digia Plc and/or its subsidiary(-ies).
4 ** Contact: http://www.qt-project.org/legal
5 **
6 ** This file is part of the plugins of the Qt Toolkit.
7 **
8 ** $QT_BEGIN_LICENSE:LGPL$
9 ** Commercial License Usage
10 ** Licensees holding valid commercial Qt licenses may use this file in
11 ** accordance with the commercial license agreement provided with the
12 ** Software or, alternatively, in accordance with the terms contained in
13 ** a written agreement between you and Digia. For licensing terms and
14 ** conditions see http://qt.digia.com/licensing. For further information
15 ** use the contact form at http://qt.digia.com/contact-us.
16 **
17 ** GNU Lesser General Public License Usage
18 ** Alternatively, this file may be used under the terms of the GNU Lesser
19 ** General Public License version 2.1 as published by the Free Software
20 ** Foundation and appearing in the file LICENSE.LGPL included in the
21 ** packaging of this file. Please review the following information to
22 ** ensure the GNU Lesser General Public License version 2.1 requirements
23 ** will be met: http://www.gnu.org/licenses/old-licenses/lgpl-2.1.html.
24 **
25 ** In addition, as a special exception, Digia gives you certain additional
26 ** rights. These rights are described in the Digia Qt LGPL Exception
27 ** version 1.1, included in the file LGPL_EXCEPTION.txt in this package.
28 **
29 ** GNU General Public License Usage
30 ** Alternatively, this file may be used under the terms of the GNU
31 ** General Public License version 3.0 as published by the Free Software
32 ** Foundation and appearing in the file LICENSE.GPL included in the
33 ** packaging of this file. Please review the following information to
34 ** ensure the GNU General Public License version 3.0 requirements will be
35 ** met: http://www.gnu.org/copyleft/gpl.html.
36 **
37 **
38 ** $QT_END_LICENSE$
39 **
40 ****************************************************************************/
41 
42 // Most of the code here was originally written by Serika Kurusugawa
43 // a.k.a. Junji Takagi, and is included in Qt with the author's permission,
44 // and the grateful thanks of the Qt team.
45 
51 #include "qsjiscodec.h"
52 #include "qlist.h"
53 
55 
56 #ifndef QT_NO_TEXTCODEC
// ASCII escape control code (0x1b). Not referenced by the code visible in this file.
enum { Esc = 0x1b };
60 
// Byte classification helpers for Shift-JIS input. Every macro below evaluates
// its argument more than once, so only pass side-effect-free expressions.

// Single byte in 0xa1..0xdf (decoded through the JIS X 0201 table).
#define IsKana(c) ((c) >= 0xa1 && (c) <= 0xdf)
// First byte of a two-byte sequence: 0x81..0x9f or 0xe0..0xfc.
#define IsSjisChar1(c) \
    (((c) >= 0x81 && (c) <= 0x9f) || ((c) >= 0xe0 && (c) <= 0xfc))
// Second byte of a two-byte sequence: 0x40..0xfc, excluding 0x7f.
#define IsSjisChar2(c) ((c) >= 0x40 && (c) != 0x7f && (c) <= 0xfc)
// First byte in 0xf0..0xfc (the decoder treats these as user-defined rows).
#define IsUserDefinedChar1(c) ((c) >= 0xf0 && (c) <= 0xfc)

// Wraps a non-zero code unit in a QChar; zero yields QChar::ReplacementCharacter.
#define QValidChar(u) ((u) ? QChar(static_cast<ushort>(u)) : QChar(QChar::ReplacementCharacter))
68 
// Body of the QSjisCodec constructor; it contains no statements.
// NOTE(review): the signature line and any member-initializer list are not
// visible above this body. The destructor deletes `conv`, so `conv` is
// presumably initialised on the missing line -- confirm against the full source.
74 {
75 }
76 
77 
// Body of the QSjisCodec destructor: frees the converter object held in `conv`.
// NOTE(review): the signature line is not visible above this body.
82 {
// The cast yields a non-const QJpUnicodeConv pointer for the delete expression.
83  delete (QJpUnicodeConv*)conv;
// Clear the pointer so it no longer refers to the freed converter.
84  conv = 0;
85 }
86 
87 
// Body of the Unicode -> Shift-JIS encoder. It reads `len` QChars from `uc` and
// returns the encoded bytes as a QByteArray; when `state` is non-null the
// number of unencodable characters is added to state->invalidChars.
// NOTE(review): the signature line is not visible above this body; the names
// `uc`, `len` and `state` are taken from their use below -- confirm against the
// class declaration.
89 {
// Byte written for characters that no table can encode: '?' by default, or a
// zero byte when the caller set ConvertInvalidToNull.
90  char replacement = '?';
91  if (state) {
92  if (state->flags & ConvertInvalidToNull)
93  replacement = 0;
94  }
95  int invalid = 0;
96 
// Every branch below writes at most two bytes per input QChar, so 2*len bytes
// (plus one spare) is enough; the array is trimmed to the real size afterwards.
97  int rlen = 2*len + 1;
98  QByteArray rstr;
99  rstr.resize(rlen);
100  uchar* cursor = (uchar*)rstr.data();
// Each QChar is encoded on its own; the tables are tried in the fixed order
// below and the first non-zero result wins.
101  for (int i = 0; i < len; i++) {
102  QChar ch = uc[i];
103  uint j;
104  if (ch.row() == 0x00 && ch.cell() < 0x80) {
105  // ASCII
106  *cursor++ = ch.cell();
107  } else if ((j = conv->unicodeToJisx0201(ch.row(), ch.cell())) != 0) {
108  // JIS X 0201 Latin or JIS X 0201 Kana
// Written as a single byte.
109  *cursor++ = j;
110  } else if ((j = conv->unicodeToSjis(ch.row(), ch.cell())) != 0) {
111  // JIS X 0208
// Two-byte result: high byte first, then low byte.
112  *cursor++ = (j >> 8);
113  *cursor++ = (j & 0xff);
114  } else if ((j = conv->unicodeToSjisibmvdc(ch.row(), ch.cell())) != 0) {
115  // JIS X 0208 IBM VDC
116  *cursor++ = (j >> 8);
117  *cursor++ = (j & 0xff);
118  } else if ((j = conv->unicodeToCp932(ch.row(), ch.cell())) != 0) {
119  // CP932 (for lead bytes 87, ee & ed)
120  *cursor++ = (j >> 8);
121  *cursor++ = (j & 0xff);
122  } else if ((j = conv->unicodeToJisx0212(ch.row(), ch.cell())) != 0) {
123  // JIS X 0212 (can't be encoded in ShiftJIS !)
// A fixed two-byte placeholder is emitted instead; note that this case is
// NOT counted in `invalid`.
124  *cursor++ = 0x81; // white square
125  *cursor++ = 0xa0; // white square
126  } else {
127  // Error
// No table matched: emit the replacement byte and count the failure.
128  *cursor++ = replacement;
129  ++invalid;
130  }
131  }
// Shrink the array to the number of bytes actually written.
132  rstr.resize(cursor - (const uchar*)rstr.constData());
133 
134  if (state) {
135  state->invalidChars += invalid;
136  }
137  return rstr;
138 }
139 
140 QString QSjisCodec::convertToUnicode(const char* chars, int len, ConverterState *state) const
141 {
142  uchar buf[1] = {0};
143  int nbuf = 0;
144  QChar replacement = QChar::ReplacementCharacter;
145  if (state) {
146  if (state->flags & ConvertInvalidToNull)
147  replacement = QChar::Null;
148  nbuf = state->remainingChars;
149  buf[0] = state->state_data[0];
150  }
151  int invalid = 0;
152  uint u= 0;
153  QString result;
154  for (int i=0; i<len; i++) {
155  uchar ch = chars[i];
156  switch (nbuf) {
157  case 0:
158  if (ch < 0x80) {
159  result += QValidChar(ch);
160  } else if (IsKana(ch)) {
161  // JIS X 0201 Latin or JIS X 0201 Kana
162  u = conv->jisx0201ToUnicode(ch);
163  result += QValidChar(u);
164  } else if (IsSjisChar1(ch)) {
165  // JIS X 0208
166  buf[0] = ch;
167  nbuf = 1;
168  } else {
169  // Invalid
170  result += replacement;
171  ++invalid;
172  }
173  break;
174  case 1:
175  // JIS X 0208
176  if (IsSjisChar2(ch)) {
177  if ((u = conv->sjisibmvdcToUnicode(buf[0], ch))) {
178  result += QValidChar(u);
179  } else if ((u = conv->cp932ToUnicode(buf[0], ch))) {
180  result += QValidChar(u);
181  }
182  else if (IsUserDefinedChar1(buf[0])) {
183  result += QChar::ReplacementCharacter;
184  } else {
185  u = conv->sjisToUnicode(buf[0], ch);
186  result += QValidChar(u);
187  }
188  } else {
189  // Invalid
190  result += replacement;
191  ++invalid;
192  }
193  nbuf = 0;
194  break;
195  }
196  }
197 
198  if (state) {
199  state->remainingChars = nbuf;
200  state->state_data[0] = buf[0];
201  state->invalidChars += invalid;
202  }
203  return result;
204 }
205 
206 
// Body of _mibEnum(): returns the constant 17.
// NOTE(review): the signature line is not visible above this body. 17 is
// presumably the IANA MIBenum assigned to Shift_JIS -- confirm against the
// IANA character-sets registry.
208 {
209  return 17;
210 }
211 
// Body of _name(): returns the codec name "Shift_JIS".
// NOTE(review): the signature line is not visible above this body.
213 {
214  return "Shift_JIS";
215 }
216 
// Body of _aliases(): returns the alternative names for this codec,
// "SJIS" and "MS_Kanji", as a list of byte arrays.
// NOTE(review): the signature line is not visible above this body.
221 {
222  QList<QByteArray> list;
223  list << "SJIS" // Qt 3 compat
224  << "MS_Kanji";
225  return list;
226 }
227 #endif // QT_NO_TEXTCODEC
228 
uint unicodeToSjis(uint h, uint l) const
Definition: qjpunicode.cpp:275
virtual uint unicodeToJisx0212(uint h, uint l) const
Definition: qjpunicode.cpp:242
static int _mibEnum()
Definition: qsjiscodec.cpp:207
static QByteArray _name()
Definition: qsjiscodec.cpp:212
#define QT_END_NAMESPACE
This macro expands to.
Definition: qglobal.h:90
uint unicodeToJisx0201(uint h, uint l) const
Definition: qjpunicode.cpp:189
char * data()
Returns a pointer to the data stored in the byte array.
Definition: qbytearray.h:429
QSjisCodec()
Creates a Shift-JIS codec.
Definition: qsjiscodec.cpp:73
The QByteArray class provides an array of bytes.
Definition: qbytearray.h:135
#define IsUserDefinedChar1(c)
Definition: qsjiscodec.cpp:65
quint16 u
The QString class provides a Unicode character string.
Definition: qstring.h:83
uint unicodeToSjisibmvdc(uint h, uint l) const
const QJpUnicodeConv * conv
Definition: qsjiscodec.h:99
The QChar class provides a 16-bit Unicode character.
Definition: qchar.h:72
static const uint Default
Definition: qsplitter_p.h:61
~QSjisCodec()
Destroys the Shift-JIS codec.
Definition: qsjiscodec.cpp:81
unsigned char uchar
Definition: qglobal.h:994
#define QT_BEGIN_NAMESPACE
This macro expands to.
Definition: qglobal.h:89
unsigned int uint
Definition: qglobal.h:996
uint jisx0201ToUnicode(uint h, uint l) const
Definition: qjpunicode.cpp:112
uint sjisibmvdcToUnicode(uint h, uint l) const
#define QValidChar(u)
Definition: qsjiscodec.cpp:67
ConversionFlags flags
Definition: qtextcodec.h:106
const char * constData() const
Returns a pointer to the data stored in the byte array.
Definition: qbytearray.h:433
uint cp932ToUnicode(uint h, uint l) const
void resize(int size)
Sets the size of the byte array to size bytes.
#define IsKana(c)
Definition: qsjiscodec.cpp:61
uchar cell() const
Returns the cell (least significant byte) of the Unicode character.
Definition: qchar.h:283
QString convertToUnicode(const char *, int, ConverterState *) const
QTextCodec subclasses must reimplement this function.
Definition: qsjiscodec.cpp:140
uint sjisToUnicode(uint h, uint l) const
Definition: qjpunicode.cpp:264
QByteArray convertFromUnicode(const QChar *, int, ConverterState *) const
QTextCodec subclasses must reimplement this function.
Definition: qsjiscodec.cpp:88
static QList< QByteArray > _aliases()
Returns the codec's aliases.
Definition: qsjiscodec.cpp:220
uint unicodeToCp932(uint h, uint l) const
uchar row() const
Returns the row (most significant byte) of the Unicode character.
Definition: qchar.h:284
#define IsSjisChar1(c)
Definition: qsjiscodec.cpp:62
#define IsSjisChar2(c)
Definition: qsjiscodec.cpp:64