Qt 4.8
qjiscodec.cpp
Go to the documentation of this file.
1 /****************************************************************************
2 **
3 ** Copyright (C) 2014 Digia Plc and/or its subsidiary(-ies).
4 ** Contact: http://www.qt-project.org/legal
5 **
6 ** This file is part of the plugins of the Qt Toolkit.
7 **
8 ** $QT_BEGIN_LICENSE:LGPL$
9 ** Commercial License Usage
10 ** Licensees holding valid commercial Qt licenses may use this file in
11 ** accordance with the commercial license agreement provided with the
12 ** Software or, alternatively, in accordance with the terms contained in
13 ** a written agreement between you and Digia. For licensing terms and
14 ** conditions see http://qt.digia.com/licensing. For further information
15 ** use the contact form at http://qt.digia.com/contact-us.
16 **
17 ** GNU Lesser General Public License Usage
18 ** Alternatively, this file may be used under the terms of the GNU Lesser
19 ** General Public License version 2.1 as published by the Free Software
20 ** Foundation and appearing in the file LICENSE.LGPL included in the
21 ** packaging of this file. Please review the following information to
22 ** ensure the GNU Lesser General Public License version 2.1 requirements
23 ** will be met: http://www.gnu.org/licenses/old-licenses/lgpl-2.1.html.
24 **
25 ** In addition, as a special exception, Digia gives you certain additional
26 ** rights. These rights are described in the Digia Qt LGPL Exception
27 ** version 1.1, included in the file LGPL_EXCEPTION.txt in this package.
28 **
29 ** GNU General Public License Usage
30 ** Alternatively, this file may be used under the terms of the GNU
31 ** General Public License version 3.0 as published by the Free Software
32 ** Foundation and appearing in the file LICENSE.GPL included in the
33 ** packaging of this file. Please review the following information to
34 ** ensure the GNU General Public License version 3.0 requirements will be
35 ** met: http://www.gnu.org/copyleft/gpl.html.
36 **
37 **
38 ** $QT_END_LICENSE$
39 **
40 ****************************************************************************/
41 
42 // Most of the code here was originally written by Serika Kurusugawa,
43 // a.k.a. Junji Takagi, and is included in Qt with the author's permission
44 // and the grateful thanks of the Qt team.
45 
51 #include "qjiscodec.h"
52 #include "qlist.h"
53 
55 
56 #ifndef QT_NO_TEXTCODEC
// Control bytes and single-byte code points that the ISO-2022-JP encoder and
// decoder in this file treat specially.
// NOTE(review): convertFromUnicode also compares against ReverseSolidus, whose
// enumerator is not visible in this listing - confirm it is declared here.
57 enum {
58  Esc = 0x1b, // first byte of every escape sequence in the Esc_* tables
59  So = 0x0e, // Shift Out: the decoder switches to JIS X 0201 Kana
60  Si = 0x0f, // Shift In: the decoder returns to Ascii or JIS X 0201 Latin
61 
63  YenSign = 0x5c, // the encoder forces JIS X 0201 Latin when the cell equals this
64  Tilde = 0x7e, // the encoder forces Ascii when an ASCII cell equals this
65  Overline = 0x7e // same value as Tilde; the encoder forces JIS X 0201 Latin on it
66 };
67 
// True when c lies in 0xa1..0xdf (presumably the 8-bit JIS X 0201 kana range -
// confirm against the conversion tables). The argument is evaluated twice.
68 #define IsKana(c) (((c) >= 0xa1) && ((c) <= 0xdf))
// True when c lies in 0x21..0x7e. The argument is evaluated twice.
69 #define IsJisChar(c) (((c) >= 0x21) && ((c) <= 0x7e))
70 
// Wraps a table-lookup result in a QChar: a zero result becomes
// QChar::ReplacementCharacter, any other value is cast to ushort first.
// The argument is evaluated twice.
71 #define QValidChar(u) ((u) ? QChar((ushort)(u)) : QChar(QChar::ReplacementCharacter))
72 
78 
// Bytes that convertToUnicode accepts (besides '$') as the byte following ESC,
// and as the byte following "ESC $", before the final byte of the sequence.
79 static const char Esc_CHARS[] = "()*+-./";
80 
// NUL-terminated escape sequences, one per character set. The encoder appends
// them to its output whenever the active character set changes.
81 static const char Esc_Ascii[] = {Esc, '(', 'B', 0 };
82 static const char Esc_JISX0201_Latin[] = {Esc, '(', 'J', 0 };
83 static const char Esc_JISX0201_Kana[] = {Esc, '(', 'I', 0 };
84 static const char Esc_JISX0208_1978[] = {Esc, '$', '@', 0 };
85 static const char Esc_JISX0208_1983[] = {Esc, '$', 'B', 0 };
86 static const char Esc_JISX0212[] = {Esc, '$', '(', 'D', 0 };
// Lookup table indexed by (state - MinState) in convertFromUnicode.
// NOTE(review): the entries between Esc_Ascii and Esc_JISX0212 are not visible
// in this listing - confirm their order matches the Iso2022State enumerators.
87 static const char * const Esc_SEQ[] = { Esc_Ascii,
92  Esc_JISX0212 };
93 
96 {
97 }
98 
99 
102 {
103  delete (QJpUnicodeConv*)conv;
104  conv = 0;
105 }
106 
108 {
109  char replacement = '?';
110  if (cs) {
111  if (cs->flags & ConvertInvalidToNull)
112  replacement = 0;
113  }
114  int invalid = 0;
115 
116  QByteArray result;
117  Iso2022State state = Ascii;
118  Iso2022State prev = Ascii;
119  for (int i = 0; i < len; i++) {
120  QChar ch = uc[i];
121  uint j;
122  if (ch.row() == 0x00 && ch.cell() < 0x80) {
123  // Ascii
124  if (state != JISX0201_Latin ||
125  ch.cell() == ReverseSolidus || ch.cell() == Tilde) {
126  state = Ascii;
127  }
128  j = ch.cell();
129  } else if ((j = conv->unicodeToJisx0201(ch.row(), ch.cell())) != 0) {
130  if (j < 0x80) {
131  // JIS X 0201 Latin
132  if (state != Ascii ||
133  ch.cell() == YenSign || ch.cell() == Overline) {
134  state = JISX0201_Latin;
135  }
136  } else {
137  // JIS X 0201 Kana
138  state = JISX0201_Kana;
139  j &= 0x7f;
140  }
141  } else if ((j = conv->unicodeToJisx0208(ch.row(), ch.cell())) != 0) {
142  // JIS X 0208
143  state = JISX0208_1983;
144  } else if ((j = conv->unicodeToJisx0212(ch.row(), ch.cell())) != 0) {
145  // JIS X 0212
146  state = JISX0212;
147  } else {
148  // Invalid
149  state = UnknownState;
150  j = replacement;
151  ++invalid;
152  }
153  if (state != prev) {
154  if (state == UnknownState) {
155  result += Esc_Ascii;
156  } else {
157  result += Esc_SEQ[state - MinState];
158  }
159  prev = state;
160  }
161  if (j < 0x0100) {
162  result += j & 0xff;
163  } else {
164  result += (j >> 8) & 0xff;
165  result += j & 0xff;
166  }
167  }
168  if (prev != Ascii) {
169  result += Esc_Ascii;
170  }
171 
172  if (cs) {
173  cs->invalidChars += invalid;
174  }
175  return result;
176 }
177 
178 QString QJisCodec::convertToUnicode(const char* chars, int len, ConverterState *cs) const
179 {
180  uchar buf[4] = {0, 0, 0, 0};
181  int nbuf = 0;
182  Iso2022State state = Ascii, prev = Ascii;
183  bool esc = false;
184  QChar replacement = QChar::ReplacementCharacter;
185  if (cs) {
186  if (cs->flags & ConvertInvalidToNull)
187  replacement = QChar::Null;
188  nbuf = cs->remainingChars;
189  buf[0] = (cs->state_data[0] >> 24) & 0xff;
190  buf[1] = (cs->state_data[0] >> 16) & 0xff;
191  buf[2] = (cs->state_data[0] >> 8) & 0xff;
192  buf[3] = (cs->state_data[0] >> 0) & 0xff;
193  state = (Iso2022State)((cs->state_data[1] >> 0) & 0xff);
194  prev = (Iso2022State)((cs->state_data[1] >> 8) & 0xff);
195  esc = cs->state_data[2];
196  }
197  int invalid = 0;
198 
199  QString result;
200  for (int i=0; i<len; i++) {
201  uchar ch = chars[i];
202  if (esc) {
203  // Escape sequence
204  state = UnknownState;
205  switch (nbuf) {
206  case 0:
207  if (ch == '$' || strchr(Esc_CHARS, ch)) {
208  buf[nbuf++] = ch;
209  } else {
210  nbuf = 0;
211  esc = false;
212  }
213  break;
214  case 1:
215  if (buf[0] == '$') {
216  if (strchr(Esc_CHARS, ch)) {
217  buf[nbuf++] = ch;
218  } else {
219  switch (ch) {
220  case '@':
221  state = JISX0208_1978; // Esc $ @
222  break;
223  case 'B':
224  state = JISX0208_1983; // Esc $ B
225  break;
226  }
227  nbuf = 0;
228  esc = false;
229  }
230  } else {
231  if (buf[0] == '(') {
232  switch (ch) {
233  case 'B':
234  state = Ascii; // Esc (B
235  break;
236  case 'I':
237  state = JISX0201_Kana; // Esc (I
238  break;
239  case 'J':
240  state = JISX0201_Latin; // Esc (J
241  break;
242  }
243  }
244  nbuf = 0;
245  esc = false;
246  }
247  break;
248  case 2:
249  if (buf[1] == '(') {
250  switch (ch) {
251  case 'D':
252  state = JISX0212; // Esc $ (D
253  break;
254  }
255  }
256  nbuf = 0;
257  esc = false;
258  break;
259  }
260  } else {
261  if (ch == Esc) {
262  // Escape sequence
263  nbuf = 0;
264  esc = true;
265  } else if (ch == So) {
266  // Shift out
267  prev = state;
268  state = JISX0201_Kana;
269  nbuf = 0;
270  } else if (ch == Si) {
271  // Shift in
272  if (prev == Ascii || prev == JISX0201_Latin) {
273  state = prev;
274  } else {
275  state = Ascii;
276  }
277  nbuf = 0;
278  } else {
279  uint u;
280  switch (nbuf) {
281  case 0:
282  switch (state) {
283  case Ascii:
284  if (ch < 0x80) {
285  result += QLatin1Char(ch);
286  break;
287  }
288  /* fall through */
289  case JISX0201_Latin:
290  u = conv->jisx0201ToUnicode(ch);
291  result += QValidChar(u);
292  break;
293  case JISX0201_Kana:
294  u = conv->jisx0201ToUnicode(ch | 0x80);
295  result += QValidChar(u);
296  break;
297  case JISX0208_1978:
298  case JISX0208_1983:
299  case JISX0212:
300  buf[nbuf++] = ch;
301  break;
302  default:
303  result += QChar::ReplacementCharacter;
304  break;
305  }
306  break;
307  case 1:
308  switch (state) {
309  case JISX0208_1978:
310  case JISX0208_1983:
311  u = conv->jisx0208ToUnicode(buf[0] & 0x7f, ch & 0x7f);
312  result += QValidChar(u);
313  break;
314  case JISX0212:
315  u = conv->jisx0212ToUnicode(buf[0] & 0x7f, ch & 0x7f);
316  result += QValidChar(u);
317  break;
318  default:
319  result += replacement;
320  ++invalid;
321  break;
322  }
323  nbuf = 0;
324  break;
325  }
326  }
327  }
328  }
329 
330  if (cs) {
331  cs->remainingChars = nbuf;
332  cs->invalidChars += invalid;
333  cs->state_data[0] = (buf[0] << 24) + (buf[1] << 16) + (buf[2] << 8) + buf[3];
334  cs->state_data[1] = (prev << 8) + state;
335  cs->state_data[2] = esc;
336  }
337 
338  return result;
339 }
340 
341 
342 
345 {
346  return 39;
347 }
348 
351 {
352  return "ISO-2022-JP";
353 }
354 
359 {
360  QList<QByteArray> list;
361  list << "JIS7"; // Qt 3 compat
362  return list;
363 }
364 
365 #endif // QT_NO_TEXTCODEC
366 
virtual uint unicodeToJisx0212(uint h, uint l) const
Definition: qjpunicode.cpp:242
#define QT_END_NAMESPACE
This macro expands to.
Definition: qglobal.h:90
uint unicodeToJisx0201(uint h, uint l) const
Definition: qjpunicode.cpp:189
virtual uint jisx0208ToUnicode(uint h, uint l) const
Definition: qjpunicode.cpp:143
Definition: qjiscodec.cpp:59
static const char Esc_JISX0201_Latin[]
Definition: qjiscodec.cpp:82
static const char Esc_JISX0201_Kana[]
Definition: qjiscodec.cpp:83
The QByteArray class provides an array of bytes.
Definition: qbytearray.h:135
quint16 u
The QString class provides a Unicode character string.
Definition: qstring.h:83
The QChar class provides a 16-bit Unicode character.
Definition: qchar.h:72
static const uint Default
Definition: qsplitter_p.h:61
#define QValidChar(u)
Definition: qjiscodec.cpp:71
Definition: qjiscodec.cpp:60
unsigned char uchar
Definition: qglobal.h:994
#define QT_BEGIN_NAMESPACE
This macro expands to.
Definition: qglobal.h:89
QString convertToUnicode(const char *, int, ConverterState *) const
QTextCodec subclasses must reimplement this function.
Definition: qjiscodec.cpp:178
static int _mibEnum()
Definition: qjiscodec.cpp:344
unsigned int uint
Definition: qglobal.h:996
uint jisx0201ToUnicode(uint h, uint l) const
Definition: qjpunicode.cpp:112
static const char Esc_CHARS[]
Definition: qjiscodec.cpp:79
static QList< QByteArray > _aliases()
Returns the codec's mime name.
Definition: qjiscodec.cpp:358
static const char *const Esc_SEQ[]
Definition: qjiscodec.cpp:87
virtual uint jisx0212ToUnicode(uint h, uint l) const
Definition: qjpunicode.cpp:161
ConversionFlags flags
Definition: qtextcodec.h:106
static const char Esc_JISX0208_1983[]
Definition: qjiscodec.cpp:85
static const char Esc_JISX0208_1978[]
Definition: qjiscodec.cpp:84
static const char Esc_JISX0212[]
Definition: qjiscodec.cpp:86
virtual uint unicodeToJisx0208(uint h, uint l) const
Definition: qjpunicode.cpp:221
uchar cell() const
Returns the cell (least significant byte) of the Unicode character.
Definition: qchar.h:283
QByteArray convertFromUnicode(const QChar *, int, ConverterState *) const
QTextCodec subclasses must reimplement this function.
Definition: qjiscodec.cpp:107
static QByteArray _name()
Definition: qjiscodec.cpp:350
static const char Esc_Ascii[]
Definition: qjiscodec.cpp:81
uchar row() const
Returns the row (most significant byte) of the Unicode character.
Definition: qchar.h:284
The QLatin1Char class provides an 8-bit ASCII/Latin-1 character.
Definition: qchar.h:55
Iso2022State
Definition: qjiscodec.cpp:73
const QJpUnicodeConv * conv
Definition: qjiscodec.h:99