Qt 4.8
qeucjpcodec.cpp
Go to the documentation of this file.
1 /****************************************************************************
2 **
3 ** Copyright (C) 2014 Digia Plc and/or its subsidiary(-ies).
4 ** Contact: http://www.qt-project.org/legal
5 **
6 ** This file is part of the plugins of the Qt Toolkit.
7 **
8 ** $QT_BEGIN_LICENSE:LGPL$
9 ** Commercial License Usage
10 ** Licensees holding valid commercial Qt licenses may use this file in
11 ** accordance with the commercial license agreement provided with the
12 ** Software or, alternatively, in accordance with the terms contained in
13 ** a written agreement between you and Digia. For licensing terms and
14 ** conditions see http://qt.digia.com/licensing. For further information
15 ** use the contact form at http://qt.digia.com/contact-us.
16 **
17 ** GNU Lesser General Public License Usage
18 ** Alternatively, this file may be used under the terms of the GNU Lesser
19 ** General Public License version 2.1 as published by the Free Software
20 ** Foundation and appearing in the file LICENSE.LGPL included in the
21 ** packaging of this file. Please review the following information to
22 ** ensure the GNU Lesser General Public License version 2.1 requirements
23 ** will be met: http://www.gnu.org/licenses/old-licenses/lgpl-2.1.html.
24 **
25 ** In addition, as a special exception, Digia gives you certain additional
26 ** rights. These rights are described in the Digia Qt LGPL Exception
27 ** version 1.1, included in the file LGPL_EXCEPTION.txt in this package.
28 **
29 ** GNU General Public License Usage
30 ** Alternatively, this file may be used under the terms of the GNU
31 ** General Public License version 3.0 as published by the Free Software
32 ** Foundation and appearing in the file LICENSE.GPL included in the
33 ** packaging of this file. Please review the following information to
34 ** ensure the GNU General Public License version 3.0 requirements will be
35 ** met: http://www.gnu.org/copyleft/gpl.html.
36 **
37 **
38 ** $QT_END_LICENSE$
39 **
40 ****************************************************************************/
41 
42 // Most of the code here was originally written by Serika Kurusugawa
43 // a.k.a. Junji Takagi, and is included in Qt with the author's permission,
44 // and the grateful thanks of the Qt team.
45 
51 /*
52  * Copyright (C) 1999 Serika Kurusugawa, All rights reserved.
53  *
54  * Redistribution and use in source and binary forms, with or without
55  * modification, are permitted provided that the following conditions
56  * are met:
57  * 1. Redistributions of source code must retain the above copyright
58  * notice, this list of conditions and the following disclaimer.
59  * 2. Redistributions in binary form must reproduce the above copyright
60  * notice, this list of conditions and the following disclaimer in the
61  * documentation and/or other materials provided with the distribution.
62  *
63  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
64  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
65  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
66  * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
67  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
68  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
69  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
70  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
71  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
72  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
73  * SUCH DAMAGE.
74  */
75 
76 #include "qeucjpcodec.h"
77 
79 
80 #ifndef QT_NO_TEXTCODEC
81 
82 static const uchar Ss2 = 0x8e; // Single Shift 2
83 static const uchar Ss3 = 0x8f; // Single Shift 3
84 
85 #define IsKana(c) (((c) >= 0xa1) && ((c) <= 0xdf))
86 #define IsEucChar(c) (((c) >= 0xa1) && ((c) <= 0xfe))
87 
88 #define QValidChar(u) ((u) ? QChar((ushort)(u)) : QChar(QChar::ReplacementCharacter))
89 
94 {
95 }
96 
101 {
102  delete (QJpUnicodeConv*)conv;
103  conv = 0;
104 }
105 
107 {
108  char replacement = '?';
109  if (state) {
110  if (state->flags & ConvertInvalidToNull)
111  replacement = 0;
112  }
113  int invalid = 0;
114 
115  int rlen = 3*len + 1;
116  QByteArray rstr;
117  rstr.resize(rlen);
118  uchar* cursor = (uchar*)rstr.data();
119  for (int i = 0; i < len; i++) {
120  QChar ch = uc[i];
121  uint j;
122  if (ch.unicode() < 0x80) {
123  // ASCII
124  *cursor++ = ch.cell();
125  } else if ((j = conv->unicodeToJisx0201(ch.row(), ch.cell())) != 0) {
126  if (j < 0x80) {
127  // JIS X 0201 Latin ?
128  *cursor++ = j;
129  } else {
130  // JIS X 0201 Kana
131  *cursor++ = Ss2;
132  *cursor++ = j;
133  }
134  } else if ((j = conv->unicodeToJisx0208(ch.row(), ch.cell())) != 0) {
135  // JIS X 0208
136  *cursor++ = (j >> 8) | 0x80;
137  *cursor++ = (j & 0xff) | 0x80;
138  } else if ((j = conv->unicodeToJisx0212(ch.row(), ch.cell())) != 0) {
139  // JIS X 0212
140  *cursor++ = Ss3;
141  *cursor++ = (j >> 8) | 0x80;
142  *cursor++ = (j & 0xff) | 0x80;
143  } else {
144  // Error
145  *cursor++ = replacement;
146  ++invalid;
147  }
148  }
149  rstr.resize(cursor - (uchar*)rstr.constData());
150 
151  if (state) {
152  state->invalidChars += invalid;
153  }
154  return rstr;
155 }
156 
157 
158 QString QEucJpCodec::convertToUnicode(const char* chars, int len, ConverterState *state) const
159 {
160  uchar buf[2] = {0, 0};
161  int nbuf = 0;
162  QChar replacement = QChar::ReplacementCharacter;
163  if (state) {
164  if (state->flags & ConvertInvalidToNull)
165  replacement = QChar::Null;
166  nbuf = state->remainingChars;
167  buf[0] = state->state_data[0];
168  buf[1] = state->state_data[1];
169  }
170  int invalid = 0;
171 
172  QString result;
173  for (int i=0; i<len; i++) {
174  uchar ch = chars[i];
175  switch (nbuf) {
176  case 0:
177  if (ch < 0x80) {
178  // ASCII
179  result += QLatin1Char(ch);
180  } else if (ch == Ss2 || ch == Ss3) {
181  // JIS X 0201 Kana or JIS X 0212
182  buf[0] = ch;
183  nbuf = 1;
184  } else if (IsEucChar(ch)) {
185  // JIS X 0208
186  buf[0] = ch;
187  nbuf = 1;
188  } else {
189  // Invalid
190  result += replacement;
191  ++invalid;
192  }
193  break;
194  case 1:
195  if (buf[0] == Ss2) {
196  // JIS X 0201 Kana
197  if (IsKana(ch)) {
198  uint u = conv->jisx0201ToUnicode(ch);
199  result += QValidChar(u);
200  } else {
201  result += replacement;
202  ++invalid;
203  }
204  nbuf = 0;
205  } else if (buf[0] == Ss3) {
206  // JIS X 0212-1990
207  if (IsEucChar(ch)) {
208  buf[1] = ch;
209  nbuf = 2;
210  } else {
211  // Error
212  result += replacement;
213  ++invalid;
214  nbuf = 0;
215  }
216  } else {
217  // JIS X 0208-1990
218  if (IsEucChar(ch)) {
219  uint u = conv->jisx0208ToUnicode(buf[0] & 0x7f, ch & 0x7f);
220  result += QValidChar(u);
221  } else {
222  // Error
223  result += replacement;
224  ++invalid;
225  }
226  nbuf = 0;
227  }
228  break;
229  case 2:
230  // JIS X 0212
231  if (IsEucChar(ch)) {
232  uint u = conv->jisx0212ToUnicode(buf[1] & 0x7f, ch & 0x7f);
233  result += QValidChar(u);
234  } else {
235  result += replacement;
236  ++invalid;
237  }
238  nbuf = 0;
239  }
240  }
241  if (state) {
242  state->remainingChars = nbuf;
243  state->state_data[0] = buf[0];
244  state->state_data[1] = buf[1];
245  state->invalidChars += invalid;
246  }
247  return result;
248 }
249 
251 {
252  return 18;
253 }
254 
256 {
257  return "EUC-JP";
258 }
259 #endif // QT_NO_TEXTCODEC
260 
virtual uint unicodeToJisx0212(uint h, uint l) const
Definition: qjpunicode.cpp:242
#define QT_END_NAMESPACE
This macro expands to.
Definition: qglobal.h:90
uint unicodeToJisx0201(uint h, uint l) const
Definition: qjpunicode.cpp:189
virtual uint jisx0208ToUnicode(uint h, uint l) const
Definition: qjpunicode.cpp:143
char * data()
Returns a pointer to the data stored in the byte array.
Definition: qbytearray.h:429
ushort unicode() const
This is an overloaded member function, provided for convenience. It differs from the above function only in what argument(s) it accepts.
Definition: qchar.h:251
The QByteArray class provides an array of bytes.
Definition: qbytearray.h:135
quint16 u
#define IsEucChar(c)
Definition: qeucjpcodec.cpp:86
The QString class provides a Unicode character string.
Definition: qstring.h:83
The QChar class provides a 16-bit Unicode character.
Definition: qchar.h:72
static const uint Default
Definition: qsplitter_p.h:61
unsigned char uchar
Definition: qglobal.h:994
const QJpUnicodeConv * conv
Definition: qeucjpcodec.h:99
#define QT_BEGIN_NAMESPACE
This macro expands to.
Definition: qglobal.h:89
unsigned int uint
Definition: qglobal.h:996
#define QValidChar(u)
Definition: qeucjpcodec.cpp:88
QByteArray convertFromUnicode(const QChar *, int, ConverterState *) const
QTextCodec subclasses must reimplement this function.
uint jisx0201ToUnicode(uint h, uint l) const
Definition: qjpunicode.cpp:112
virtual uint jisx0212ToUnicode(uint h, uint l) const
Definition: qjpunicode.cpp:161
~QEucJpCodec()
Destroys the codec.
ConversionFlags flags
Definition: qtextcodec.h:106
const char * constData() const
Returns a pointer to the data stored in the byte array.
Definition: qbytearray.h:433
static int _mibEnum()
void resize(int size)
Sets the size of the byte array to size bytes.
virtual uint unicodeToJisx0208(uint h, uint l) const
Definition: qjpunicode.cpp:221
static const uchar Ss2
Definition: qeucjpcodec.cpp:82
uchar cell() const
Returns the cell (least significant byte) of the Unicode character.
Definition: qchar.h:283
static const uchar Ss3
Definition: qeucjpcodec.cpp:83
static QByteArray _name()
uchar row() const
Returns the row (most significant byte) of the Unicode character.
Definition: qchar.h:284
The QLatin1Char class provides an 8-bit ASCII/Latin-1 character.
Definition: qchar.h:55
QEucJpCodec()
Constructs a QEucJpCodec.
Definition: qeucjpcodec.cpp:93
QString convertToUnicode(const char *, int, ConverterState *) const
QTextCodec subclasses must reimplement this function.
#define IsKana(c)
Definition: qeucjpcodec.cpp:85