Qt 4.8
qisciicodec.cpp
Go to the documentation of this file.
1 /****************************************************************************
2 **
3 ** Copyright (C) 2014 Digia Plc and/or its subsidiary(-ies).
4 ** Contact: http://www.qt-project.org/legal
5 **
6 ** This file is part of the QtCore module of the Qt Toolkit.
7 **
8 ** $QT_BEGIN_LICENSE:LGPL$
9 ** Commercial License Usage
10 ** Licensees holding valid commercial Qt licenses may use this file in
11 ** accordance with the commercial license agreement provided with the
12 ** Software or, alternatively, in accordance with the terms contained in
13 ** a written agreement between you and Digia. For licensing terms and
14 ** conditions see http://qt.digia.com/licensing. For further information
15 ** use the contact form at http://qt.digia.com/contact-us.
16 **
17 ** GNU Lesser General Public License Usage
18 ** Alternatively, this file may be used under the terms of the GNU Lesser
19 ** General Public License version 2.1 as published by the Free Software
20 ** Foundation and appearing in the file LICENSE.LGPL included in the
21 ** packaging of this file. Please review the following information to
22 ** ensure the GNU Lesser General Public License version 2.1 requirements
23 ** will be met: http://www.gnu.org/licenses/old-licenses/lgpl-2.1.html.
24 **
25 ** In addition, as a special exception, Digia gives you certain additional
26 ** rights. These rights are described in the Digia Qt LGPL Exception
27 ** version 1.1, included in the file LGPL_EXCEPTION.txt in this package.
28 **
29 ** GNU General Public License Usage
30 ** Alternatively, this file may be used under the terms of the GNU
31 ** General Public License version 3.0 as published by the Free Software
32 ** Foundation and appearing in the file LICENSE.GPL included in the
33 ** packaging of this file. Please review the following information to
34 ** ensure the GNU General Public License version 3.0 requirements will be
35 ** met: http://www.gnu.org/copyleft/gpl.html.
36 **
37 **
38 ** $QT_END_LICENSE$
39 **
40 ****************************************************************************/
41 #include "qisciicodec_p.h"
42 #include "qlist.h"
43 
44 #ifndef QT_NO_CODECS
45 
47 
59 struct Codecs {
60  const char name[10];
62 };
63 
64 static const Codecs codecs [] = {
65  { "Iscii-Dev", 0x900 },
66  { "Iscii-Bng", 0x980 },
67  { "Iscii-Pnj", 0xa00 },
68  { "Iscii-Gjr", 0xa80 },
69  { "Iscii-Ori", 0xb00 },
70  { "Iscii-Tml", 0xb80 },
71  { "Iscii-Tlg", 0xc00 },
72  { "Iscii-Knd", 0xc80 },
73  { "Iscii-Mlm", 0xd00 }
74 };
75 
77 {
78 }
79 
81 {
82  return codecs[idx].name;
83 }
84 
86 {
87  /* There is no MIBEnum for Iscii */
88  return -3000-idx;
89 }
90 
91 static const uchar inv = 0xFF;
92 
93 /* iscii range from 0xa0 - 0xff */
94 static const uchar iscii_to_uni_table[0x60] = {
95  0x00, 0x01, 0x02, 0x03,
96  0x05, 0x06, 0x07, 0x08,
97  0x09, 0x0a, 0x0b, 0x0e,
98  0x0f, 0x20, 0x0d, 0x12,
99 
100  0x13, 0x14, 0x11, 0x15,
101  0x16, 0x17, 0x18, 0x19,
102  0x1a, 0x1b, 0x1c, 0x1d,
103  0x1e, 0x1f, 0x20, 0x21,
104 
105  0x22, 0x23, 0x24, 0x25,
106  0x26, 0x27, 0x28, 0x29,
107  0x2a, 0x2b, 0x2c, 0x2d,
108  0x2e, 0x2f, 0x5f, 0x30,
109 
110  0x31, 0x32, 0x33, 0x34,
111  0x35, 0x36, 0x37, 0x38,
112  0x39, inv, 0x3e, 0x3f,
113  0x40, 0x41, 0x42, 0x43,
114 
115  0x46, 0x47, 0x48, 0x45,
116  0x4a, 0x4b, 0x4c, 0x49,
117  0x4d, 0x3c, 0x64, 0x00,
118  0x00, 0x00, 0x00, 0x00,
119 
120  0x00, 0x66, 0x67, 0x68,
121  0x69, 0x6a, 0x6b, 0x6c,
122  0x6d, 0x6e, 0x6f, 0x00,
123  0x00, 0x00, 0x00, 0x00
124 };
125 
126 static const uchar uni_to_iscii_table[0x80] = {
127  0x00, 0xa1, 0xa2, 0xa3,
128  0x00, 0xa4, 0xa5, 0xa6,
129  0xa7, 0xa8, 0xa9, 0xaa,
130  0x00, 0xae, 0xab, 0xac,
131 
132  0xad, 0xb2, 0xaf, 0xb0,
133  0xb1, 0xb3, 0xb4, 0xb5,
134  0xb6, 0xb7, 0xb8, 0xb9,
135  0xba, 0xbb, 0xbc, 0xbd,
136 
137  0xbe, 0xbf, 0xc0, 0xc1,
138  0xc2, 0xc3, 0xc4, 0xc5,
139  0xc6, 0xc7, 0xc8, 0xc9,
140  0xca, 0xcb, 0xcc, 0xcd,
141 
142  0xcf, 0xd0, 0xd1, 0xd2,
143  0xd3, 0xd4, 0xd5, 0xd6,
144  0xd7, 0xd8, 0x00, 0x00,
145  0xe9, 0x00, 0xda, 0xdb,
146 
147  0xdc, 0xdd, 0xde, 0xdf,
148  0x00, 0xe3, 0xe0, 0xe1,
149  0xe2, 0xe7, 0xe4, 0xe5,
150  0xe6, 0xe8, 0x00, 0x00,
151 
152  0x00, 0x00, 0x00, 0x00,
153  0x00, 0x00, 0x00, 0x00,
154  0x01, 0x02, 0x03, 0x04, // decomposable into the uc codes listed here + nukta
155  0x05, 0x06, 0x07, 0xce,
156 
157  0x00, 0x00, 0x00, 0x00,
158  0xea, 0x08, 0xf1, 0xf2,
159  0xf3, 0xf4, 0xf5, 0xf6,
160  0xf7, 0xf8, 0xf9, 0xfa,
161 
162  0x00, 0x00, 0x00, 0x00,
163  0x00, 0x00, 0x00, 0x00,
164  0x00, 0x00, 0x00, 0x00,
165  0x00, 0x00, 0x00, 0x00
166 };
167 
168 static const uchar uni_to_iscii_pairs[] = {
169  0x00, 0x00,
170  0x15, 0x3c, // 0x958
171  0x16, 0x3c, // 0x959
172  0x17, 0x3c, // 0x95a
173  0x1c, 0x3c, // 0x95b
174  0x21, 0x3c, // 0x95c
175  0x22, 0x3c, // 0x95d
176  0x2b, 0x3c, // 0x95e
177  0x64, 0x64 // 0x965
178 };
179 
180 
182 {
183  char replacement = '?';
184  bool halant = false;
185  if (state) {
186  if (state->flags & ConvertInvalidToNull)
187  replacement = 0;
188  halant = state->state_data[0];
189  }
190  int invalid = 0;
191 
192  QByteArray result(2 * len, Qt::Uninitialized); //worst case
193 
194  uchar *ch = reinterpret_cast<uchar *>(result.data());
195 
196  const int base = codecs[idx].base;
197 
198  for (int i =0; i < len; ++i) {
199  const ushort codePoint = uc[i].unicode();
200 
201  /* The low 7 bits of ISCII is plain ASCII. However, we go all the
202  * way up to 0xA0 such that we can roundtrip with convertToUnicode()'s
203  * behavior. */
204  if(codePoint < 0xA0) {
205  *ch++ = static_cast<uchar>(codePoint);
206  continue;
207  }
208 
209  const int pos = codePoint - base;
210  if (pos > 0 && pos < 0x80) {
211  uchar iscii = uni_to_iscii_table[pos];
212  if (iscii > 0x80) {
213  *ch++ = iscii;
214  } else if (iscii) {
215  const uchar *pair = uni_to_iscii_pairs + 2*iscii;
216  *ch++ = *pair++;
217  *ch++ = *pair++;
218  } else {
219  *ch++ = replacement;
220  ++invalid;
221  }
222  } else {
223  if (uc[i].unicode() == 0x200c) { // ZWNJ
224  if (halant)
225  // Consonant Halant ZWNJ -> Consonant Halant Halant
226  *ch++ = 0xe8;
227  } else if (uc[i].unicode() == 0x200d) { // ZWJ
228  if (halant)
229  // Consonant Halant ZWJ -> Consonant Halant Nukta
230  *ch++ = 0xe9;
231  } else {
232  *ch++ = replacement;
233  ++invalid;
234  }
235  }
236  halant = (pos == 0x4d);
237  }
238  result.truncate(ch - (uchar *)result.data());
239 
240  if (state) {
241  state->invalidChars += invalid;
242  state->state_data[0] = halant;
243  }
244  return result;
245 }
246 
247 QString QIsciiCodec::convertToUnicode(const char* chars, int len, ConverterState *state) const
248 {
249  bool halant = false;
250  if (state) {
251  halant = state->state_data[0];
252  }
253 
254  QString result(len, Qt::Uninitialized);
255  QChar *uc = result.data();
256 
257  const int base = codecs[idx].base;
258 
259  for (int i = 0; i < len; ++i) {
260  ushort ch = (uchar) chars[i];
261  if (ch < 0xa0)
262  *uc++ = ch;
263  else {
264  ushort c = iscii_to_uni_table[ch - 0xa0];
265  if (halant && (c == inv || c == 0xe9)) {
266  // Consonant Halant inv -> Consonant Halant ZWJ
267  // Consonant Halant Nukta -> Consonant Halant ZWJ
268  *uc++ = QChar(0x200d);
269  } else if (halant && c == 0xe8) {
270  // Consonant Halant Halant -> Consonant Halant ZWNJ
271  *uc++ = QChar(0x200c);
272  } else {
273  *uc++ = QChar(c+base);
274  }
275  }
276  halant = ((uchar)chars[i] == 0xe8);
277  }
278  result.resize(uc - result.unicode());
279 
280  if (state) {
281  state->state_data[0] = halant;
282  }
283  return result;
284 }
285 
287 
288 #endif // QT_NO_CODECS
void truncate(int pos)
Truncates the byte array at index position pos.
unsigned char c[8]
Definition: qnumeric_p.h:62
static const uchar uni_to_iscii_table[0x80]
#define QT_END_NAMESPACE
This macro expands to.
Definition: qglobal.h:90
char * data()
Returns a pointer to the data stored in the byte array.
Definition: qbytearray.h:429
ushort unicode() const
This is an overloaded member function, provided for convenience. It differs from the above function o...
Definition: qchar.h:251
static const uchar inv
Definition: qisciicodec.cpp:91
The QByteArray class provides an array of bytes.
Definition: qbytearray.h:135
const char name[10]
Definition: qisciicodec.cpp:60
static const Codecs codecs[]
Definition: qisciicodec.cpp:64
The QString class provides a Unicode character string.
Definition: qstring.h:83
The QChar class provides a 16-bit Unicode character.
Definition: qchar.h:72
QChar * data()
Returns a pointer to the data stored in the QString.
Definition: qstring.h:710
unsigned char uchar
Definition: qglobal.h:994
#define QT_BEGIN_NAMESPACE
This macro expands to.
Definition: qglobal.h:89
ushort base
Definition: qisciicodec.cpp:61
const QChar * unicode() const
Returns a '\0'-terminated Unicode representation of the string.
Definition: qstring.h:706
int mibEnum() const
Subclasses of QTextCodec must reimplement this function.
Definition: qisciicodec.cpp:85
static const uchar iscii_to_uni_table[0x60]
Definition: qisciicodec.cpp:94
ConversionFlags flags
Definition: qtextcodec.h:106
void resize(int size)
Sets the size of the string to size characters.
Definition: qstring.cpp:1353
static const uchar uni_to_iscii_pairs[]
unsigned short ushort
Definition: qglobal.h:995
QString convertToUnicode(const char *, int, ConverterState *) const
QTextCodec subclasses must reimplement this function.
QByteArray convertFromUnicode(const QChar *, int, ConverterState *) const
QTextCodec subclasses must reimplement this function.
QByteArray name() const
QTextCodec subclasses must reimplement this function.
Definition: qisciicodec.cpp:80