Qt 4.8
Classes | Macros | Functions | Variables
qgb18030codec.cpp File Reference
#include "qgb18030codec.h"

Go to the source code of this file.

Classes

struct  indexTbl_t
 

Macros

#define InRange(c, lower, upper)   (((c) >= (lower)) && ((c) <= (upper)))
 
#define Is1stByte(c)   (InRange((c), 0x81, 0xFE))
 
#define Is2ndByte(c)   (Is2ndByteIn2Bytes(c) || Is2ndByteIn4Bytes(c))
 
#define Is2ndByteIn2Bytes(c)   (InRange((c), 0x40, 0xFE) && (c) != 0x7F)
 
#define Is2ndByteIn4Bytes(c)   (InRange((c), 0x30, 0x39))
 
#define Is3rdByte(c)   (InRange((c), 0x81, 0xFE))
 
#define Is4thByte(c)   (InRange((c), 0x30, 0x39))
 
#define IsByteInGb2312(c)   (InRange((c), 0xA1, 0xFE))
 
#define IsLatin(c)   ((c) <= 0x7F)
 
#define IsUDA1(a, b)   (InRange((a), 0xAA, 0xAF) && InRange((b), 0xA1, 0xFE))
 
#define IsUDA2(a, b)   (InRange((a), 0xF8, 0xFE) && InRange((b), 0xA1, 0xFE))
 
#define IsUDA3(a, b)   (InRange((a), 0xA1, 0xA7) && InRange((b), 0x40, 0xA0) && ((b) != 0x7F))
 
#define qValidChar(u)   ((u) ? (u) : static_cast<ushort>(QChar::ReplacementCharacter))
 

Functions

static uint gb4lin_to_gb (uint gb4lin)
 
static uint qt_Gb18030ToUnicode (const uchar *gbstr, int &len)
 
static int qt_UnicodeToGb18030 (uint unicode, uchar *gbchar)
 
int qt_UnicodeToGbk (uint unicode, uchar *gbchar)
 

Variables

static quint16 const gb18030_2byte_to_ucs [22046]
 
static quint16 const gb18030_4byte_to_ucs [6793]
 
static const indexTbl_t gb18030_to_ucs_index [154]
 
static quint16 const ucs_to_gb18030 [28839]
 
static const indexTbl_t ucs_to_gb18030_index [256]
 

Macro Definition Documentation

◆ InRange

#define InRange (   c,
  lower,
  upper 
)    (((c) >= (lower)) && ((c) <= (upper)))

Definition at line 53 of file qgb18030codec.cpp.

Referenced by qt_Gb18030ToUnicode(), qt_UnicodeToGb18030(), and qt_UnicodeToGbk().

◆ Is1stByte

#define Is1stByte (   c)    (InRange((c), 0x81, 0xFE))

◆ Is2ndByte

#define Is2ndByte (   c)    (Is2ndByteIn2Bytes(c) || Is2ndByteIn4Bytes(c))

Definition at line 59 of file qgb18030codec.cpp.

◆ Is2ndByteIn2Bytes

#define Is2ndByteIn2Bytes (   c)    (InRange((c), 0x40, 0xFE) && (c) != 0x7F)

◆ Is2ndByteIn4Bytes

#define Is2ndByteIn4Bytes (   c)    (InRange((c), 0x30, 0x39))

Definition at line 58 of file qgb18030codec.cpp.

Referenced by QGb18030Codec::convertToUnicode(), and qt_Gb18030ToUnicode().

◆ Is3rdByte

#define Is3rdByte (   c)    (InRange((c), 0x81, 0xFE))

Definition at line 60 of file qgb18030codec.cpp.

Referenced by QGb18030Codec::convertToUnicode(), and qt_Gb18030ToUnicode().

◆ Is4thByte

#define Is4thByte (   c)    (InRange((c), 0x30, 0x39))

Definition at line 61 of file qgb18030codec.cpp.

Referenced by QGb18030Codec::convertToUnicode(), and qt_Gb18030ToUnicode().

◆ IsByteInGb2312

#define IsByteInGb2312 (   c)    (InRange((c), 0xA1, 0xFE))

Definition at line 55 of file qgb18030codec.cpp.

Referenced by QGb2312Codec::convertToUnicode().

◆ IsLatin

#define IsLatin (   c)    ((c) <= 0x7F)

◆ IsUDA1

#define IsUDA1 (   a,
  b 
)    (InRange((a), 0xAA, 0xAF) && InRange((b), 0xA1, 0xFE))

Definition at line 68 of file qgb18030codec.cpp.

Referenced by qt_Gb18030ToUnicode().

◆ IsUDA2

#define IsUDA2 (   a,
  b 
)    (InRange((a), 0xF8, 0xFE) && InRange((b), 0xA1, 0xFE))

Definition at line 69 of file qgb18030codec.cpp.

Referenced by qt_Gb18030ToUnicode().

◆ IsUDA3

#define IsUDA3 (   a,
  b 
)    (InRange((a), 0xA1, 0xA7) && InRange((b), 0x40, 0xA0) && ((b) != 0x7F))

Definition at line 70 of file qgb18030codec.cpp.

Referenced by qt_Gb18030ToUnicode().

◆ qValidChar

#define qValidChar (   u)    ((u) ? (u) : static_cast<ushort>(QChar::ReplacementCharacter))

Function Documentation

◆ gb4lin_to_gb()

static uint gb4lin_to_gb ( uint  gb4lin)
inline static

Definition at line 9013 of file qgb18030codec.cpp.

Referenced by qt_UnicodeToGb18030().

// Converts a linear 4-byte GB18030 index (gb4lin) into the packed 4-byte code.
// The divisors follow the byte ranges given by the macros on this page:
// 10 values for the 2nd and 4th bytes (0x30..0x39) and 126 values for the
// 3rd byte (0x81..0xFE), hence 1260 = 126 * 10 and 12600 = 10 * 126 * 10.
// This is the inverse of the gb4lin computation in qt_Gb18030ToUnicode().
// Returns the four bytes packed most-significant first.
9013  {
9014  uchar a, b, c, d;
// a: first byte, b: second byte, c: third byte, d: fourth byte.
9015  a = 0x81 + gb4lin / 12600;
9016  b = 0x30 + (gb4lin / 1260) % 10;
9017  c = 0x81 + (gb4lin / 10) % 126;
9018  d = 0x30 + gb4lin % 10;
9019  return ((a << 24) | (b << 16) | (c << 8) | d);
9020 }
double d
Definition: qnumeric_p.h:62
unsigned char c[8]
Definition: qnumeric_p.h:62
long ASN1_INTEGER_get ASN1_INTEGER * a
unsigned char uchar
Definition: qglobal.h:994

◆ qt_Gb18030ToUnicode()

static uint qt_Gb18030ToUnicode ( const uchar * gbstr,
int &  len 
)
static

Definition at line 9022 of file qgb18030codec.cpp.

Referenced by QGb18030Codec::convertToUnicode(), QGbkCodec::convertToUnicode(), and QGb2312Codec::convertToUnicode().

9022  {
9023  /* Returns Unicode. */
9024  uint uni;
9025  uchar first = *gbstr;
9026 
9027  if (IsLatin(first)) {
9028  len = 1;
9029  uni = (uint)first;
9030  }
9031  else if (Is1stByte(first) && len >= 2) {
9032  uchar second = gbstr[1];
9033 
9034  if (Is2ndByteIn2Bytes(second)) {
9035  len = 2;
9036 
9037  if (IsUDA1(first, second))
9038  uni = 0xE000 + (first - 0xAA) * 94 + (second - 0xA1);
9039  else if (IsUDA2(first, second))
9040  uni = 0xE234 + (first - 0xF8) * 94 + (second - 0xA1);
9041  else if (IsUDA3(first, second))
9042  uni = 0xE4C6 + (first - 0xA1) * 96 + (second - 0x40)
9043  - ((second >= 0x80) ? 1 : 0);
9044  else {
9045  // Use the mapping table
9046  uint i;
9047 
9048  i = (first - 0x81) * 190 + (second - 0x40)
9049  - ((second >= 0x80) ? 1 : 0);
9050 
9051  if (InRange(first, 0xA1, 0xA7))
9052  i -= (first - 0xA0) * 96;
9053  if (first > 0xA7)
9054  i -= 672;
9055  if (InRange(first, 0xAA, 0xAF))
9056  i -= (first - 0xAA) * 94;
9057  if (first > 0xAF)
9058  i -= 564;
9059  if (first >= 0xF8)
9060  i -= (first - 0xF8) * 94;
9061 
9062  uni = (uint)gb18030_2byte_to_ucs[i];
9063  }
9064  }
9065  else if (Is2ndByteIn4Bytes(second) && len >= 4) {
9066  uchar third = gbstr[2],
9067  fourth = gbstr[3];
9068 
9069  if (Is3rdByte(third) && Is4thByte(fourth)) {
9070  // Valid 4-byte GB18030, whether defined or not
9071  uint gb4lin;
9072  indexTbl_t g2u;
9073 
9074  gb4lin = (first - 0x81) * 12600 + (second - 0x30) * 1260
9075  + (third - 0x81) * 10 + (fourth - 0x30);
9076 
9077  len = 4;
9078  if (gb4lin <= 0x99FB) {
9079  /* GB+81308130 - GB+8431A439 */
9080  g2u = gb18030_to_ucs_index[gb4lin >> 8];
9081 
9082  if ((quint8)(gb4lin & 0xFF) >= g2u.tblBegin &&
9083  (quint8)(gb4lin & 0xFF) <= g2u.tblEnd) {
9084 
9085  uni = (uint)gb18030_4byte_to_ucs[gb4lin - g2u.tblOffset];
9086  }
9087  else {
9088  uni = g2u.algOffset + (gb4lin & 0xFF);
9089  }
9090  } else if (InRange(gb4lin, 0x2E248, 0x12E247)) {
9091  /* GB+90308130 - GB+E3329A35 */
9092  uni = gb4lin - 0xE248;
9093  } else {
9094  /* undefined or reserved area */
9095  len = 1;
9097  }
9098  }
9099  else {
9100  len = 1;
9102  }
9103  }
9104  else {
9105  len = 1;
9107  }
9108  }
9109  else {
9110  len = 1;
9112  }
9113  return uni;
9114 }
#define IsLatin(c)
#define InRange(c, lower, upper)
static const indexTbl_t gb18030_to_ucs_index[154]
unsigned char quint8
Definition: qglobal.h:934
static quint16 const gb18030_2byte_to_ucs[22046]
quint8 tblBegin
#define IsUDA1(a, b)
static quint16 const gb18030_4byte_to_ucs[6793]
unsigned char uchar
Definition: qglobal.h:994
quint16 algOffset
#define IsUDA3(a, b)
unsigned int uint
Definition: qglobal.h:996
#define Is2ndByteIn4Bytes(c)
#define Is1stByte(c)
#define Is3rdByte(c)
quint16 tblOffset
#define Is4thByte(c)
#define Is2ndByteIn2Bytes(c)
#define IsUDA2(a, b)

◆ qt_UnicodeToGb18030()

int qt_UnicodeToGb18030 ( uint  unicode,
uchar * gbchar 
)
static

Definition at line 9117 of file qgb18030codec.cpp.

Referenced by QGb18030Codec::convertFromUnicode().

// Encodes one Unicode code point as GB18030 into gbchar.
// Returns the number of bytes written (1, 2 or 4), or 0 when the code point
// is discarded (in that case gbchar[0] is set to 0).
// NOTE(review): the body reads the first argument as `uni`, while the
// signature rendered on this page names it `unicode` - presumably the
// declaration and the definition use different parameter names; confirm.
9117  {
9118  /* Returns the bytesize of the GB18030 character. */
9119  uint gb, gb4lin;
9120  indexTbl_t u2g;
9121 
9122  if (IsLatin(uni)) {
9123  *gbchar = (uchar)uni;
9124  return 1;
9125  }
// Code points up to 0xD7FF and from 0xE766 to 0xFFFF go through the index
// table, which is selected by the high byte of the code point.
9126  else if (uni <= 0xD7FF || InRange(uni, 0xE766, 0xFFFF)) {
9127  u2g = ucs_to_gb18030_index[uni >> 8];
9128 
// Low byte inside [tblBegin, tblEnd]: explicit table entry; otherwise the
// code is derived arithmetically from algOffset.
9129  if ((quint8)(uni & 0xFF) >= u2g.tblBegin && (quint8)(uni & 0xFF) <= u2g.tblEnd) {
9130  // Use mapping table (2-byte or 4-byte GB18030)
9131  uint tblEntry;
9132 
9133  tblEntry = ucs_to_gb18030[uni - u2g.tblOffset];
9134 
// Entries above 0x8000 are used verbatim as the 2-byte code.
9135  if (tblEntry > 0x8000) {
9136  // 2-byte GB18030
9137  gb = tblEntry;
9138  }
9139  else {
9140  // 4-byte GB18030 stored in a special compact format
// tblEntry >> 11 seeds the second byte; the thresholds below then select
// lead byte 0x84, 0x83 or 0x82 (a += 3, 2, 1) and rebase the second byte.
9141  uchar a, b;
9142  a = 0x81;
9143  b = 0x30 + (tblEntry >> 11);
9144  if (tblEntry >= 0x7000) {
9145  a += 3;
9146  b -= 14;
9147  } else if (tblEntry >= 0x6000) {
9148  a += 2;
9149  b -= 6;
9150  } else if (tblEntry >= 0x3000) {
9151  a += 1;
9152  b -= 6;
9153  } else if (b >= 0x31) {
9154  b += 5;
9155  }
9156  gbchar[0] = a;
9157  gbchar[1] = b;
// Third byte comes from bits 4..10, fourth byte from the low four bits.
9158  gbchar[2] = 0x81 + ((tblEntry >> 4) & 0x7F);
9159  gbchar[3] = 0x30 + (tblEntry & 0xF);
9160  return 4;
9161  }
9162  }
9163  else {
9164  // 4-byte GB18030 calculated algorithmically
9165  gb4lin = u2g.algOffset + (uni & 0xFF);
9166  // Yikes, my index table could not cover all the bases...
9167  if (InRange(uni, 0x49B8, 0x49FF))
9168  gb4lin -= 11;
9169  gb = gb4lin_to_gb(gb4lin);
9170  }
9171  }
9172  else if (InRange(uni, 0xE000, 0xE765)) {
9173  // User-defined areas in GB18030 (2-byte)
// Three consecutive sub-ranges; each branch is the inverse of the matching
// IsUDA1 / IsUDA2 / IsUDA3 case in qt_Gb18030ToUnicode().
9174  if (uni <= 0xE233)
9175  gb = 0xAAA1 + (((uni - 0xE000) / 94) << 8) + (uni - 0xE000) % 94;
9176  else if (uni <= 0xE4C5)
9177  gb = 0xF8A1 + (((uni - 0xE234) / 94) << 8) + (uni - 0xE234) % 94;
9178  else {
9179  gb = 0xA140 + (((uni - 0xE4C6) / 96) << 8) + (uni - 0xE4C6) % 96;
9180  // Skip the gap at 0x7F
9181  if ((gb & 0xFF) >= 0x7F)
9182  gb++;
9183  }
9184  }
9185  else if (InRange(uni, 0x10000, 0x10FFFF)) {
9186  // Qt 3.x does not support beyond BMP yet, but what the heck...
9187  // (U+10000 = GB+90308130) to (U+10FFFF = GB+E3329A35)
9188  gb = gb4lin_to_gb(0x1E248 + uni);
9189  }
9190  else {
9191  // Surrogate area and other undefined/reserved areas (discard)
9192  *gbchar = 0;
9193  return 0;
9194  }
9195 
// gb holds either a 2-byte code (fits in 16 bits) or a packed 4-byte code;
// both are written most-significant byte first.
9196  if (gb <= 0xFFFF) {
9197  gbchar[0] = (uchar)((gb >> 8) & 0xFF);
9198  gbchar[1] = (uchar)(gb & 0xFF);
9199  return 2;
9200  } else {
9201  gbchar[0] = (uchar)((gb >> 24) & 0xFF);
9202  gbchar[1] = (uchar)((gb >> 16) & 0xFF);
9203  gbchar[2] = (uchar)((gb >> 8) & 0xFF);
9204  gbchar[3] = (uchar)(gb & 0xFF);
9205  return 4;
9206  }
9207 }
#define IsLatin(c)
#define InRange(c, lower, upper)
long ASN1_INTEGER_get ASN1_INTEGER * a
unsigned char quint8
Definition: qglobal.h:934
quint8 tblBegin
unsigned char uchar
Definition: qglobal.h:994
quint16 algOffset
static const indexTbl_t ucs_to_gb18030_index[256]
unsigned int uint
Definition: qglobal.h:996
quint16 tblOffset
static quint16 const ucs_to_gb18030[28839]
static uint gb4lin_to_gb(uint gb4lin)

◆ qt_UnicodeToGbk()

int qt_UnicodeToGbk ( uint  unicode,
uchar * gbchar 
)

Definition at line 9210 of file qgb18030codec.cpp.

Referenced by QGbkCodec::convertFromUnicode(), QGb2312Codec::convertFromUnicode(), QFontGb2312Codec::convertFromUnicode(), and QFontGbkCodec::convertFromUnicode().

// Encodes one Unicode code point as GBK into gbchar.
// Returns 1 for Latin input, 2 for a 2-byte code, and 0 when the code point
// has no 1- or 2-byte form here (gbchar[0] is then set to 0). Every mapping
// that would need a 4-byte GB18030 sequence is discarded.
// NOTE(review): the body reads the first argument as `uni`, while the
// signature rendered on this page names it `unicode` - presumably the
// declaration and the definition use different parameter names; confirm.
9210  {
9211  /* Returns the bytesize of the GBK character. */
9212  /* Intended for improving performance of GB2312 and GBK functions. */
9213  uint gb;
9214  indexTbl_t u2g;
9215 
9216  if (IsLatin(uni)) {
9217  *gbchar = (uchar)uni;
9218  return 1;
9219  }
// Same index-table lookup as qt_UnicodeToGb18030(), selected by the high byte.
9220  else if (uni <= 0xD7FF || InRange(uni, 0xE766, 0xFFFF)) {
9221  u2g = ucs_to_gb18030_index[uni >> 8];
9222 
9223  if ((quint8)(uni & 0xFF) >= u2g.tblBegin && (quint8)(uni & 0xFF) <= u2g.tblEnd) {
9224  // Use mapping table (2-byte GBK or 4-byte GB18030)
9225  uint tblEntry;
9226 
9227  tblEntry = ucs_to_gb18030[uni - u2g.tblOffset];
9228 
// Only entries above 0x8000 are 2-byte codes; anything else is dropped.
9229  if (tblEntry > 0x8000) {
9230  // GBK
9231  gb = tblEntry;
9232  }
9233  else {
9234  // 4-byte GB18030 stored in a special compact format (discard)
9235  *gbchar = 0;
9236  return 0;
9237  }
9238  }
9239  else {
9240  // 4-byte GB18030 calculated algorithmically (discard)
9241  *gbchar = 0;
9242  return 0;
9243  }
9244  }
9245  else if (InRange(uni, 0xE000, 0xE765)) {
9246  // User-defined areas in GB18030 (2-byte)
9247  if (uni <= 0xE233)
9248  gb = 0xAAA1 + (((uni - 0xE000) / 94) << 8) + (uni - 0xE000) % 94;
9249  else if (uni <= 0xE4C5)
9250  gb = 0xF8A1 + (((uni - 0xE234) / 94) << 8) + (uni - 0xE234) % 94;
9251  else {
9252  gb = 0xA140 + (((uni - 0xE4C6) / 96) << 8) + (uni - 0xE4C6) % 96;
9253  // Skip the gap at 0x7F
9254  if ((gb & 0xFF) >= 0x7F)
9255  gb++;
9256  }
9257  }
9258  else {
9259  // Surrogate area and other undefined/reserved areas (discard)
9260  *gbchar = 0;
9261  return 0;
9262  }
9263 
// Every path that reaches this point has set gb to a 2-byte code.
9264  gbchar[0] = (uchar)((gb >> 8) & 0xFF);
9265  gbchar[1] = (uchar)(gb & 0xFF);
9266  return 2;
9267 }
#define IsLatin(c)
#define InRange(c, lower, upper)
unsigned char quint8
Definition: qglobal.h:934
quint8 tblBegin
unsigned char uchar
Definition: qglobal.h:994
static const indexTbl_t ucs_to_gb18030_index[256]
unsigned int uint
Definition: qglobal.h:996
quint16 tblOffset
static quint16 const ucs_to_gb18030[28839]

Variable Documentation

◆ gb18030_2byte_to_ucs

quint16 const gb18030_2byte_to_ucs[22046]
static

Definition at line 4577 of file qgb18030codec.cpp.

Referenced by qt_Gb18030ToUnicode().

◆ gb18030_4byte_to_ucs

quint16 const gb18030_4byte_to_ucs[6793]
static

Definition at line 7629 of file qgb18030codec.cpp.

Referenced by qt_Gb18030ToUnicode().

◆ gb18030_to_ucs_index

const indexTbl_t gb18030_to_ucs_index[154]
static

Definition at line 736 of file qgb18030codec.cpp.

◆ ucs_to_gb18030

quint16 const ucs_to_gb18030[28839]
static

Definition at line 947 of file qgb18030codec.cpp.

Referenced by qt_UnicodeToGb18030(), and qt_UnicodeToGbk().

◆ ucs_to_gb18030_index

const indexTbl_t ucs_to_gb18030_index[256]
static

Definition at line 816 of file qgb18030codec.cpp.