Qt 4.8
Classes | Macros | Enumerations | Functions
qchar.cpp File Reference
#include "qchar.h"
#include "qdatastream.h"
#include "qtextcodec.h"
#include "qunicodetables_p.h"
#include "qunicodetables.cpp"

Go to the source code of this file.

Classes

struct  UCS2Pair
 

Macros

#define FLAG(x)   (1 << (x))
 

Enumerations

enum  {
  Hangul_SBase = 0xac00, Hangul_LBase = 0x1100, Hangul_VBase = 0x1161, Hangul_TBase = 0x11a7,
  Hangul_SCount = 11172, Hangul_LCount = 19, Hangul_VCount = 21, Hangul_TCount = 28,
  Hangul_NCount = 21*28
}
 

Functions

static void canonicalOrderHelper (QString *str, QChar::UnicodeVersion version, int from)
 
static void composeHelper (QString *str, QChar::UnicodeVersion version, int from)
 
static void decomposeHelper (QString *str, bool canonical, QChar::UnicodeVersion version, int from)
 
static const unsigned short *QT_FASTCALL decompositionHelper (uint ucs4, int *length, int *tag, unsigned short *buffer)
 
static uint foldCase (const ushort *ch, const ushort *start)
 
static uint foldCase (uint ch, uint &last)
 
static ushort foldCase (ushort ch)
 
static ushort ligatureHelper (ushort u1, ushort u2)
 
bool operator< (ushort u1, const UCS2Pair &ligature)
 
bool operator< (const UCS2Pair &ligature, ushort u1)
 

Macro Definition Documentation

◆ FLAG

#define FLAG (   x)    (1 << (x))

Enumeration Type Documentation

◆ anonymous enum

anonymous enum
Enumerator
Hangul_SBase 
Hangul_LBase 
Hangul_VBase 
Hangul_TBase 
Hangul_SCount 
Hangul_LCount 
Hangul_VCount 
Hangul_TCount 
Hangul_NCount 

Definition at line 1054 of file qchar.cpp.

1054  {
1055  Hangul_SBase = 0xac00,
1056  Hangul_LBase = 0x1100,
1057  Hangul_VBase = 0x1161,
1058  Hangul_TBase = 0x11a7,
1059  Hangul_SCount = 11172,
1060  Hangul_LCount = 19,
1061  Hangul_VCount = 21,
1062  Hangul_TCount = 28,
1063  Hangul_NCount = 21*28
1064 };

Function Documentation

◆ canonicalOrderHelper()

static void canonicalOrderHelper ( QString * str,
QChar::UnicodeVersion  version,
int  from 
)
static

Definition at line 1821 of file qchar.cpp.

Referenced by qt_string_normalize().

1822 {
1823  QString &s = *str;
1824  const int l = s.length()-1;
1825  int pos = from;
1826  while (pos < l) {
1827  int p2 = pos+1;
1828  uint u1 = s.at(pos).unicode();
1829  if (QChar(u1).isHighSurrogate()) {
1830  ushort low = s.at(p2).unicode();
1831  if (QChar(low).isLowSurrogate()) {
1832  u1 = QChar::surrogateToUcs4(u1, low);
1833  if (p2 >= l)
1834  break;
1835  ++p2;
1836  }
1837  }
1838  uint u2 = s.at(p2).unicode();
1839  if (QChar(u2).isHighSurrogate() && p2 < l) {
1840  ushort low = s.at(p2+1).unicode();
1841  if (QChar(low).isLowSurrogate()) {
1842  u2 = QChar::surrogateToUcs4(u2, low);
1843  ++p2;
1844  }
1845  }
1846 
1847  ushort c2 = 0;
1848  {
1849  const QUnicodeTables::Properties *p = qGetProp(u2);
1850  if (p->unicodeVersion != QChar::Unicode_Unassigned && p->unicodeVersion <= version)
1851  c2 = p->combiningClass;
1852  }
1853  if (c2 == 0) {
1854  pos = p2+1;
1855  continue;
1856  }
1857 
1858  ushort c1 = 0;
1859  {
1860  const QUnicodeTables::Properties *p = qGetProp(u1);
1861  if (p->unicodeVersion != QChar::Unicode_Unassigned && p->unicodeVersion <= version)
1862  c1 = p->combiningClass;
1863  }
1864 
1865  if (c1 > c2) {
1866  QChar *uc = s.data();
1867  int p = pos;
1868  // exchange characters
1869  if (!QChar::requiresSurrogates(u2)) {
1870  uc[p++] = u2;
1871  } else {
1872  uc[p++] = QChar::highSurrogate(u2);
1873  uc[p++] = QChar::lowSurrogate(u2);
1874  }
1875  if (!QChar::requiresSurrogates(u1)) {
1876  uc[p++] = u1;
1877  } else {
1878  uc[p++] = QChar::highSurrogate(u1);
1879  uc[p++] = QChar::lowSurrogate(u1);
1880  }
1881  if (pos > 0)
1882  --pos;
1883  if (pos > 0 && s.at(pos).isLowSurrogate())
1884  --pos;
1885  } else {
1886  ++pos;
1887  if (QChar::requiresSurrogates(u1))
1888  ++pos;
1889  }
1890  }
1891 }
const QChar at(int i) const
Returns the character at the given index position in the string.
Definition: qstring.h:698
ushort unicode() const
This is an overloaded member function, provided for convenience. It differs from the above function o...
Definition: qchar.h:251
int length() const
Returns the number of characters in this string.
Definition: qstring.h:696
bool isLowSurrogate() const
Returns true if the QChar is the low part of a UTF-16 surrogate (i.e. between 0xdc00 and 0xdfff); otherwise returns false.
Definition: qchar.h:279
The QString class provides a Unicode character string.
Definition: qstring.h:83
The QChar class provides a 16-bit Unicode character.
Definition: qchar.h:72
QChar * data()
Returns a pointer to the data stored in the QString.
Definition: qstring.h:710
static ushort highSurrogate(uint ucs4)
Returns the high surrogate value of a ucs4 code point.
Definition: qchar.h:303
static ushort lowSurrogate(uint ucs4)
Returns the low surrogate value of a ucs4 code point.
Definition: qchar.h:306
static bool requiresSurrogates(uint ucs4)
Returns true if the UCS-4-encoded character specified by ucs4 can be split into the high and low part...
Definition: qchar.h:294
static const QUnicodeTables::Properties * qGetProp(uint ucs4)
unsigned int uint
Definition: qglobal.h:996
unsigned short ushort
Definition: qglobal.h:995
QFactoryLoader * l
static uint surrogateToUcs4(ushort high, ushort low)
Converts a UTF16 surrogate pair with the given high and low values to its UCS-4 code point...
Definition: qchar.h:297

◆ composeHelper()

static void composeHelper ( QString * str,
QChar::UnicodeVersion  version,
int  from 
)
static

Definition at line 1776 of file qchar.cpp.

Referenced by qt_string_normalize().

1777 {
1778  QString &s = *str;
1779 
1780  if (from < 0 || s.length() - from < 2)
1781  return;
1782 
1783  // the loop can partly ignore high Unicode as all ligatures are in the BMP
1784  int starter = -2; // to prevent starter == pos - 1
1785  int lastCombining = 255; // to prevent combining > lastCombining
1786  int pos = from;
1787  while (pos < s.length()) {
1788  uint uc = s.at(pos).unicode();
1789  if (QChar(uc).isHighSurrogate() && pos < s.length()-1) {
1790  ushort low = s.at(pos+1).unicode();
1791  if (QChar(low).isLowSurrogate()) {
1792  uc = QChar::surrogateToUcs4(uc, low);
1793  ++pos;
1794  }
1795  }
1796  const QUnicodeTables::Properties *p = qGetProp(uc);
1797  if (p->unicodeVersion == QChar::Unicode_Unassigned || p->unicodeVersion > version) {
1798  starter = -1; // to prevent starter == pos - 1
1799  lastCombining = 255; // to prevent combining > lastCombining
1800  ++pos;
1801  continue;
1802  }
1803  int combining = p->combiningClass;
1804  if ((starter == pos - 1 || combining > lastCombining) && starter >= from) {
1805  // allowed to form ligature with S
1806  QChar ligature = ligatureHelper(s.at(starter).unicode(), uc);
1807  if (ligature.unicode()) {
1808  s[starter] = ligature;
1809  s.remove(pos, 1);
1810  continue;
1811  }
1812  }
1813  if (!combining)
1814  starter = pos;
1815  lastCombining = combining;
1816  ++pos;
1817  }
1818 }
static ushort ligatureHelper(ushort u1, ushort u2)
Definition: qchar.cpp:1743
const QChar at(int i) const
Returns the character at the given index position in the string.
Definition: qstring.h:698
ushort unicode() const
This is an overloaded member function, provided for convenience. It differs from the above function o...
Definition: qchar.h:251
int length() const
Returns the number of characters in this string.
Definition: qstring.h:696
The QString class provides a Unicode character string.
Definition: qstring.h:83
The QChar class provides a 16-bit Unicode character.
Definition: qchar.h:72
static const QUnicodeTables::Properties * qGetProp(uint ucs4)
unsigned int uint
Definition: qglobal.h:996
unsigned short ushort
Definition: qglobal.h:995
static uint surrogateToUcs4(ushort high, ushort low)
Converts a UTF16 surrogate pair with the given high and low values to its UCS-4 code point...
Definition: qchar.h:297
QString & remove(int i, int len)
Removes n characters from the string, starting at the given position index, and returns a reference t...
Definition: qstring.cpp:1867

◆ decomposeHelper()

static void decomposeHelper ( QString * str,
bool  canonical,
QChar::UnicodeVersion  version,
int  from 
)
static

Definition at line 1698 of file qchar.cpp.

Referenced by qt_string_normalize().

1699 {
1700  unsigned short buffer[3];
1701 
1702  QString &s = *str;
1703 
1704  const unsigned short *utf16 = reinterpret_cast<unsigned short *>(s.data());
1705  const unsigned short *uc = utf16 + s.length();
1706  while (uc != utf16 + from) {
1707  uint ucs4 = *(--uc);
1708  if (QChar(ucs4).isLowSurrogate() && uc != utf16) {
1709  ushort high = *(uc - 1);
1710  if (QChar(high).isHighSurrogate()) {
1711  --uc;
1712  ucs4 = QChar::surrogateToUcs4(high, ucs4);
1713  }
1714  }
1715  QChar::UnicodeVersion v = QChar::unicodeVersion(ucs4);
1716  if (v == QChar::Unicode_Unassigned || v > version)
1717  continue;
1718  int length;
1719  int tag;
1720  const unsigned short *d = decompositionHelper(ucs4, &length, &tag, buffer);
1721  if (!d || (canonical && tag != QChar::Canonical))
1722  continue;
1723 
1724  int pos = uc - utf16;
1725  s.replace(pos, QChar::requiresSurrogates(ucs4) ? 2 : 1, reinterpret_cast<const QChar *>(d), length);
1726  // since the insert invalidates the pointers and we do decomposition recursive
1727  utf16 = reinterpret_cast<unsigned short *>(s.data());
1728  uc = utf16 + pos + length;
1729  }
1730 }
double d
Definition: qnumeric_p.h:62
QString & replace(int i, int len, QChar after)
Definition: qstring.cpp:2005
int length() const
Returns the number of characters in this string.
Definition: qstring.h:696
bool isLowSurrogate() const
Returns true if the QChar is the low part of a UTF-16 surrogate (i.e. between 0xdc00 and 0xdfff); otherwise returns false.
Definition: qchar.h:279
The QString class provides a Unicode character string.
Definition: qstring.h:83
bool isHighSurrogate() const
Returns true if the QChar is the high part of a UTF-16 surrogate (i.e. between 0xd800 and 0xdbff); otherwise returns false.
Definition: qchar.h:276
The QChar class provides a 16-bit Unicode character.
Definition: qchar.h:72
QChar * data()
Returns a pointer to the data stored in the QString.
Definition: qstring.h:710
UnicodeVersion unicodeVersion() const
Returns the Unicode version that introduced this character.
Definition: qchar.cpp:1189
static bool requiresSurrogates(uint ucs4)
Returns true if the UCS-4-encoded character specified by ucs4 can be split into the high and low part...
Definition: qchar.h:294
unsigned int uint
Definition: qglobal.h:996
UnicodeVersion
Specifies which version of the [Unicode standard](http://www.unicode.org/) introduced a certain character.
Definition: qchar.h:212
unsigned short ushort
Definition: qglobal.h:995
static uint surrogateToUcs4(ushort high, ushort low)
Converts a UTF16 surrogate pair with the given high and low values to its UCS-4 code point...
Definition: qchar.h:297
static const unsigned short *QT_FASTCALL decompositionHelper(uint ucs4, int *length, int *tag, unsigned short *buffer)
Definition: qchar.cpp:1068

◆ decompositionHelper()

static const unsigned short* QT_FASTCALL decompositionHelper ( uint  ucs4,
int *  length,
int *  tag,
unsigned short *  buffer 
)
static

Definition at line 1068 of file qchar.cpp.

Referenced by decomposeHelper(), and QChar::decomposition().

1069 {
1070  *length = 0;
1071  if (ucs4 > UNICODE_LAST_CODEPOINT)
1072  return 0;
1073  if (ucs4 >= Hangul_SBase && ucs4 < Hangul_SBase + Hangul_SCount) {
1074  int SIndex = ucs4 - Hangul_SBase;
1075  buffer[0] = Hangul_LBase + SIndex / Hangul_NCount; // L
1076  buffer[1] = Hangul_VBase + (SIndex % Hangul_NCount) / Hangul_TCount; // V
1077  buffer[2] = Hangul_TBase + SIndex % Hangul_TCount; // T
1078  *length = buffer[2] == Hangul_TBase ? 2 : 3;
1079  *tag = QChar::Canonical;
1080  return buffer;
1081  }
1082 
1083  const unsigned short index = GET_DECOMPOSITION_INDEX(ucs4);
1084  if (index == 0xffff)
1085  return 0;
1086  const unsigned short *decomposition = uc_decomposition_map+index;
1087  *tag = (*decomposition) & 0xff;
1088  *length = (*decomposition) >> 8;
1089  return decomposition+1;
1090 }
#define GET_DECOMPOSITION_INDEX(ucs4)
#define UNICODE_LAST_CODEPOINT
quint16 index
static const unsigned short uc_decomposition_map[]

◆ foldCase() [1/3]

static uint foldCase ( const ushort * ch,
const ushort * start
)
inline static

Definition at line 1380 of file qchar.cpp.

Referenced by bm_find(), bm_init_skiptable(), QString::endsWith(), QStringRef::endsWith(), findChar(), lastIndexOfHelper(), qFindString(), qt_ends_with(), qt_last_index_of(), qt_starts_with(), qt_string_count(), QString::remove(), QString::replace(), QString::startsWith(), QStringRef::startsWith(), QString::toCaseFolded(), and ucstricmp().

1381 {
1382  uint c = *ch;
1383  if (QChar(c).isLowSurrogate() && ch > start && QChar(*(ch - 1)).isHighSurrogate())
1384  c = QChar::surrogateToUcs4(*(ch - 1), c);
1385  return *ch + qGetProp(c)->caseFoldDiff;
1386 }
unsigned char c[8]
Definition: qnumeric_p.h:62
The QChar class provides a 16-bit Unicode character.
Definition: qchar.h:72
static const QUnicodeTables::Properties * qGetProp(uint ucs4)
unsigned int uint
Definition: qglobal.h:996
static uint surrogateToUcs4(ushort high, ushort low)
Converts a UTF16 surrogate pair with the given high and low values to its UCS-4 code point...
Definition: qchar.h:297

◆ foldCase() [2/3]

static uint foldCase ( uint  ch,
uint & last
)
inline static

Definition at line 1388 of file qchar.cpp.

1389 {
1390  uint c = ch;
1391  if (QChar(c).isLowSurrogate() && QChar(last).isHighSurrogate())
1392  c = QChar::surrogateToUcs4(last, c);
1393  last = ch;
1394  return ch + qGetProp(c)->caseFoldDiff;
1395 }
unsigned char c[8]
Definition: qnumeric_p.h:62
The QChar class provides a 16-bit Unicode character.
Definition: qchar.h:72
static const QUnicodeTables::Properties * qGetProp(uint ucs4)
unsigned int uint
Definition: qglobal.h:996
static uint surrogateToUcs4(ushort high, ushort low)
Converts a UTF16 surrogate pair with the given high and low values to its UCS-4 code point...
Definition: qchar.h:297

◆ foldCase() [3/3]

static ushort foldCase ( ushort  ch)
inline static

Definition at line 1397 of file qchar.cpp.

1398 {
1399  return ch + qGetProp(ch)->caseFoldDiff;
1400 }
static const QUnicodeTables::Properties * qGetProp(uint ucs4)

◆ ligatureHelper()

static ushort ligatureHelper ( ushort  u1,
ushort  u2 
)
static

Definition at line 1743 of file qchar.cpp.

Referenced by composeHelper().

1744 {
1745  // hangul L-V pair
1746  int LIndex = u1 - Hangul_LBase;
1747  if (0 <= LIndex && LIndex < Hangul_LCount) {
1748  int VIndex = u2 - Hangul_VBase;
1749  if (0 <= VIndex && VIndex < Hangul_VCount)
1750  return Hangul_SBase + (LIndex * Hangul_VCount + VIndex) * Hangul_TCount;
1751  }
1752 
1753  // hangul LV-T pair
1754  int SIndex = u1 - Hangul_SBase;
1755  if (0 <= SIndex && SIndex < Hangul_SCount && (SIndex % Hangul_TCount) == 0) {
1756  int TIndex = u2 - Hangul_TBase;
1757  if (0 <= TIndex && TIndex <= Hangul_TCount)
1758  return u1 + TIndex;
1759  }
1760 
1761  const unsigned short index = GET_LIGATURE_INDEX(u2);
1762  if (index == 0xffff)
1763  return 0;
1764  const unsigned short *ligatures = uc_ligature_map+index;
1765  ushort length = *ligatures++;
1766  {
1767  const UCS2Pair *data = reinterpret_cast<const UCS2Pair *>(ligatures);
1768  const UCS2Pair *r = qBinaryFind(data, data + length, u1);
1769  if (r != data + length)
1770  return r->u2;
1771  }
1772 
1773  return 0;
1774 }
Q_OUTOFLINE_TEMPLATE RandomAccessIterator qBinaryFind(RandomAccessIterator begin, RandomAccessIterator end, const T &value)
Definition: qalgorithms.h:295
#define GET_LIGATURE_INDEX(u2)
static const char * data(const QByteArray &arr)
ushort u2
Definition: qchar.cpp:1735
unsigned short ushort
Definition: qglobal.h:995
quint16 index
static const unsigned short uc_ligature_map[]

◆ operator<() [1/2]

bool operator< ( ushort  u1,
const UCS2Pair & ligature
)
inline

Definition at line 1738 of file qchar.cpp.

1739 { return u1 < ligature.u1; }
ushort u1
Definition: qchar.cpp:1734

◆ operator<() [2/2]

bool operator< ( const UCS2Pair & ligature,
ushort  u1
)
inline

Definition at line 1740 of file qchar.cpp.

1741 { return ligature.u1 < u1; }
ushort u1
Definition: qchar.cpp:1734