Qt 4.8
qcompressedwhitespace.cpp
Go to the documentation of this file.
1 /****************************************************************************
2 **
3 ** Copyright (C) 2014 Digia Plc and/or its subsidiary(-ies).
4 ** Contact: http://www.qt-project.org/legal
5 **
6 ** This file is part of the QtXmlPatterns module of the Qt Toolkit.
7 **
8 ** $QT_BEGIN_LICENSE:LGPL$
9 ** Commercial License Usage
10 ** Licensees holding valid commercial Qt licenses may use this file in
11 ** accordance with the commercial license agreement provided with the
12 ** Software or, alternatively, in accordance with the terms contained in
13 ** a written agreement between you and Digia. For licensing terms and
14 ** conditions see http://qt.digia.com/licensing. For further information
15 ** use the contact form at http://qt.digia.com/contact-us.
16 **
17 ** GNU Lesser General Public License Usage
18 ** Alternatively, this file may be used under the terms of the GNU Lesser
19 ** General Public License version 2.1 as published by the Free Software
20 ** Foundation and appearing in the file LICENSE.LGPL included in the
21 ** packaging of this file. Please review the following information to
22 ** ensure the GNU Lesser General Public License version 2.1 requirements
23 ** will be met: http://www.gnu.org/licenses/old-licenses/lgpl-2.1.html.
24 **
25 ** In addition, as a special exception, Digia gives you certain additional
26 ** rights. These rights are described in the Digia Qt LGPL Exception
27 ** version 1.1, included in the file LGPL_EXCEPTION.txt in this package.
28 **
29 ** GNU General Public License Usage
30 ** Alternatively, this file may be used under the terms of the GNU
31 ** General Public License version 3.0 as published by the Free Software
32 ** Foundation and appearing in the file LICENSE.GPL included in the
33 ** packaging of this file. Please review the following information to
34 ** ensure the GNU General Public License version 3.0 requirements will be
35 ** met: http://www.gnu.org/copyleft/gpl.html.
36 **
37 **
38 ** $QT_END_LICENSE$
39 **
40 ****************************************************************************/
41 
42 #include <QString>
43 
45 
47 
48 using namespace QPatternist;
49 
51 {
52  switch(ch.unicode())
53  {
54  case ' ':
55  return Space;
56  case '\n':
57  return LF;
58  case '\r':
59  return CR;
60  case '\t':
61  return Tab;
62  default:
63  {
64  Q_ASSERT_X(false, Q_FUNC_INFO,
65  "The caller must guarantee only whitespace is passed.");
66  return Tab;
67  }
68  }
69 }
70 
71 bool CompressedWhitespace::isEven(const int number)
72 {
73  Q_ASSERT(number >= 0);
74  return number % 2 == 0;
75 }
76 
78 {
79  Q_ASSERT(len > 0);
80  Q_ASSERT(len <= MaxCharCount);
81 
82  return len + toIdentifier(ch);
83 }
84 
86 {
87  switch(id)
88  {
89  case Space: return QLatin1Char(' ');
90  case CR: return QLatin1Char('\r');
91  case LF: return QLatin1Char('\n');
92  case Tab: return QLatin1Char('\t');
93  default:
94  {
95  Q_ASSERT_X(false, Q_FUNC_INFO, "Unexpected input");
96  return QChar();
97  }
98  }
99 }
100 
102 {
103  Q_ASSERT(!isEven(1) && isEven(0) && isEven(2));
104  Q_ASSERT(!input.isEmpty());
105 
106  QString result;
107  const int len = input.length();
108 
109  /* The amount of compressed characters. For instance, if input is
110  * four spaces followed by one tab, compressedChars will be 2, and the resulting
111  * QString will have a length of 1, two compressedChars stored in one QChar. */
112  int compressedChars = 0;
113 
114  for(int i = 0; i < len; ++i)
115  {
116  const QChar c(input.at(i));
117 
118  int start = i;
119 
120  while(true)
121  {
122  if(i + 1 == input.length() || input.at(i + 1) != c)
123  break;
124  else
125  ++i;
126  }
127 
128  /* The length of subsequent whitespace characters in the input. */
129  int wsLen = (i - start) + 1;
130 
131  /* We might get a sequence of whitespace that is so long, that we can't
132  * store it in one unit/byte. In that case we chop it into as many subsequent
133  * ones that is needed. */
134  while(true)
135  {
136  const int unitLength = qMin(wsLen, int(MaxCharCount));
137  wsLen -= unitLength;
138 
139  ushort resultCP = toCompressedChar(c, unitLength);
140 
141  if(isEven(compressedChars))
142  result += QChar(resultCP);
143  else
144  {
145  resultCP = resultCP << 8;
146  resultCP |= result.at(result.size() - 1).unicode();
147  result[result.size() - 1] = resultCP;
148  }
149 
150  ++compressedChars;
151 
152  if(wsLen == 0)
153  break;
154  }
155  }
156 
157  return result;
158 }
159 
161 {
162  Q_ASSERT(!input.isEmpty());
163  const int len = input.length() * 2;
164  QString retval;
165 
166  for(int i = 0; i < len; ++i)
167  {
168  ushort cp = input.at(i / 2).unicode();
169 
170  if(isEven(i))
171  cp &= Lower8Bits;
172  else
173  {
174  cp = cp >> 8;
175 
176  if(cp == 0)
177  return retval;
178  }
179 
180  const quint8 wsLen = cp & Lower6Bits;
181  const quint8 id = cp & UpperTwoBits;
182 
183  /* Resize retval, and fill in on the top. */
184  const int oldSize = retval.size();
185  const int newSize = retval.size() + wsLen;
186  retval.resize(newSize);
187  const QChar ch(toChar(CharIdentifier(id)));
188 
189  for(int f = oldSize; f < newSize; ++f)
190  retval[f] = ch;
191  }
192 
193  return retval;
194 }
195 
197 
unsigned char c[8]
Definition: qnumeric_p.h:62
Q_DECL_CONSTEXPR const T & qMin(const T &a, const T &b)
Definition: qglobal.h:1215
#define QT_END_NAMESPACE
This macro expands to.
Definition: qglobal.h:90
int length() const
Returns the number of characters referred to by the string reference.
Definition: qstring.h:1116
const QChar at(int i) const
Returns the character at the given index position in the string.
Definition: qstring.h:698
ushort unicode() const
This is an overloaded member function, provided for convenience. It differs from the above function only in what argument(s) it accepts.
Definition: qchar.h:251
int length() const
Returns the number of characters in this string.
Definition: qstring.h:696
static quint8 toCompressedChar(const QChar ch, const int len)
unsigned char quint8
Definition: qglobal.h:934
The QString class provides a Unicode character string.
Definition: qstring.h:83
#define Q_ASSERT(cond)
Definition: qglobal.h:1823
The QChar class provides a 16-bit Unicode character.
Definition: qchar.h:72
#define QT_BEGIN_NAMESPACE
This macro expands to.
Definition: qglobal.h:89
int size() const
Returns the number of characters in this string.
Definition: qstring.h:102
bool isEmpty() const
Returns true if the string has no characters; otherwise returns false.
Definition: qstring.h:704
static QString compress(const QStringRef &input)
Compresses input into a compressed format, returned as a QString.
The namespace for the internal API of QtXmlPatterns.
bool isEmpty() const
Returns true if the string reference has no characters; otherwise returns false.
Definition: qstring.h:1169
The QStringRef class provides a thin wrapper around QString substrings.
Definition: qstring.h:1099
void resize(int size)
Sets the size of the string to size characters.
Definition: qstring.cpp:1353
static QChar toChar(const CharIdentifier id)
static bool isEven(const int number)
Returns true if number is an even number, otherwise false.
#define Q_ASSERT_X(cond, where, what)
Definition: qglobal.h:1837
unsigned short ushort
Definition: qglobal.h:995
static QString decompress(const QString &input)
Decompresses input into a usual QString.
static CharIdentifier toIdentifier(const QChar ch)
The QLatin1Char class provides an 8-bit ASCII/Latin-1 character.
Definition: qchar.h:55
const QChar at(int i) const
Returns the character at the given index position in the string reference.
Definition: qstring.h:1174
#define Q_FUNC_INFO
Definition: qglobal.h:1871