Qt 4.8
qbytearraymatcher.cpp
Go to the documentation of this file.
1 /****************************************************************************
2 **
3 ** Copyright (C) 2014 Digia Plc and/or its subsidiary(-ies).
4 ** Contact: http://www.qt-project.org/legal
5 **
6 ** This file is part of the QtCore module of the Qt Toolkit.
7 **
8 ** $QT_BEGIN_LICENSE:LGPL$
9 ** Commercial License Usage
10 ** Licensees holding valid commercial Qt licenses may use this file in
11 ** accordance with the commercial license agreement provided with the
12 ** Software or, alternatively, in accordance with the terms contained in
13 ** a written agreement between you and Digia. For licensing terms and
14 ** conditions see http://qt.digia.com/licensing. For further information
15 ** use the contact form at http://qt.digia.com/contact-us.
16 **
17 ** GNU Lesser General Public License Usage
18 ** Alternatively, this file may be used under the terms of the GNU Lesser
19 ** General Public License version 2.1 as published by the Free Software
20 ** Foundation and appearing in the file LICENSE.LGPL included in the
21 ** packaging of this file. Please review the following information to
22 ** ensure the GNU Lesser General Public License version 2.1 requirements
23 ** will be met: http://www.gnu.org/licenses/old-licenses/lgpl-2.1.html.
24 **
25 ** In addition, as a special exception, Digia gives you certain additional
26 ** rights. These rights are described in the Digia Qt LGPL Exception
27 ** version 1.1, included in the file LGPL_EXCEPTION.txt in this package.
28 **
29 ** GNU General Public License Usage
30 ** Alternatively, this file may be used under the terms of the GNU
31 ** General Public License version 3.0 as published by the Free Software
32 ** Foundation and appearing in the file LICENSE.GPL included in the
33 ** packaging of this file. Please review the following information to
34 ** ensure the GNU General Public License version 3.0 requirements will be
35 ** met: http://www.gnu.org/copyleft/gpl.html.
36 **
37 **
38 ** $QT_END_LICENSE$
39 **
40 ****************************************************************************/
41 
42 #include "qbytearraymatcher.h"
43 
44 #include <limits.h>
45 
47 
48 static inline void bm_init_skiptable(const uchar *cc, int len, uchar *skiptable)
49 {
50  int l = qMin(len, 255);
51  memset(skiptable, l, 256*sizeof(uchar));
52  cc += len - l;
53  while (l--)
54  skiptable[*cc++] = l;
55 }
56 
57 static inline int bm_find(const uchar *cc, int l, int index, const uchar *puc, uint pl,
58  const uchar *skiptable)
59 {
60  if (pl == 0)
61  return index > l ? -1 : index;
62  const uint pl_minus_one = pl - 1;
63 
64  register const uchar *current = cc + index + pl_minus_one;
65  const uchar *end = cc + l;
66  while (current < end) {
67  uint skip = skiptable[*current];
68  if (!skip) {
69  // possible match
70  while (skip < pl) {
71  if (*(current - skip) != puc[pl_minus_one - skip])
72  break;
73  skip++;
74  }
75  if (skip > pl_minus_one) // we have a match
76  return (current - cc) - skip + 1;
77 
78  // in case we don't have a match we are a bit inefficient as we only skip by one
79  // when we have the non matching char in the string.
80  if (skiptable[*(current - skip)] == pl)
81  skip = pl - skip;
82  else
83  skip = 1;
84  }
85  if (current > end - skip)
86  break;
87  current += skip;
88  }
89  return -1; // not found
90 }
91 
123  : d(0)
124 {
125  p.p = 0;
126  p.l = 0;
127  qMemSet(p.q_skiptable, 0, sizeof(p.q_skiptable));
128 }
129 
136  : d(0)
137 {
138  p.p = reinterpret_cast<const uchar *>(pattern);
139  p.l = length;
141 }
142 
148  : d(0), q_pattern(pattern)
149 {
150  p.p = reinterpret_cast<const uchar *>(pattern.constData());
151  p.l = pattern.size();
153 }
154 
159  : d(0)
160 {
161  operator=(other);
162 }
163 
168 {
169 }
170 
175 {
176  q_pattern = other.q_pattern;
177  memcpy(&p, &other.p, sizeof(p));
178  return *this;
179 }
180 
188 {
189  q_pattern = pattern;
190  p.p = reinterpret_cast<const uchar *>(pattern.constData());
191  p.l = pattern.size();
193 }
194 
202 int QByteArrayMatcher::indexIn(const QByteArray &ba, int from) const
203 {
204  if (from < 0)
205  from = 0;
206  return bm_find(reinterpret_cast<const uchar *>(ba.constData()), ba.size(), from,
207  p.p, p.l, p.q_skiptable);
208 }
209 
217 int QByteArrayMatcher::indexIn(const char *str, int len, int from) const
218 {
219  if (from < 0)
220  from = 0;
221  return bm_find(reinterpret_cast<const uchar *>(str), len, from,
222  p.p, p.l, p.q_skiptable);
223 }
224 
238 static int findChar(const char *str, int len, char ch, int from)
239 {
240  const uchar *s = (const uchar *)str;
241  uchar c = (uchar)ch;
242  if (from < 0)
243  from = qMax(from + len, 0);
244  if (from < len) {
245  const uchar *n = s + from - 1;
246  const uchar *e = s + len;
247  while (++n != e)
248  if (*n == c)
249  return n - s;
250  }
251  return -1;
252 }
253 
257  const char *haystack, int haystackLen, int haystackOffset,
258  const char *needle, int needleLen)
259 {
260  uchar skiptable[256];
261  bm_init_skiptable((const uchar *)needle, needleLen, skiptable);
262  if (haystackOffset < 0)
263  haystackOffset = 0;
264  return bm_find((const uchar *)haystack, haystackLen, haystackOffset,
265  (const uchar *)needle, needleLen, skiptable);
266 }
267 
268 #define REHASH(a) \
269  if (sl_minus_1 < sizeof(uint) * CHAR_BIT) \
270  hashHaystack -= (a) << sl_minus_1; \
271  hashHaystack <<= 1
272 
276  const char *haystack0, int haystackLen, int from,
277  const char *needle, int needleLen)
278 {
279  const int l = haystackLen;
280  const int sl = needleLen;
281  if (from < 0)
282  from += l;
283  if (uint(sl + from) > (uint)l)
284  return -1;
285  if (!sl)
286  return from;
287  if (!l)
288  return -1;
289 
290  if (sl == 1)
291  return findChar(haystack0, haystackLen, needle[0], from);
292 
293  /*
294  We use the Boyer-Moore algorithm in cases where the overhead
295  for the skip table should pay off, otherwise we use a simple
296  hash function.
297  */
298  if (l > 500 && sl > 5)
299  return qFindByteArrayBoyerMoore(haystack0, haystackLen, from,
300  needle, needleLen);
301 
302  /*
303  We use some hashing for efficiency's sake. Instead of
304  comparing strings, we compare the hash value of str with that
305  of a part of this QString. Only if that matches, we call memcmp().
306  */
307  const char *haystack = haystack0 + from;
308  const char *end = haystack0 + (l - sl);
309  const uint sl_minus_1 = sl - 1;
310  uint hashNeedle = 0, hashHaystack = 0;
311  int idx;
312  for (idx = 0; idx < sl; ++idx) {
313  hashNeedle = ((hashNeedle<<1) + needle[idx]);
314  hashHaystack = ((hashHaystack<<1) + haystack[idx]);
315  }
316  hashHaystack -= *(haystack + sl_minus_1);
317 
318  while (haystack <= end) {
319  hashHaystack += *(haystack + sl_minus_1);
320  if (hashHaystack == hashNeedle && *needle == *haystack
321  && memcmp(needle, haystack, sl) == 0)
322  return haystack - haystack0;
323 
324  REHASH(*haystack);
325  ++haystack;
326  }
327  return -1;
328 }
329 
double d
Definition: qnumeric_p.h:62
~QByteArrayMatcher()
Destroys the byte array matcher.
unsigned char c[8]
Definition: qnumeric_p.h:62
Q_DECL_CONSTEXPR const T & qMin(const T &a, const T &b)
Definition: qglobal.h:1215
static int bm_find(const uchar *cc, int l, int index, const uchar *puc, uint pl, const uchar *skiptable)
#define QT_END_NAMESPACE
This macro expands to.
Definition: qglobal.h:90
The QByteArray class provides an array of bytes.
Definition: qbytearray.h:135
void setPattern(const QByteArray &pattern)
Sets the byte array that this byte array matcher will search for to pattern.
QByteArrayMatcher & operator=(const QByteArrayMatcher &other)
Assigns the other byte array matcher to this byte array matcher.
static int findChar(const char *str, int len, char ch, int from)
static void bm_init_skiptable(const uchar *cc, int len, uchar *skiptable)
Q_DECL_CONSTEXPR const T & qMax(const T &a, const T &b)
Definition: qglobal.h:1217
QByteArrayMatcher()
Constructs an empty byte array matcher that won&#39;t match anything.
unsigned char uchar
Definition: qglobal.h:994
#define QT_BEGIN_NAMESPACE
This macro expands to.
Definition: qglobal.h:89
QByteArrayMatcherPrivate * d
unsigned int uint
Definition: qglobal.h:996
static int qFindByteArrayBoyerMoore(const char *haystack, int haystackLen, int haystackOffset, const char *needle, int needleLen)
The QByteArrayMatcher class holds a sequence of bytes that can be quickly matched in a byte array...
int length() const
Same as size().
Definition: qbytearray.h:356
const char * constData() const
Returns a pointer to the data stored in the byte array.
Definition: qbytearray.h:433
void * qMemSet(void *dest, int c, size_t n)
Definition: qglobal.cpp:2509
int qFindByteArray(const char *haystack0, int haystackLen, int from, const char *needle, int needleLen)
QFactoryLoader * l
int size() const
Returns the number of bytes in this byte array.
Definition: qbytearray.h:402
quint16 index
QByteArray pattern() const
Returns the byte array pattern that this byte array matcher will search for.
static const KeyPair *const end
int indexIn(const QByteArray &ba, int from=0) const
Searches the byte array ba, from byte position from (default 0, i.e.
#define REHASH(a)