Qt 4.8
Classes | Public Functions | Public Variables | Private Types | Private Functions | Properties | Friends | List of all members
QRegExpEngine Class Reference

Classes

class  Box
 

Public Functions

void addAnchors (int from, int to, int a)
 
void addCatTransitions (const QVector< int > &from, const QVector< int > &to)
 
void addPlusTransitions (const QVector< int > &from, const QVector< int > &to, int atom)
 
int anchorAlternation (int a, int b)
 
int anchorConcatenation (int a, int b)
 
int captureCount () const
 
int createState (QChar ch)
 
int createState (const QRegExpCharClass &cc)
 
int createState (int bref)
 
void dump () const
 
const QString & errorString () const
 
void heuristicallyChooseHeuristic ()
 
bool isValid () const
 
 QRegExpEngine (Qt::CaseSensitivity cs, bool greedyQuantifiers)
 
 QRegExpEngine (const QRegExpEngineKey &key)
 
 ~QRegExpEngine ()
 

Public Variables

QAtomicInt ref
 

Private Types

enum  { CharClassBit = 0x10000, BackRefBit = 0x20000 }
 
enum  { InitialState = 0, FinalState = 1 }
 
enum  { MaxLookaheads = 13, MaxBackRefs = 14 }
 
enum  {
  Anchor_Dollar = 0x00000001, Anchor_Caret = 0x00000002, Anchor_Word = 0x00000004, Anchor_NonWord = 0x00000008,
  Anchor_FirstLookahead = 0x00000010, Anchor_BackRef1Empty = Anchor_FirstLookahead << MaxLookaheads, Anchor_BackRef0Empty = Anchor_BackRef1Empty >> 1, Anchor_Alternation = unsigned(Anchor_BackRef1Empty) << MaxBackRefs,
  Anchor_LookaheadMask
}
 
enum  {
  Tok_Eos, Tok_Dollar, Tok_LeftParen, Tok_MagicLeftParen,
  Tok_PosLookahead, Tok_NegLookahead, Tok_RightParen, Tok_CharClass,
  Tok_Caret, Tok_Quantifier, Tok_Bar, Tok_Word,
  Tok_NonWord, Tok_Char = 0x10000, Tok_BackRef = 0x20000
}
 

Private Functions

int addLookahead (QRegExpEngine *eng, bool negative)
 
bool badCharMatch (QRegExpMatchState &matchState) const
 
void error (const char *msg)
 
void finishAtom (int atom, bool needCapture)
 
int getChar ()
 
int getEscape ()
 
int getRep (int def)
 
int getToken ()
 
bool goodStringMatch (QRegExpMatchState &matchState) const
 
int parse (const QChar *rx, int len)
 
void parseAtom (Box *box)
 
void parseExpression (Box *box)
 
void parseFactor (Box *box)
 
void parseTerm (Box *box)
 
void setup ()
 
void setupCategoriesRangeMap ()
 
int setupState (int match)
 
void skipChars (int n)
 
int startAtom (bool officialCapture)
 
void startTokenizer (const QChar *rx, int len)
 

Properties

QVector< QRegExpAnchorAlternation > aa
 
QVector< QRegExpLookahead * > ahead
 
QVector< int > captureForOfficialCapture
 
bool caretAnchored
 
QHash< QByteArray, QPair< int, int > > categoriesRangeMap
 
int cf
 
QVector< QRegExpCharClass > cl
 
Qt::CaseSensitivity cs
 
QVector< QRegExpAtom > f
 
int goodEarlyStart
 
int goodLateStart
 
QString goodStr
 
bool greedyQuantifiers
 
int minl
 
int nbrefs
 
int ncap
 
int nf
 
QVector< int > occ1
 
int officialncap
 
QVector< QRegExpAutomatonState > s
 
bool trivial
 
bool useGoodStringHeuristic
 
bool valid
 
bool xmlSchemaExtensions
 
int yyCh
 
QScopedPointer< QRegExpCharClass > yyCharClass
 
QString yyError
 
const QChar * yyIn
 
int yyLen
 
int yyMaxRep
 
bool yyMayCapture
 
int yyMinRep
 
int yyPos
 
int yyPos0
 
int yyTok
 

Friends

class Box
 
struct QRegExpMatchState
 

Detailed Description

Definition at line 1074 of file qregexp.cpp.

Enumerations

◆ anonymous enum

anonymous enum
private
Enumerator
CharClassBit 
BackRefBit 

Definition at line 1118 of file qregexp.cpp.

◆ anonymous enum

anonymous enum
private
Enumerator
InitialState 
FinalState 

Definition at line 1119 of file qregexp.cpp.

◆ anonymous enum

anonymous enum
private
Enumerator
MaxLookaheads 
MaxBackRefs 

Definition at line 1128 of file qregexp.cpp.

◆ anonymous enum

anonymous enum
private
Enumerator
Anchor_Dollar 
Anchor_Caret 
Anchor_Word 
Anchor_NonWord 
Anchor_FirstLookahead 
Anchor_BackRef1Empty 
Anchor_BackRef0Empty 
Anchor_Alternation 
Anchor_LookaheadMask 

Definition at line 1129 of file qregexp.cpp.

◆ anonymous enum

anonymous enum
private

Constructors and Destructors

◆ QRegExpEngine() [1/2]

QRegExpEngine::QRegExpEngine ( Qt::CaseSensitivity  cs,
bool  greedyQuantifiers 
)
inline

Definition at line 1077 of file qregexp.cpp.

Referenced by parseAtom(), and prepareEngine_helper().

Qt::CaseSensitivity cs
Definition: qregexp.cpp:1176
bool greedyQuantifiers
Definition: qregexp.cpp:1177
void setup()
Definition: qregexp.cpp:1706

◆ QRegExpEngine() [2/2]

QRegExpEngine::QRegExpEngine ( const QRegExpEngineKey &  key)

Definition at line 1343 of file qregexp.cpp.

1346 {
1347  setup();
1348 
1350 
1351  valid = (parse(rx.unicode(), rx.length()) == rx.length());
1352  if (!valid) {
1353 #ifndef QT_NO_REGEXP_OPTIM
1354  trivial = false;
1355 #endif
1357  }
1358 }
int parse(const QChar *rx, int len)
Definition: qregexp.cpp:3361
Qt::CaseSensitivity cs
Definition: qregexp.cpp:1176
int length() const
Returns the number of characters in this string.
Definition: qstring.h:696
Qt::CaseSensitivity cs
Definition: qregexp.cpp:876
void error(const char *msg)
Definition: qregexp.cpp:3181
#define RXERR_LEFTDELIM
Definition: qregexp.cpp:72
The QString class provides a Unicode character string.
Definition: qstring.h:83
QRegExp::PatternSyntax patternSyntax
Definition: qregexp.cpp:875
QString pattern
Definition: qregexp.cpp:874
const QChar * unicode() const
Returns a '\0'-terminated Unicode representation of the string.
Definition: qstring.h:706
Q_CORE_EXPORT QString qt_regexp_toCanonical(const QString &pattern, QRegExp::PatternSyntax patternSyntax)
Definition: qregexp.cpp:1323
bool greedyQuantifiers
Definition: qregexp.cpp:1177
bool xmlSchemaExtensions
Definition: qregexp.cpp:1178
void setup()
Definition: qregexp.cpp:1706

◆ ~QRegExpEngine()

QRegExpEngine::~QRegExpEngine ( )

Definition at line 1360 of file qregexp.cpp.

1361 {
1362 #ifndef QT_NO_REGEXP_LOOKAHEAD
1363  qDeleteAll(ahead);
1364 #endif
1365 }
QVector< QRegExpLookahead * > ahead
Definition: qregexp.cpp:1166
Q_OUTOFLINE_TEMPLATE void qDeleteAll(ForwardIterator begin, ForwardIterator end)
Definition: qalgorithms.h:319

Functions

◆ addAnchors()

void QRegExpEngine::addAnchors ( int  from,
int  to,
int  a 
)

Definition at line 1593 of file qregexp.cpp.

Referenced by QRegExpEngine::Box::addAnchorsToEngine().

1594 {
1595  QRegExpAutomatonState &st = s[from];
1596  if (st.anchors.contains(to))
1597  a = anchorAlternation(st.anchors.value(to), a);
1598  st.anchors.insert(to, a);
1599 }
QMap< int, int > anchors
Definition: qregexp.cpp:958
int anchorAlternation(int a, int b)
Definition: qregexp.cpp:1557
long ASN1_INTEGER_get ASN1_INTEGER * a
const T value(const Key &key) const
Returns the value associated with the key key.
Definition: qmap.h:499
iterator insert(const Key &key, const T &value)
Inserts a new item with the key key and a value of value.
Definition: qmap.h:559
#define st(var, type, card)
bool contains(const Key &key) const
Returns true if the map contains an item with key key; otherwise returns false.
Definition: qmap.h:553
QVector< QRegExpAutomatonState > s
Definition: qregexp.cpp:1153

◆ addCatTransitions()

void QRegExpEngine::addCatTransitions ( const QVector< int > &  from,
const QVector< int > &  to 
)

Definition at line 1528 of file qregexp.cpp.

Referenced by QRegExpEngine::Box::cat(), and QRegExpEngine::Box::plus().

1529 {
1530  for (int i = 0; i < from.size(); i++)
1531  mergeInto(&s[from.at(i)].outs, to);
1532 }
static void mergeInto(QVector< int > *a, const QVector< int > &b)
Definition: qregexp.cpp:716
const T & at(int i) const
Returns the item at index position i in the vector.
Definition: qvector.h:350
QVector< QRegExpAutomatonState > s
Definition: qregexp.cpp:1153
int size() const
Returns the number of items in the vector.
Definition: qvector.h:137

◆ addLookahead()

int QRegExpEngine::addLookahead ( QRegExpEngine *  eng,
bool  negative 
)
private

Definition at line 1769 of file qregexp.cpp.

Referenced by parseAtom().

1770 {
1771  int n = ahead.size();
1772  if (n == MaxLookaheads) {
1773  error(RXERR_LIMIT);
1774  return 0;
1775  }
1776  ahead += new QRegExpLookahead(eng, negative);
1777  return Anchor_FirstLookahead << n;
1778 }
void error(const char *msg)
Definition: qregexp.cpp:3181
QVector< QRegExpLookahead * > ahead
Definition: qregexp.cpp:1166
int size() const
Returns the number of items in the vector.
Definition: qvector.h:137
#define RXERR_LIMIT
Definition: qregexp.cpp:74

◆ addPlusTransitions()

void QRegExpEngine::addPlusTransitions ( const QVector< int > &  from,
const QVector< int > &  to,
int  atom 
)

Definition at line 1535 of file qregexp.cpp.

Referenced by QRegExpEngine::Box::plus().

1536 {
1537  for (int i = 0; i < from.size(); i++) {
1538  QRegExpAutomatonState &st = s[from.at(i)];
1539  const QVector<int> oldOuts = st.outs;
1540  mergeInto(&st.outs, to);
1541  if (f.at(atom).capture != QRegExpAtom::NoCapture) {
1542  for (int j = 0; j < to.size(); j++) {
1543  // ### st.reenter.contains(to.at(j)) check looks suspicious
1544  if (!st.reenter.contains(to.at(j)) &&
1545  qBinaryFind(oldOuts.constBegin(), oldOuts.constEnd(), to.at(j)) == oldOuts.end())
1546  st.reenter.insert(to.at(j), atom);
1547  }
1548  }
1549  }
1550 }
QVector< int > outs
Definition: qregexp.cpp:956
static void mergeInto(QVector< int > *a, const QVector< int > &b)
Definition: qregexp.cpp:716
const_iterator constEnd() const
Returns a const STL-style iterator pointing to the imaginary item after the last item in the vector.
Definition: qvector.h:252
Q_OUTOFLINE_TEMPLATE RandomAccessIterator qBinaryFind(RandomAccessIterator begin, RandomAccessIterator end, const T &value)
Definition: qalgorithms.h:295
iterator end()
Returns an STL-style iterator pointing to the imaginary item after the last item in the vector.
Definition: qvector.h:250
const_iterator constBegin() const
Returns a const STL-style iterator pointing to the first item in the vector.
Definition: qvector.h:249
const T & at(int i) const
Returns the item at index position i in the vector.
Definition: qvector.h:350
iterator insert(const Key &key, const T &value)
Inserts a new item with the key key and a value of value.
Definition: qmap.h:559
#define st(var, type, card)
bool contains(const Key &key) const
Returns true if the map contains an item with key key; otherwise returns false.
Definition: qmap.h:553
QVector< QRegExpAtom > f
Definition: qregexp.cpp:1155
QVector< QRegExpAutomatonState > s
Definition: qregexp.cpp:1153
int size() const
Returns the number of items in the vector.
Definition: qvector.h:137
QMap< int, int > reenter
Definition: qregexp.cpp:957

◆ anchorAlternation()

int QRegExpEngine::anchorAlternation ( int  a,
int  b 
)

Definition at line 1557 of file qregexp.cpp.

Referenced by addAnchors(), anchorConcatenation(), and QRegExpEngine::Box::orx().

1558 {
1559  if (((a & b) == a || (a & b) == b) && ((a | b) & Anchor_Alternation) == 0)
1560  return a & b;
1561 
1562  int n = aa.size();
1563 #ifndef QT_NO_REGEXP_OPTIM
1564  if (n > 0 && aa.at(n - 1).a == a && aa.at(n - 1).b == b)
1565  return Anchor_Alternation | (n - 1);
1566 #endif
1567 
1568  QRegExpAnchorAlternation element = {a, b};
1569  aa.append(element);
1570  return Anchor_Alternation | n;
1571 }
long ASN1_INTEGER_get ASN1_INTEGER * a
void append(const T &t)
Inserts value at the end of the vector.
Definition: qvector.h:573
const T & at(int i) const
Returns the item at index position i in the vector.
Definition: qvector.h:350
QVector< QRegExpAnchorAlternation > aa
Definition: qregexp.cpp:1169
int size() const
Returns the number of items in the vector.
Definition: qvector.h:137

◆ anchorConcatenation()

int QRegExpEngine::anchorConcatenation ( int  a,
int  b 
)

Definition at line 1576 of file qregexp.cpp.

Referenced by QRegExpEngine::Box::addAnchorsToEngine(), QRegExpEngine::Box::cat(), and QRegExpEngine::Box::catAnchor().

1577 {
1578  if (((a | b) & Anchor_Alternation) == 0)
1579  return a | b;
1580  if ((b & Anchor_Alternation) != 0)
1581  qSwap(a, b);
1582 
1583  int aprime = anchorConcatenation(aa.at(a ^ Anchor_Alternation).a, b);
1584  int bprime = anchorConcatenation(aa.at(a ^ Anchor_Alternation).b, b);
1585  return anchorAlternation(aprime, bprime);
1586 }
int anchorAlternation(int a, int b)
Definition: qregexp.cpp:1557
long ASN1_INTEGER_get ASN1_INTEGER * a
int anchorConcatenation(int a, int b)
Definition: qregexp.cpp:1576
void qSwap(T &value1, T &value2)
Definition: qglobal.h:2181
const T & at(int i) const
Returns the item at index position i in the vector.
Definition: qvector.h:350
QVector< QRegExpAnchorAlternation > aa
Definition: qregexp.cpp:1169

◆ badCharMatch()

bool QRegExpEngine::badCharMatch ( QRegExpMatchState &  matchState) const
private

Definition at line 1892 of file qregexp.cpp.

1893 {
1894  int slideHead = 0;
1895  int slideNext = 0;
1896  int i;
1897  int lastPos = matchState.len - minl;
1898  memset(matchState.slideTab, 0, matchState.slideTabSize * sizeof(int));
1899 
1900  /*
1901  Set up the slide table, used for the bad-character heuristic,
1902  using the table of first occurrence of each character.
1903  */
1904  for (i = 0; i < minl; i++) {
1905  int sk = occ1[BadChar(matchState.in[matchState.pos + i])];
1906  if (sk == NoOccurrence)
1907  sk = i + 1;
1908  if (sk > 0) {
1909  int k = i + 1 - sk;
1910  if (k < 0) {
1911  sk = i + 1;
1912  k = 0;
1913  }
1914  if (sk > matchState.slideTab[k])
1915  matchState.slideTab[k] = sk;
1916  }
1917  }
1918 
1919  if (matchState.pos > lastPos)
1920  return false;
1921 
1922  for (;;) {
1923  if (++slideNext >= matchState.slideTabSize)
1924  slideNext = 0;
1925  if (matchState.slideTab[slideHead] > 0) {
1926  if (matchState.slideTab[slideHead] - 1 > matchState.slideTab[slideNext])
1927  matchState.slideTab[slideNext] = matchState.slideTab[slideHead] - 1;
1928  matchState.slideTab[slideHead] = 0;
1929  } else {
1930  if (matchState.matchHere())
1931  return true;
1932  }
1933 
1934  if (matchState.pos == lastPos)
1935  break;
1936 
1937  /*
1938  Update the slide table. This code has much in common with
1939  the initialization code.
1940  */
1941  int sk = occ1[BadChar(matchState.in[matchState.pos + minl])];
1942  if (sk == NoOccurrence) {
1943  matchState.slideTab[slideNext] = minl;
1944  } else if (sk > 0) {
1945  int k = slideNext + minl - sk;
1946  if (k >= matchState.slideTabSize)
1947  k -= matchState.slideTabSize;
1948  if (sk > matchState.slideTab[k])
1949  matchState.slideTab[k] = sk;
1950  }
1951  slideHead = slideNext;
1952  ++matchState.pos;
1953  }
1954  return false;
1955 }
QVector< int > occ1
Definition: qregexp.cpp:1191
#define BadChar(ch)
Definition: qregexp.cpp:699
const QChar * in
Definition: qregexp.cpp:904
const int NoOccurrence
Definition: qregexp.cpp:701

◆ captureCount()

int QRegExpEngine::captureCount ( ) const
inline

Definition at line 1085 of file qregexp.cpp.

Referenced by QRegExp::numCaptures(), and QRegExpMatchState::prepareForMatch().

1085 { return officialncap; }

◆ createState() [1/3]

int QRegExpEngine::createState ( QChar  ch)

Definition at line 1489 of file qregexp.cpp.

Referenced by QRegExpEngine::Box::set().

1490 {
1491  return setupState(ch.unicode());
1492 }
ushort unicode() const
This is an overloaded member function, provided for convenience. It differs from the above function only in what argument(s) it accepts.
Definition: qchar.h:251
int setupState(int match)
Definition: qregexp.cpp:1731

◆ createState() [2/3]

int QRegExpEngine::createState ( const QRegExpCharClass &  cc)

Definition at line 1494 of file qregexp.cpp.

1495 {
1496 #ifndef QT_NO_REGEXP_CCLASS
1497  int n = cl.size();
1498  cl += QRegExpCharClass(cc);
1499  return setupState(CharClassBit | n);
1500 #else
1501  Q_UNUSED(cc);
1502  return setupState(CharClassBit);
1503 #endif
1504 }
int setupState(int match)
Definition: qregexp.cpp:1731
QVector< QRegExpCharClass > cl
Definition: qregexp.cpp:1163
#define Q_UNUSED(x)
Indicates to the compiler that the parameter with the specified name is not used in the body of a function.
Definition: qglobal.h:1729
int size() const
Returns the number of items in the vector.
Definition: qvector.h:137

◆ createState() [3/3]

int QRegExpEngine::createState ( int  bref)

Definition at line 1507 of file qregexp.cpp.

1508 {
1509  if (bref > nbrefs) {
1510  nbrefs = bref;
1511  if (nbrefs > MaxBackRefs) {
1512  error(RXERR_LIMIT);
1513  return 0;
1514  }
1515  }
1516  return setupState(BackRefBit | bref);
1517 }
void error(const char *msg)
Definition: qregexp.cpp:3181
int setupState(int match)
Definition: qregexp.cpp:1731
#define RXERR_LIMIT
Definition: qregexp.cpp:74

◆ dump()

void QRegExpEngine::dump ( ) const

Definition at line 1649 of file qregexp.cpp.

1650 {
1651  int i, j;
1652  qDebug("Case %ssensitive engine", cs ? "" : "in");
1653  qDebug(" States");
1654  for (i = 0; i < s.size(); i++) {
1655  qDebug(" %d%s", i, i == InitialState ? " (initial)" : i == FinalState ? " (final)" : "");
1656 #ifndef QT_NO_REGEXP_CAPTURE
1657  if (nf > 0)
1658  qDebug(" in atom %d", s[i].atom);
1659 #endif
1660  int m = s[i].match;
1661  if ((m & CharClassBit) != 0) {
1662  qDebug(" match character class %d", m ^ CharClassBit);
1663 #ifndef QT_NO_REGEXP_CCLASS
1664  cl[m ^ CharClassBit].dump();
1665 #else
1666  qDebug(" negative character class");
1667 #endif
1668  } else if ((m & BackRefBit) != 0) {
1669  qDebug(" match back-reference %d", m ^ BackRefBit);
1670  } else if (m >= 0x20 && m <= 0x7e) {
1671  qDebug(" match 0x%.4x (%c)", m, m);
1672  } else {
1673  qDebug(" match 0x%.4x", m);
1674  }
1675  for (j = 0; j < s[i].outs.size(); j++) {
1676  int next = s[i].outs[j];
1677  qDebug(" -> %d", next);
1678  if (s[i].reenter.contains(next))
1679  qDebug(" [reenter %d]", s[i].reenter[next]);
1680  if (s[i].anchors.value(next) != 0)
1681  qDebug(" [anchors 0x%.8x]", s[i].anchors[next]);
1682  }
1683  }
1684 #ifndef QT_NO_REGEXP_CAPTURE
1685  if (nf > 0) {
1686  qDebug(" Atom Parent Capture");
1687  for (i = 0; i < nf; i++) {
1688  if (f[i].capture == QRegExpAtom::NoCapture) {
1689  qDebug(" %6d %6d nil", i, f[i].parent);
1690  } else {
1691  int cap = f[i].capture;
1692  bool official = captureForOfficialCapture.contains(cap);
1693  qDebug(" %6d %6d %6d %s", i, f[i].parent, f[i].capture,
1694  official ? "official" : "");
1695  }
1696  }
1697  }
1698 #endif
1699 #ifndef QT_NO_REGEXP_ANCHOR_ALT
1700  for (i = 0; i < aa.size(); i++)
1701  qDebug(" Anchor alternation 0x%.8x: 0x%.8x 0x%.9x", i, aa[i].a, aa[i].b);
1702 #endif
1703 }
Qt::CaseSensitivity cs
Definition: qregexp.cpp:1176
QVector< int > captureForOfficialCapture
Definition: qregexp.cpp:1158
long ASN1_INTEGER_get ASN1_INTEGER * a
Q_CORE_EXPORT void qDebug(const char *,...)
T value(int i) const
Returns the value at index position i in the vector.
Definition: qvector.h:559
QVector< QRegExpAnchorAlternation > aa
Definition: qregexp.cpp:1169
QVector< QRegExpCharClass > cl
Definition: qregexp.cpp:1163
bool contains(const T &t) const
Returns true if the vector contains an occurrence of value; otherwise returns false.
Definition: qvector.h:731
QVector< QRegExpAtom > f
Definition: qregexp.cpp:1155
QVector< QRegExpAutomatonState > s
Definition: qregexp.cpp:1153
int size() const
Returns the number of items in the vector.
Definition: qvector.h:137

◆ error()

void QRegExpEngine::error ( const char *  msg)
private

Definition at line 3181 of file qregexp.cpp.

Referenced by addLookahead(), createState(), getEscape(), getRep(), getToken(), parseAtom(), and QRegExpEngine().

3182 {
3183  if (yyError.isEmpty())
3184  yyError = QLatin1String(msg);
3185 }
QLatin1String(DBUS_INTERFACE_DBUS))) Q_GLOBAL_STATIC_WITH_ARGS(QString
bool isEmpty() const
Returns true if the string has no characters; otherwise returns false.
Definition: qstring.h:704
QString yyError
Definition: qregexp.cpp:1284

◆ errorString()

const QString& QRegExpEngine::errorString ( ) const
inline

Definition at line 1084 of file qregexp.cpp.

Referenced by QRegExp::errorString().

1084 { return yyError; }
QString yyError
Definition: qregexp.cpp:1284

◆ finishAtom()

void QRegExpEngine::finishAtom ( int  atom,
bool  needCapture 
)
private

Definition at line 1757 of file qregexp.cpp.

Referenced by parse(), and parseFactor().

1758 {
1759  if (greedyQuantifiers && needCapture && f[atom].capture == QRegExpAtom::NoCapture)
1760  f[atom].capture = QRegExpAtom::UnofficialCapture;
1761  cf = f.at(atom).parent;
1762 }
bool greedyQuantifiers
Definition: qregexp.cpp:1177
const T & at(int i) const
Returns the item at index position i in the vector.
Definition: qvector.h:350
QVector< QRegExpAtom > f
Definition: qregexp.cpp:1155

◆ getChar()

int QRegExpEngine::getChar ( )
private

Definition at line 2855 of file qregexp.cpp.

Referenced by getEscape(), getRep(), getToken(), skipChars(), and startTokenizer().

2856 {
2857  return (yyPos == yyLen) ? EOS : yyIn[yyPos++].unicode();
2858 }
ushort unicode() const
This is an overloaded member function, provided for convenience. It differs from the above function only in what argument(s) it accepts.
Definition: qchar.h:251
const int EOS
Definition: qregexp.cpp:705
const QChar * yyIn
Definition: qregexp.cpp:1276

◆ getEscape()

int QRegExpEngine::getEscape ( )
private

Definition at line 2860 of file qregexp.cpp.

Referenced by getToken().

2861 {
2862 #ifndef QT_NO_REGEXP_ESCAPE
2863  const char tab[] = "afnrtv"; // no b, as \b means word boundary
2864  const char backTab[] = "\a\f\n\r\t\v";
2865  ushort low;
2866  int i;
2867 #endif
2868  ushort val;
2869  int prevCh = yyCh;
2870 
2871  if (prevCh == EOS) {
2872  error(RXERR_END);
2873  return Tok_Char | '\\';
2874  }
2875  yyCh = getChar();
2876 #ifndef QT_NO_REGEXP_ESCAPE
2877  if ((prevCh & ~0xff) == 0) {
2878  const char *p = strchr(tab, prevCh);
2879  if (p != 0)
2880  return Tok_Char | backTab[p - tab];
2881  }
2882 #endif
2883 
2884  switch (prevCh) {
2885 #ifndef QT_NO_REGEXP_ESCAPE
2886  case '0':
2887  val = 0;
2888  for (i = 0; i < 3; i++) {
2889  if (yyCh >= '0' && yyCh <= '7')
2890  val = (val << 3) | (yyCh - '0');
2891  else
2892  break;
2893  yyCh = getChar();
2894  }
2895  if ((val & ~0377) != 0)
2896  error(RXERR_OCTAL);
2897  return Tok_Char | val;
2898 #endif
2899 #ifndef QT_NO_REGEXP_ESCAPE
2900  case 'B':
2901  return Tok_NonWord;
2902 #endif
2903 #ifndef QT_NO_REGEXP_CCLASS
2904  case 'D':
2905  // see QChar::isDigit()
2906  yyCharClass->addCategories(0x7fffffef);
2907  return Tok_CharClass;
2908  case 'S':
2909  // see QChar::isSpace()
2910  yyCharClass->addCategories(0x7ffff87f);
2911  yyCharClass->addRange(0x0000, 0x0008);
2912  yyCharClass->addRange(0x000e, 0x001f);
2913  yyCharClass->addRange(0x007f, 0x009f);
2914  return Tok_CharClass;
2915  case 'W':
2916  // see QChar::isLetterOrNumber() and QChar::isMark()
2917  yyCharClass->addCategories(0x7fe07f81);
2918  yyCharClass->addRange(0x203f, 0x2040);
2919  yyCharClass->addSingleton(0x2040);
2920  yyCharClass->addSingleton(0x2054);
2921  yyCharClass->addSingleton(0x30fb);
2922  yyCharClass->addRange(0xfe33, 0xfe34);
2923  yyCharClass->addRange(0xfe4d, 0xfe4f);
2924  yyCharClass->addSingleton(0xff3f);
2925  yyCharClass->addSingleton(0xff65);
2926  return Tok_CharClass;
2927 #endif
2928 #ifndef QT_NO_REGEXP_ESCAPE
2929  case 'b':
2930  return Tok_Word;
2931 #endif
2932 #ifndef QT_NO_REGEXP_CCLASS
2933  case 'd':
2934  // see QChar::isDigit()
2935  yyCharClass->addCategories(0x00000010);
2936  return Tok_CharClass;
2937  case 's':
2938  // see QChar::isSpace()
2939  yyCharClass->addCategories(0x00000380);
2940  yyCharClass->addRange(0x0009, 0x000d);
2941  return Tok_CharClass;
2942  case 'w':
2943  // see QChar::isLetterOrNumber() and QChar::isMark()
2944  yyCharClass->addCategories(0x000f807e);
2945  yyCharClass->addSingleton(0x005f); // '_'
2946  return Tok_CharClass;
2947  case 'I':
2948  if (xmlSchemaExtensions) {
2950  // fall through
2951  }
2952  case 'i':
2953  if (xmlSchemaExtensions) {
2954  yyCharClass->addCategories(0x000f807e);
2955  yyCharClass->addSingleton(0x003a); // ':'
2956  yyCharClass->addSingleton(0x005f); // '_'
2957  yyCharClass->addRange(0x0041, 0x005a); // [A-Z]
2958  yyCharClass->addRange(0x0061, 0x007a); // [a-z]
2959  yyCharClass->addRange(0xc0, 0xd6);
2960  yyCharClass->addRange(0xd8, 0xf6);
2961  yyCharClass->addRange(0xf8, 0x2ff);
2962  yyCharClass->addRange(0x370, 0x37d);
2963  yyCharClass->addRange(0x37f, 0x1fff);
2964  yyCharClass->addRange(0x200c, 0x200d);
2965  yyCharClass->addRange(0x2070, 0x218f);
2966  yyCharClass->addRange(0x2c00, 0x2fef);
2967  yyCharClass->addRange(0x3001, 0xd7ff);
2968  yyCharClass->addRange(0xf900, 0xfdcf);
2969  yyCharClass->addRange(0xfdf0, 0xfffd);
2970  yyCharClass->addRange((ushort)0x10000, (ushort)0xeffff);
2971  }
2972  return Tok_CharClass;
2973  case 'C':
2974  if (xmlSchemaExtensions) {
2976  // fall through
2977  }
2978  case 'c':
2979  if (xmlSchemaExtensions) {
2980  yyCharClass->addCategories(0x000f807e);
2981  yyCharClass->addSingleton(0x002d); // '-'
2982  yyCharClass->addSingleton(0x002e); // '.'
2983  yyCharClass->addSingleton(0x003a); // ':'
2984  yyCharClass->addSingleton(0x005f); // '_'
2985  yyCharClass->addSingleton(0xb7);
2986  yyCharClass->addRange(0x0030, 0x0039); // [0-9]
2987  yyCharClass->addRange(0x0041, 0x005a); // [A-Z]
2988  yyCharClass->addRange(0x0061, 0x007a); // [a-z]
2989  yyCharClass->addRange(0xc0, 0xd6);
2990  yyCharClass->addRange(0xd8, 0xf6);
2991  yyCharClass->addRange(0xf8, 0x2ff);
2992  yyCharClass->addRange(0x370, 0x37d);
2993  yyCharClass->addRange(0x37f, 0x1fff);
2994  yyCharClass->addRange(0x200c, 0x200d);
2995  yyCharClass->addRange(0x2070, 0x218f);
2996  yyCharClass->addRange(0x2c00, 0x2fef);
2997  yyCharClass->addRange(0x3001, 0xd7ff);
2998  yyCharClass->addRange(0xf900, 0xfdcf);
2999  yyCharClass->addRange(0xfdf0, 0xfffd);
3000  yyCharClass->addRange((ushort)0x10000, (ushort)0xeffff);
3001  yyCharClass->addRange(0x0300, 0x036f);
3002  yyCharClass->addRange(0x203f, 0x2040);
3003  }
3004  return Tok_CharClass;
3005  case 'P':
3006  if (xmlSchemaExtensions) {
3008  // fall through
3009  }
3010  case 'p':
3011  if (xmlSchemaExtensions) {
3012  if (yyCh != '{') {
3014  return Tok_CharClass;
3015  }
3016 
3017  QByteArray category;
3018  yyCh = getChar();
3019  while (yyCh != '}') {
3020  if (yyCh == EOS) {
3021  error(RXERR_END);
3022  return Tok_CharClass;
3023  }
3024  category.append(yyCh);
3025  yyCh = getChar();
3026  }
3027  yyCh = getChar(); // skip closing '}'
3028 
3029  if (category == "M") {
3030  yyCharClass->addCategories(0x0000000e);
3031  } else if (category == "Mn") {
3032  yyCharClass->addCategories(0x00000002);
3033  } else if (category == "Mc") {
3034  yyCharClass->addCategories(0x00000004);
3035  } else if (category == "Me") {
3036  yyCharClass->addCategories(0x00000008);
3037  } else if (category == "N") {
3038  yyCharClass->addCategories(0x00000070);
3039  } else if (category == "Nd") {
3040  yyCharClass->addCategories(0x00000010);
3041  } else if (category == "Nl") {
3042  yyCharClass->addCategories(0x00000020);
3043  } else if (category == "No") {
3044  yyCharClass->addCategories(0x00000040);
3045  } else if (category == "Z") {
3046  yyCharClass->addCategories(0x00000380);
3047  } else if (category == "Zs") {
3048  yyCharClass->addCategories(0x00000080);
3049  } else if (category == "Zl") {
3050  yyCharClass->addCategories(0x00000100);
3051  } else if (category == "Zp") {
3052  yyCharClass->addCategories(0x00000200);
3053  } else if (category == "C") {
3054  yyCharClass->addCategories(0x00006c00);
3055  } else if (category == "Cc") {
3056  yyCharClass->addCategories(0x00000400);
3057  } else if (category == "Cf") {
3058  yyCharClass->addCategories(0x00000800);
3059  } else if (category == "Cs") {
3060  yyCharClass->addCategories(0x00001000);
3061  } else if (category == "Co") {
3062  yyCharClass->addCategories(0x00002000);
3063  } else if (category == "Cn") {
3064  yyCharClass->addCategories(0x00004000);
3065  } else if (category == "L") {
3066  yyCharClass->addCategories(0x000f8000);
3067  } else if (category == "Lu") {
3068  yyCharClass->addCategories(0x00008000);
3069  } else if (category == "Ll") {
3070  yyCharClass->addCategories(0x00010000);
3071  } else if (category == "Lt") {
3072  yyCharClass->addCategories(0x00020000);
3073  } else if (category == "Lm") {
3074  yyCharClass->addCategories(0x00040000);
3075  } else if (category == "Lo") {
3076  yyCharClass->addCategories(0x00080000);
3077  } else if (category == "P") {
3078  yyCharClass->addCategories(0x4f580780);
3079  } else if (category == "Pc") {
3080  yyCharClass->addCategories(0x00100000);
3081  } else if (category == "Pd") {
3082  yyCharClass->addCategories(0x00200000);
3083  } else if (category == "Ps") {
3084  yyCharClass->addCategories(0x00400000);
3085  } else if (category == "Pe") {
3086  yyCharClass->addCategories(0x00800000);
3087  } else if (category == "Pi") {
3088  yyCharClass->addCategories(0x01000000);
3089  } else if (category == "Pf") {
3090  yyCharClass->addCategories(0x02000000);
3091  } else if (category == "Po") {
3092  yyCharClass->addCategories(0x04000000);
3093  } else if (category == "S") {
3094  yyCharClass->addCategories(0x78000000);
3095  } else if (category == "Sm") {
3096  yyCharClass->addCategories(0x08000000);
3097  } else if (category == "Sc") {
3098  yyCharClass->addCategories(0x10000000);
3099  } else if (category == "Sk") {
3100  yyCharClass->addCategories(0x20000000);
3101  } else if (category == "So") {
3102  yyCharClass->addCategories(0x40000000);
3103  } else if (category.startsWith("Is")) {
3106 
3107  if (categoriesRangeMap.contains(category)) {
3108  const QPair<int, int> range = categoriesRangeMap.value(category);
3109  yyCharClass->addRange(range.first, range.second);
3110  } else {
3112  }
3113  } else {
3115  }
3116  }
3117  return Tok_CharClass;
3118 #endif
3119 #ifndef QT_NO_REGEXP_ESCAPE
3120  case 'x':
3121  val = 0;
3122  for (i = 0; i < 4; i++) {
3123  low = QChar(yyCh).toLower().unicode();
3124  if (low >= '0' && low <= '9')
3125  val = (val << 4) | (low - '0');
3126  else if (low >= 'a' && low <= 'f')
3127  val = (val << 4) | (low - 'a' + 10);
3128  else
3129  break;
3130  yyCh = getChar();
3131  }
3132  return Tok_Char | val;
3133 #endif
3134  default:
3135  if (prevCh >= '1' && prevCh <= '9') {
3136 #ifndef QT_NO_REGEXP_BACKREF
3137  val = prevCh - '0';
3138  while (yyCh >= '0' && yyCh <= '9') {
3139  val = (val * 10) + (yyCh - '0');
3140  yyCh = getChar();
3141  }
3142  return Tok_BackRef | val;
3143 #else
3145 #endif
3146  }
3147  return Tok_Char | prevCh;
3148  }
3149 }
#define RXERR_OCTAL
Definition: qregexp.cpp:71
void addRange(ushort from, ushort to)
Definition: qregexp.cpp:2365
QHash< QByteArray, QPair< int, int > > categoriesRangeMap
Definition: qregexp.cpp:1297
ushort unicode() const
This is an overloaded member function, provided for convenience. It differs from the above function only in what argument(s) it accepts.
Definition: qchar.h:251
QByteArray & append(char c)
Appends the character ch to this byte array.
void addSingleton(ushort ch)
Definition: qregexp.cpp:1035
The QByteArray class provides an array of bytes.
Definition: qbytearray.h:135
void error(const char *msg)
Definition: qregexp.cpp:3181
T1 first
Definition: qpair.h:65
T2 second
Definition: qpair.h:66
#define RXERR_CATEGORY
Definition: qregexp.cpp:76
bool startsWith(const QByteArray &a) const
Returns true if this byte array starts with byte array ba; otherwise returns false.
bool contains(const Key &key) const
Returns true if the hash contains an item with the key; otherwise returns false.
Definition: qhash.h:872
The QChar class provides a 16-bit Unicode character.
Definition: qchar.h:72
const T value(const Key &key) const
Returns the value associated with the key.
Definition: qhash.h:606
#define RXERR_DISABLED
Definition: qregexp.cpp:66
bool negative() const
Definition: qregexp.cpp:1031
void setNegative(bool negative)
Definition: qregexp.cpp:2349
bool isEmpty() const
Returns true if the hash contains no items; otherwise returns false.
Definition: qhash.h:297
#define RXERR_CHARCLASS
Definition: qregexp.cpp:67
void setupCategoriesRangeMap()
Definition: qregexp.cpp:2709
const int EOS
Definition: qregexp.cpp:705
unsigned short ushort
Definition: qglobal.h:995
QScopedPointer< QRegExpCharClass > yyCharClass
Definition: qregexp.cpp:1281
void addCategories(int cats)
Definition: qregexp.cpp:2357
#define RXERR_END
Definition: qregexp.cpp:73
bool xmlSchemaExtensions
Definition: qregexp.cpp:1178
QChar toLower() const
Returns the lowercase equivalent if the character is uppercase or titlecase; otherwise returns the character itself.
Definition: qchar.cpp:1239

◆ getRep()

int QRegExpEngine::getRep ( int  def)
private

Definition at line 3152 of file qregexp.cpp.

Referenced by getToken().

3153 {
3154  if (yyCh >= '0' && yyCh <= '9') {
3155  int rep = 0;
3156  do {
3157  rep = 10 * rep + yyCh - '0';
3158  if (rep >= InftyRep) {
3160  rep = def;
3161  }
3162  yyCh = getChar();
3163  } while (yyCh >= '0' && yyCh <= '9');
3164  return rep;
3165  } else {
3166  return def;
3167  }
3168 }
const int InftyRep
Definition: qregexp.cpp:704
void error(const char *msg)
Definition: qregexp.cpp:3181
#define RXERR_REPETITION
Definition: qregexp.cpp:70

◆ getToken()

int QRegExpEngine::getToken ( )
private

Definition at line 3200 of file qregexp.cpp.

Referenced by parse(), parseAtom(), parseExpression(), and parseFactor().

3201 {
3202 #ifndef QT_NO_REGEXP_CCLASS
3203  ushort pendingCh = 0;
3204  bool charPending;
3205  bool rangePending;
3206  int tok;
3207 #endif
3208  int prevCh = yyCh;
3209 
3210  yyPos0 = yyPos - 1;
3211 #ifndef QT_NO_REGEXP_CCLASS
3212  yyCharClass->clear();
3213 #endif
3214  yyMinRep = 0;
3215  yyMaxRep = 0;
3216  yyCh = getChar();
3217 
3218  switch (prevCh) {
3219  case EOS:
3220  yyPos0 = yyPos;
3221  return Tok_Eos;
3222  case '$':
3223  return Tok_Dollar;
3224  case '(':
3225  if (yyCh == '?') {
3226  prevCh = getChar();
3227  yyCh = getChar();
3228  switch (prevCh) {
3229 #ifndef QT_NO_REGEXP_LOOKAHEAD
3230  case '!':
3231  return Tok_NegLookahead;
3232  case '=':
3233  return Tok_PosLookahead;
3234 #endif
3235  case ':':
3236  return Tok_MagicLeftParen;
3237  case '<':
3239  return Tok_MagicLeftParen;
3240  default:
3242  return Tok_MagicLeftParen;
3243  }
3244  } else {
3245  return Tok_LeftParen;
3246  }
3247  case ')':
3248  return Tok_RightParen;
3249  case '*':
3250  yyMinRep = 0;
3251  yyMaxRep = InftyRep;
3252  return Tok_Quantifier;
3253  case '+':
3254  yyMinRep = 1;
3255  yyMaxRep = InftyRep;
3256  return Tok_Quantifier;
3257  case '.':
3258 #ifndef QT_NO_REGEXP_CCLASS
3259  yyCharClass->setNegative(true);
3260 #endif
3261  return Tok_CharClass;
3262  case '?':
3263  yyMinRep = 0;
3264  yyMaxRep = 1;
3265  return Tok_Quantifier;
3266  case '[':
3267 #ifndef QT_NO_REGEXP_CCLASS
3268  if (yyCh == '^') {
3269  yyCharClass->setNegative(true);
3270  yyCh = getChar();
3271  }
3272  charPending = false;
3273  rangePending = false;
3274  do {
3275  if (yyCh == '-' && charPending && !rangePending) {
3276  rangePending = true;
3277  yyCh = getChar();
3278  } else {
3279  if (charPending && !rangePending) {
3280  yyCharClass->addSingleton(pendingCh);
3281  charPending = false;
3282  }
3283  if (yyCh == '\\') {
3284  yyCh = getChar();
3285  tok = getEscape();
3286  if (tok == Tok_Word)
3287  tok = '\b';
3288  } else {
3289  tok = Tok_Char | yyCh;
3290  yyCh = getChar();
3291  }
3292  if (tok == Tok_CharClass) {
3293  if (rangePending) {
3294  yyCharClass->addSingleton('-');
3295  yyCharClass->addSingleton(pendingCh);
3296  charPending = false;
3297  rangePending = false;
3298  }
3299  } else if ((tok & Tok_Char) != 0) {
3300  if (rangePending) {
3301  yyCharClass->addRange(pendingCh, tok ^ Tok_Char);
3302  charPending = false;
3303  rangePending = false;
3304  } else {
3305  pendingCh = tok ^ Tok_Char;
3306  charPending = true;
3307  }
3308  } else {
3310  }
3311  }
3312  } while (yyCh != ']' && yyCh != EOS);
3313  if (rangePending)
3314  yyCharClass->addSingleton('-');
3315  if (charPending)
3316  yyCharClass->addSingleton(pendingCh);
3317  if (yyCh == EOS)
3318  error(RXERR_END);
3319  else
3320  yyCh = getChar();
3321  return Tok_CharClass;
3322 #else
3323  error(RXERR_END);
3324  return Tok_Char | '[';
3325 #endif
3326  case '\\':
3327  return getEscape();
3328  case ']':
3330  return Tok_Char | ']';
3331  case '^':
3332  return Tok_Caret;
3333  case '{':
3334 #ifndef QT_NO_REGEXP_INTERVAL
3335  yyMinRep = getRep(0);
3336  yyMaxRep = yyMinRep;
3337  if (yyCh == ',') {
3338  yyCh = getChar();
3340  }
3341  if (yyMaxRep < yyMinRep)
3343  if (yyCh != '}')
3345  yyCh = getChar();
3346  return Tok_Quantifier;
3347 #else
3349  return Tok_Char | '{';
3350 #endif
3351  case '|':
3352  return Tok_Bar;
3353  case '}':
3355  return Tok_Char | '}';
3356  default:
3357  return Tok_Char | prevCh;
3358  }
3359 }
const int InftyRep
Definition: qregexp.cpp:704
void addRange(ushort from, ushort to)
Definition: qregexp.cpp:2365
void addSingleton(ushort ch)
Definition: qregexp.cpp:1035
void error(const char *msg)
Definition: qregexp.cpp:3181
#define RXERR_LEFTDELIM
Definition: qregexp.cpp:72
#define RXERR_LOOKBEHIND
Definition: qregexp.cpp:69
#define RXERR_REPETITION
Definition: qregexp.cpp:70
#define RXERR_DISABLED
Definition: qregexp.cpp:66
void setNegative(bool negative)
Definition: qregexp.cpp:2349
#define RXERR_INTERVAL
Definition: qregexp.cpp:75
#define RXERR_CHARCLASS
Definition: qregexp.cpp:67
#define RXERR_LOOKAHEAD
Definition: qregexp.cpp:68
int getEscape()
Definition: qregexp.cpp:2860
const int EOS
Definition: qregexp.cpp:705
unsigned short ushort
Definition: qglobal.h:995
QScopedPointer< QRegExpCharClass > yyCharClass
Definition: qregexp.cpp:1281
#define RXERR_END
Definition: qregexp.cpp:73
int getRep(int def)
Definition: qregexp.cpp:3152

◆ goodStringMatch()

bool QRegExpEngine::goodStringMatch ( QRegExpMatchState & matchState) const
private

Definition at line 1872 of file qregexp.cpp.

1873 {
1874  int k = matchState.pos + goodEarlyStart;
1875  QStringMatcher matcher(goodStr.unicode(), goodStr.length(), cs);
1876  while ((k = matcher.indexIn(matchState.in, matchState.len, k)) != -1) {
1877  int from = k - goodLateStart;
1878  int to = k - goodEarlyStart;
1879  if (from > matchState.pos)
1880  matchState.pos = from;
1881 
1882  while (matchState.pos <= to) {
1883  if (matchState.matchHere())
1884  return true;
1885  ++matchState.pos;
1886  }
1887  ++k;
1888  }
1889  return false;
1890 }
QString goodStr
Definition: qregexp.cpp:1188
Qt::CaseSensitivity cs
Definition: qregexp.cpp:1176
int length() const
Returns the number of characters in this string.
Definition: qstring.h:696
The QStringMatcher class holds a sequence of characters that can be quickly matched in a Unicode string.
const QChar * unicode() const
Returns a '\0'-terminated Unicode representation of the string.
Definition: qstring.h:706
int goodEarlyStart
Definition: qregexp.cpp:1186
const QChar * in
Definition: qregexp.cpp:904

◆ heuristicallyChooseHeuristic()

void QRegExpEngine::heuristicallyChooseHeuristic ( )

Definition at line 1616 of file qregexp.cpp.

Referenced by QRegExpEngine::Box::setupHeuristics().

1617 {
1618  if (minl == 0) {
1619  useGoodStringHeuristic = false;
1620  } else if (trivial) {
1621  useGoodStringHeuristic = true;
1622  } else {
1623  /*
1624  Magic formula: The good string has to constitute a good
1625  proportion of the minimum-length string, and appear at a
1626  more-or-less known index.
1627  */
1628  int goodStringScore = (64 * goodStr.length() / minl) -
1630  /*
1631  Less magic formula: We pick some characters at random, and
1632  check whether they are good or bad.
1633  */
1634  int badCharScore = 0;
1635  int step = qMax(1, NumBadChars / 32);
1636  for (int i = 1; i < NumBadChars; i += step) {
1637  if (occ1.at(i) == NoOccurrence)
1638  badCharScore += minl;
1639  else
1640  badCharScore += occ1.at(i);
1641  }
1642  badCharScore /= minl;
1643  useGoodStringHeuristic = (goodStringScore > badCharScore);
1644  }
1645 }
QString goodStr
Definition: qregexp.cpp:1188
const int NumBadChars
Definition: qregexp.cpp:698
int length() const
Returns the number of characters in this string.
Definition: qstring.h:696
QVector< int > occ1
Definition: qregexp.cpp:1191
Q_DECL_CONSTEXPR const T & qMax(const T &a, const T &b)
Definition: qglobal.h:1217
int goodEarlyStart
Definition: qregexp.cpp:1186
const T & at(int i) const
Returns the item at index position i in the vector.
Definition: qvector.h:350
bool useGoodStringHeuristic
Definition: qregexp.cpp:1184
const int NoOccurrence
Definition: qregexp.cpp:701

◆ isValid()

bool QRegExpEngine::isValid ( ) const
inline

Definition at line 1083 of file qregexp.cpp.

Referenced by QRegExp::errorString().

1083 { return valid; }

◆ parse()

int QRegExpEngine::parse ( const QChar * rx,
int  len 
)
private

Definition at line 3361 of file qregexp.cpp.

Referenced by parseAtom(), and QRegExpEngine().

3362 {
3363  valid = true;
3364  startTokenizer(pattern, len);
3365  yyTok = getToken();
3366 #ifndef QT_NO_REGEXP_CAPTURE
3367  yyMayCapture = true;
3368 #else
3369  yyMayCapture = false;
3370 #endif
3371 
3372 #ifndef QT_NO_REGEXP_CAPTURE
3373  int atom = startAtom(false);
3374 #endif
3375  QRegExpCharClass anything;
3376  Box box(this); // create InitialState
3377  box.set(anything);
3378  Box rightBox(this); // create FinalState
3379  rightBox.set(anything);
3380 
3381  Box middleBox(this);
3382  parseExpression(&middleBox);
3383 #ifndef QT_NO_REGEXP_CAPTURE
3384  finishAtom(atom, false);
3385 #endif
3386 #ifndef QT_NO_REGEXP_OPTIM
3387  middleBox.setupHeuristics();
3388 #endif
3389  box.cat(middleBox);
3390  box.cat(rightBox);
3391  yyCharClass.reset(0);
3392 
3393 #ifndef QT_NO_REGEXP_CAPTURE
3394  for (int i = 0; i < nf; ++i) {
3395  switch (f[i].capture) {
3397  break;
3399  f[i].capture = ncap;
3401  ++ncap;
3402  ++officialncap;
3403  break;
3405  f[i].capture = greedyQuantifiers ? ncap++ : QRegExpAtom::NoCapture;
3406  }
3407  }
3408 
3409 #ifndef QT_NO_REGEXP_BACKREF
3410 #ifndef QT_NO_REGEXP_OPTIM
3411  if (officialncap == 0 && nbrefs == 0) {
3412  ncap = nf = 0;
3413  f.clear();
3414  }
3415 #endif
3416  // handle the case where there's a \5 with no corresponding capture
3417  // (captureForOfficialCapture.size() != officialncap)
3418  for (int i = 0; i < nbrefs - officialncap; ++i) {
3420  ++ncap;
3421  }
3422 #endif
3423 #endif
3424 
3425  if (!yyError.isEmpty())
3426  return -1;
3427 
3428 #ifndef QT_NO_REGEXP_OPTIM
3429  const QRegExpAutomatonState &sinit = s.at(InitialState);
3430  caretAnchored = !sinit.anchors.isEmpty();
3431  if (caretAnchored) {
3432  const QMap<int, int> &anchors = sinit.anchors;
3434  for (a = anchors.constBegin(); a != anchors.constEnd(); ++a) {
3435  if (
3436 #ifndef QT_NO_REGEXP_ANCHOR_ALT
3437  (*a & Anchor_Alternation) != 0 ||
3438 #endif
3439  (*a & Anchor_Caret) == 0)
3440  {
3441  caretAnchored = false;
3442  break;
3443  }
3444  }
3445  }
3446 #endif
3447 
3448  // cleanup anchors
3449  int numStates = s.count();
3450  for (int i = 0; i < numStates; ++i) {
3451  QRegExpAutomatonState &state = s[i];
3452  if (!state.anchors.isEmpty()) {
3454  while (a != state.anchors.end()) {
3455  if (a.value() == 0)
3456  a = state.anchors.erase(a);
3457  else
3458  ++a;
3459  }
3460  }
3461  }
3462 
3463  return yyPos0;
3464 }
QMap< int, int > anchors
Definition: qregexp.cpp:958
bool yyMayCapture
Definition: qregexp.cpp:1296
int count(const T &t) const
Returns the number of occurrences of value in the vector.
Definition: qvector.h:742
bool caretAnchored
Definition: qregexp.cpp:1172
QVector< int > captureForOfficialCapture
Definition: qregexp.cpp:1158
long ASN1_INTEGER_get ASN1_INTEGER * a
void parseExpression(Box *box)
Definition: qregexp.cpp:3634
void clear()
Removes all the elements from the vector and releases the memory used by the vector.
Definition: qvector.h:347
bool isEmpty() const
Returns true if the string has no characters; otherwise returns false.
Definition: qstring.h:704
const T value(const Key &key) const
Returns the value associated with the key key.
Definition: qmap.h:499
void append(const T &t)
Inserts value at the end of the vector.
Definition: qvector.h:573
int getToken()
Definition: qregexp.cpp:3200
bool greedyQuantifiers
Definition: qregexp.cpp:1177
const_iterator constBegin() const
Returns a const STL-style iterator pointing to the first item in the map.
Definition: qmap.h:374
void reset(T *other=0)
Deletes the existing object it is pointing to if any, and sets its pointer to other.
const T & at(int i) const
Returns the item at index position i in the vector.
Definition: qvector.h:350
iterator begin()
Returns an STL-style iterator pointing to the first item in the map.
Definition: qmap.h:372
QString yyError
Definition: qregexp.cpp:1284
const_iterator constEnd() const
Returns a const STL-style iterator pointing to the imaginary item after the last item in the map...
Definition: qmap.h:380
iterator end()
Returns an STL-style iterator pointing to the imaginary item after the last item in the map...
Definition: qmap.h:375
void finishAtom(int atom, bool needCapture)
Definition: qregexp.cpp:1757
int startAtom(bool officialCapture)
Definition: qregexp.cpp:1747
QScopedPointer< QRegExpCharClass > yyCharClass
Definition: qregexp.cpp:1281
bool isEmpty() const
Returns true if the map contains no items; otherwise returns false.
Definition: qmap.h:203
QVector< QRegExpAtom > f
Definition: qregexp.cpp:1155
iterator erase(iterator it)
Removes the (key, value) pair pointed to by the iterator it from the map, and returns an iterator to the next item in the map.
Definition: qmap.h:717
friend class Box
Definition: qregexp.cpp:1254
QVector< QRegExpAutomatonState > s
Definition: qregexp.cpp:1153
void startTokenizer(const QChar *rx, int len)
Definition: qregexp.cpp:3187

◆ parseAtom()

void QRegExpEngine::parseAtom ( Box * box)
private

Definition at line 3466 of file qregexp.cpp.

Referenced by parseFactor().

3467 {
3468 #ifndef QT_NO_REGEXP_LOOKAHEAD
3469  QRegExpEngine *eng = 0;
3470  bool neg;
3471  int len;
3472 #endif
3473 
3474  if ((yyTok & Tok_Char) != 0) {
3475  box->set(QChar(yyTok ^ Tok_Char));
3476  } else {
3477 #ifndef QT_NO_REGEXP_OPTIM
3478  trivial = false;
3479 #endif
3480  switch (yyTok) {
3481  case Tok_Dollar:
3482  box->catAnchor(Anchor_Dollar);
3483  break;
3484  case Tok_Caret:
3485  box->catAnchor(Anchor_Caret);
3486  break;
3487 #ifndef QT_NO_REGEXP_LOOKAHEAD
3488  case Tok_PosLookahead:
3489  case Tok_NegLookahead:
3490  neg = (yyTok == Tok_NegLookahead);
3491  eng = new QRegExpEngine(cs, greedyQuantifiers);
3492  len = eng->parse(yyIn + yyPos - 1, yyLen - yyPos + 1);
3493  if (len >= 0)
3494  skipChars(len);
3495  else
3497  box->catAnchor(addLookahead(eng, neg));
3498  yyTok = getToken();
3499  if (yyTok != Tok_RightParen)
3501  break;
3502 #endif
3503 #ifndef QT_NO_REGEXP_ESCAPE
3504  case Tok_Word:
3505  box->catAnchor(Anchor_Word);
3506  break;
3507  case Tok_NonWord:
3508  box->catAnchor(Anchor_NonWord);
3509  break;
3510 #endif
3511  case Tok_LeftParen:
3512  case Tok_MagicLeftParen:
3513  yyTok = getToken();
3514  parseExpression(box);
3515  if (yyTok != Tok_RightParen)
3516  error(RXERR_END);
3517  break;
3518  case Tok_CharClass:
3519  box->set(*yyCharClass);
3520  break;
3521  case Tok_Quantifier:
3523  break;
3524  default:
3525 #ifndef QT_NO_REGEXP_BACKREF
3526  if ((yyTok & Tok_BackRef) != 0)
3527  box->set(yyTok ^ Tok_BackRef);
3528  else
3529 #endif
3531  }
3532  }
3533  yyTok = getToken();
3534 }
int parse(const QChar *rx, int len)
Definition: qregexp.cpp:3361
Qt::CaseSensitivity cs
Definition: qregexp.cpp:1176
void error(const char *msg)
Definition: qregexp.cpp:3181
void parseExpression(Box *box)
Definition: qregexp.cpp:3634
The QChar class provides a 16-bit Unicode character.
Definition: qchar.h:72
#define RXERR_REPETITION
Definition: qregexp.cpp:70
#define RXERR_DISABLED
Definition: qregexp.cpp:66
int addLookahead(QRegExpEngine *eng, bool negative)
Definition: qregexp.cpp:1769
#define RXERR_LOOKAHEAD
Definition: qregexp.cpp:68
int getToken()
Definition: qregexp.cpp:3200
bool greedyQuantifiers
Definition: qregexp.cpp:1177
QRegExpEngine(Qt::CaseSensitivity cs, bool greedyQuantifiers)
Definition: qregexp.cpp:1077
QScopedPointer< QRegExpCharClass > yyCharClass
Definition: qregexp.cpp:1281
#define RXERR_END
Definition: qregexp.cpp:73
const QChar * yyIn
Definition: qregexp.cpp:1276
void skipChars(int n)
Definition: qregexp.cpp:3172

◆ parseExpression()

void QRegExpEngine::parseExpression ( Box * box)
private

Definition at line 3634 of file qregexp.cpp.

Referenced by parse(), and parseAtom().

3635 {
3636  parseTerm(box);
3637  while (yyTok == Tok_Bar) {
3638 #ifndef QT_NO_REGEXP_OPTIM
3639  trivial = false;
3640 #endif
3641  Box rightBox(this);
3642  yyTok = getToken();
3643  parseTerm(&rightBox);
3644  box->orx(rightBox);
3645  }
3646 }
int getToken()
Definition: qregexp.cpp:3200
void parseTerm(Box *box)
Definition: qregexp.cpp:3621
friend class Box
Definition: qregexp.cpp:1254

◆ parseFactor()

void QRegExpEngine::parseFactor ( Box * box)
private

Definition at line 3536 of file qregexp.cpp.

Referenced by parseTerm().

3537 {
3538 #ifndef QT_NO_REGEXP_CAPTURE
3539  int outerAtom = greedyQuantifiers ? startAtom(false) : -1;
3540  int innerAtom = startAtom(yyMayCapture && yyTok == Tok_LeftParen);
3541  bool magicLeftParen = (yyTok == Tok_MagicLeftParen);
3542 #else
3543  const int innerAtom = -1;
3544 #endif
3545 
3546 #ifndef QT_NO_REGEXP_INTERVAL
3547 #define YYREDO() \
3548  yyIn = in, yyPos0 = pos0, yyPos = pos, yyLen = len, yyCh = ch, \
3549  *yyCharClass = charClass, yyMinRep = 0, yyMaxRep = 0, yyTok = tok
3550 
3551  const QChar *in = yyIn;
3552  int pos0 = yyPos0;
3553  int pos = yyPos;
3554  int len = yyLen;
3555  int ch = yyCh;
3556  QRegExpCharClass charClass;
3557  if (yyTok == Tok_CharClass)
3558  charClass = *yyCharClass;
3559  int tok = yyTok;
3560  bool mayCapture = yyMayCapture;
3561 #endif
3562 
3563  parseAtom(box);
3564 #ifndef QT_NO_REGEXP_CAPTURE
3565  finishAtom(innerAtom, magicLeftParen);
3566 #endif
3567 
3568  bool hasQuantifier = (yyTok == Tok_Quantifier);
3569  if (hasQuantifier) {
3570 #ifndef QT_NO_REGEXP_OPTIM
3571  trivial = false;
3572 #endif
3573  if (yyMaxRep == InftyRep) {
3574  box->plus(innerAtom);
3575 #ifndef QT_NO_REGEXP_INTERVAL
3576  } else if (yyMaxRep == 0) {
3577  box->clear();
3578 #endif
3579  }
3580  if (yyMinRep == 0)
3581  box->opt();
3582 
3583 #ifndef QT_NO_REGEXP_INTERVAL
3584  yyMayCapture = false;
3585  int alpha = (yyMinRep == 0) ? 0 : yyMinRep - 1;
3586  int beta = (yyMaxRep == InftyRep) ? 0 : yyMaxRep - (alpha + 1);
3587 
3588  Box rightBox(this);
3589  int i;
3590 
3591  for (i = 0; i < beta; i++) {
3592  YYREDO();
3593  Box leftBox(this);
3594  parseAtom(&leftBox);
3595  leftBox.cat(rightBox);
3596  leftBox.opt();
3597  rightBox = leftBox;
3598  }
3599  for (i = 0; i < alpha; i++) {
3600  YYREDO();
3601  Box leftBox(this);
3602  parseAtom(&leftBox);
3603  leftBox.cat(rightBox);
3604  rightBox = leftBox;
3605  }
3606  rightBox.cat(*box);
3607  *box = rightBox;
3608 #endif
3609  yyTok = getToken();
3610 #ifndef QT_NO_REGEXP_INTERVAL
3611  yyMayCapture = mayCapture;
3612 #endif
3613  }
3614 #undef YYREDO
3615 #ifndef QT_NO_REGEXP_CAPTURE
3616  if (greedyQuantifiers)
3617  finishAtom(outerAtom, hasQuantifier);
3618 #endif
3619 }
const int InftyRep
Definition: qregexp.cpp:704
bool yyMayCapture
Definition: qregexp.cpp:1296
void parseAtom(Box *box)
Definition: qregexp.cpp:3466
The QChar class provides a 16-bit Unicode character.
Definition: qchar.h:72
#define YYREDO()
int getToken()
Definition: qregexp.cpp:3200
bool greedyQuantifiers
Definition: qregexp.cpp:1177
void finishAtom(int atom, bool needCapture)
Definition: qregexp.cpp:1757
int startAtom(bool officialCapture)
Definition: qregexp.cpp:1747
QScopedPointer< QRegExpCharClass > yyCharClass
Definition: qregexp.cpp:1281
friend class Box
Definition: qregexp.cpp:1254
const QChar * yyIn
Definition: qregexp.cpp:1276

◆ parseTerm()

void QRegExpEngine::parseTerm ( Box * box)
private

Definition at line 3621 of file qregexp.cpp.

Referenced by parseExpression().

3622 {
3623 #ifndef QT_NO_REGEXP_OPTIM
3624  if (yyTok != Tok_Eos && yyTok != Tok_RightParen && yyTok != Tok_Bar)
3625  parseFactor(box);
3626 #endif
3627  while (yyTok != Tok_Eos && yyTok != Tok_RightParen && yyTok != Tok_Bar) {
3628  Box rightBox(this);
3629  parseFactor(&rightBox);
3630  box->cat(rightBox);
3631  }
3632 }
void parseFactor(Box *box)
Definition: qregexp.cpp:3536
friend class Box
Definition: qregexp.cpp:1254

◆ setup()

void QRegExpEngine::setup ( )
private

Definition at line 1706 of file qregexp.cpp.

Referenced by QRegExpEngine().

1707 {
1708  ref = 1;
1709 #ifndef QT_NO_REGEXP_CAPTURE
1710  f.resize(32);
1711  nf = 0;
1712  cf = -1;
1713 #endif
1714  officialncap = 0;
1715  ncap = 0;
1716 #ifndef QT_NO_REGEXP_OPTIM
1717  caretAnchored = true;
1718  trivial = true;
1719 #endif
1720  valid = false;
1721 #ifndef QT_NO_REGEXP_BACKREF
1722  nbrefs = 0;
1723 #endif
1724 #ifndef QT_NO_REGEXP_OPTIM
1725  useGoodStringHeuristic = true;
1726  minl = 0;
1727  occ1.fill(0, NumBadChars);
1728 #endif
1729 }
QVector< T > & fill(const T &t, int size=-1)
Assigns value to all items in the vector.
Definition: qvector.h:665
const int NumBadChars
Definition: qregexp.cpp:698
bool caretAnchored
Definition: qregexp.cpp:1172
QVector< int > occ1
Definition: qregexp.cpp:1191
void resize(int size)
Sets the size of the vector to size.
Definition: qvector.h:342
bool useGoodStringHeuristic
Definition: qregexp.cpp:1184
QAtomicInt ref
Definition: qregexp.cpp:1115
QVector< QRegExpAtom > f
Definition: qregexp.cpp:1155

◆ setupCategoriesRangeMap()

void QRegExpEngine::setupCategoriesRangeMap ( )
private

Definition at line 2709 of file qregexp.cpp.

Referenced by getEscape().

2710 {
2711  categoriesRangeMap.insert("IsBasicLatin", qMakePair(0x0000, 0x007F));
2712  categoriesRangeMap.insert("IsLatin-1Supplement", qMakePair(0x0080, 0x00FF));
2713  categoriesRangeMap.insert("IsLatinExtended-A", qMakePair(0x0100, 0x017F));
2714  categoriesRangeMap.insert("IsLatinExtended-B", qMakePair(0x0180, 0x024F));
2715  categoriesRangeMap.insert("IsIPAExtensions", qMakePair(0x0250, 0x02AF));
2716  categoriesRangeMap.insert("IsSpacingModifierLetters", qMakePair(0x02B0, 0x02FF));
2717  categoriesRangeMap.insert("IsCombiningDiacriticalMarks", qMakePair(0x0300, 0x036F));
2718  categoriesRangeMap.insert("IsGreek", qMakePair(0x0370, 0x03FF));
2719  categoriesRangeMap.insert("IsCyrillic", qMakePair(0x0400, 0x04FF));
2720  categoriesRangeMap.insert("IsCyrillicSupplement", qMakePair(0x0500, 0x052F));
2721  categoriesRangeMap.insert("IsArmenian", qMakePair(0x0530, 0x058F));
2722  categoriesRangeMap.insert("IsHebrew", qMakePair(0x0590, 0x05FF));
2723  categoriesRangeMap.insert("IsArabic", qMakePair(0x0600, 0x06FF));
2724  categoriesRangeMap.insert("IsSyriac", qMakePair(0x0700, 0x074F));
2725  categoriesRangeMap.insert("IsArabicSupplement", qMakePair(0x0750, 0x077F));
2726  categoriesRangeMap.insert("IsThaana", qMakePair(0x0780, 0x07BF));
2727  categoriesRangeMap.insert("IsDevanagari", qMakePair(0x0900, 0x097F));
2728  categoriesRangeMap.insert("IsBengali", qMakePair(0x0980, 0x09FF));
2729  categoriesRangeMap.insert("IsGurmukhi", qMakePair(0x0A00, 0x0A7F));
2730  categoriesRangeMap.insert("IsGujarati", qMakePair(0x0A80, 0x0AFF));
2731  categoriesRangeMap.insert("IsOriya", qMakePair(0x0B00, 0x0B7F));
2732  categoriesRangeMap.insert("IsTamil", qMakePair(0x0B80, 0x0BFF));
2733  categoriesRangeMap.insert("IsTelugu", qMakePair(0x0C00, 0x0C7F));
2734  categoriesRangeMap.insert("IsKannada", qMakePair(0x0C80, 0x0CFF));
2735  categoriesRangeMap.insert("IsMalayalam", qMakePair(0x0D00, 0x0D7F));
2736  categoriesRangeMap.insert("IsSinhala", qMakePair(0x0D80, 0x0DFF));
2737  categoriesRangeMap.insert("IsThai", qMakePair(0x0E00, 0x0E7F));
2738  categoriesRangeMap.insert("IsLao", qMakePair(0x0E80, 0x0EFF));
2739  categoriesRangeMap.insert("IsTibetan", qMakePair(0x0F00, 0x0FFF));
2740  categoriesRangeMap.insert("IsMyanmar", qMakePair(0x1000, 0x109F));
2741  categoriesRangeMap.insert("IsGeorgian", qMakePair(0x10A0, 0x10FF));
2742  categoriesRangeMap.insert("IsHangulJamo", qMakePair(0x1100, 0x11FF));
2743  categoriesRangeMap.insert("IsEthiopic", qMakePair(0x1200, 0x137F));
2744  categoriesRangeMap.insert("IsEthiopicSupplement", qMakePair(0x1380, 0x139F));
2745  categoriesRangeMap.insert("IsCherokee", qMakePair(0x13A0, 0x13FF));
2746  categoriesRangeMap.insert("IsUnifiedCanadianAboriginalSyllabics", qMakePair(0x1400, 0x167F));
2747  categoriesRangeMap.insert("IsOgham", qMakePair(0x1680, 0x169F));
2748  categoriesRangeMap.insert("IsRunic", qMakePair(0x16A0, 0x16FF));
2749  categoriesRangeMap.insert("IsTagalog", qMakePair(0x1700, 0x171F));
2750  categoriesRangeMap.insert("IsHanunoo", qMakePair(0x1720, 0x173F));
2751  categoriesRangeMap.insert("IsBuhid", qMakePair(0x1740, 0x175F));
2752  categoriesRangeMap.insert("IsTagbanwa", qMakePair(0x1760, 0x177F));
2753  categoriesRangeMap.insert("IsKhmer", qMakePair(0x1780, 0x17FF));
2754  categoriesRangeMap.insert("IsMongolian", qMakePair(0x1800, 0x18AF));
2755  categoriesRangeMap.insert("IsLimbu", qMakePair(0x1900, 0x194F));
2756  categoriesRangeMap.insert("IsTaiLe", qMakePair(0x1950, 0x197F));
2757  categoriesRangeMap.insert("IsNewTaiLue", qMakePair(0x1980, 0x19DF));
2758  categoriesRangeMap.insert("IsKhmerSymbols", qMakePair(0x19E0, 0x19FF));
2759  categoriesRangeMap.insert("IsBuginese", qMakePair(0x1A00, 0x1A1F));
2760  categoriesRangeMap.insert("IsPhoneticExtensions", qMakePair(0x1D00, 0x1D7F));
2761  categoriesRangeMap.insert("IsPhoneticExtensionsSupplement", qMakePair(0x1D80, 0x1DBF));
2762  categoriesRangeMap.insert("IsCombiningDiacriticalMarksSupplement", qMakePair(0x1DC0, 0x1DFF));
2763  categoriesRangeMap.insert("IsLatinExtendedAdditional", qMakePair(0x1E00, 0x1EFF));
2764  categoriesRangeMap.insert("IsGreekExtended", qMakePair(0x1F00, 0x1FFF));
2765  categoriesRangeMap.insert("IsGeneralPunctuation", qMakePair(0x2000, 0x206F));
2766  categoriesRangeMap.insert("IsSuperscriptsandSubscripts", qMakePair(0x2070, 0x209F));
2767  categoriesRangeMap.insert("IsCurrencySymbols", qMakePair(0x20A0, 0x20CF));
2768  categoriesRangeMap.insert("IsCombiningMarksforSymbols", qMakePair(0x20D0, 0x20FF));
2769  categoriesRangeMap.insert("IsLetterlikeSymbols", qMakePair(0x2100, 0x214F));
2770  categoriesRangeMap.insert("IsNumberForms", qMakePair(0x2150, 0x218F));
2771  categoriesRangeMap.insert("IsArrows", qMakePair(0x2190, 0x21FF));
2772  categoriesRangeMap.insert("IsMathematicalOperators", qMakePair(0x2200, 0x22FF));
2773  categoriesRangeMap.insert("IsMiscellaneousTechnical", qMakePair(0x2300, 0x23FF));
2774  categoriesRangeMap.insert("IsControlPictures", qMakePair(0x2400, 0x243F));
2775  categoriesRangeMap.insert("IsOpticalCharacterRecognition", qMakePair(0x2440, 0x245F));
2776  categoriesRangeMap.insert("IsEnclosedAlphanumerics", qMakePair(0x2460, 0x24FF));
2777  categoriesRangeMap.insert("IsBoxDrawing", qMakePair(0x2500, 0x257F));
2778  categoriesRangeMap.insert("IsBlockElements", qMakePair(0x2580, 0x259F));
2779  categoriesRangeMap.insert("IsGeometricShapes", qMakePair(0x25A0, 0x25FF));
2780  categoriesRangeMap.insert("IsMiscellaneousSymbols", qMakePair(0x2600, 0x26FF));
2781  categoriesRangeMap.insert("IsDingbats", qMakePair(0x2700, 0x27BF));
2782  categoriesRangeMap.insert("IsMiscellaneousMathematicalSymbols-A", qMakePair(0x27C0, 0x27EF));
2783  categoriesRangeMap.insert("IsSupplementalArrows-A", qMakePair(0x27F0, 0x27FF));
2784  categoriesRangeMap.insert("IsBraillePatterns", qMakePair(0x2800, 0x28FF));
2785  categoriesRangeMap.insert("IsSupplementalArrows-B", qMakePair(0x2900, 0x297F));
2786  categoriesRangeMap.insert("IsMiscellaneousMathematicalSymbols-B", qMakePair(0x2980, 0x29FF));
2787  categoriesRangeMap.insert("IsSupplementalMathematicalOperators", qMakePair(0x2A00, 0x2AFF));
2788  categoriesRangeMap.insert("IsMiscellaneousSymbolsandArrows", qMakePair(0x2B00, 0x2BFF));
2789  categoriesRangeMap.insert("IsGlagolitic", qMakePair(0x2C00, 0x2C5F));
2790  categoriesRangeMap.insert("IsCoptic", qMakePair(0x2C80, 0x2CFF));
2791  categoriesRangeMap.insert("IsGeorgianSupplement", qMakePair(0x2D00, 0x2D2F));
2792  categoriesRangeMap.insert("IsTifinagh", qMakePair(0x2D30, 0x2D7F));
2793  categoriesRangeMap.insert("IsEthiopicExtended", qMakePair(0x2D80, 0x2DDF));
2794  categoriesRangeMap.insert("IsSupplementalPunctuation", qMakePair(0x2E00, 0x2E7F));
2795  categoriesRangeMap.insert("IsCJKRadicalsSupplement", qMakePair(0x2E80, 0x2EFF));
2796  categoriesRangeMap.insert("IsKangxiRadicals", qMakePair(0x2F00, 0x2FDF));
2797  categoriesRangeMap.insert("IsIdeographicDescriptionCharacters", qMakePair(0x2FF0, 0x2FFF));
2798  categoriesRangeMap.insert("IsCJKSymbolsandPunctuation", qMakePair(0x3000, 0x303F));
2799  categoriesRangeMap.insert("IsHiragana", qMakePair(0x3040, 0x309F));
2800  categoriesRangeMap.insert("IsKatakana", qMakePair(0x30A0, 0x30FF));
2801  categoriesRangeMap.insert("IsBopomofo", qMakePair(0x3100, 0x312F));
2802  categoriesRangeMap.insert("IsHangulCompatibilityJamo", qMakePair(0x3130, 0x318F));
2803  categoriesRangeMap.insert("IsKanbun", qMakePair(0x3190, 0x319F));
2804  categoriesRangeMap.insert("IsBopomofoExtended", qMakePair(0x31A0, 0x31BF));
2805  categoriesRangeMap.insert("IsCJKStrokes", qMakePair(0x31C0, 0x31EF));
2806  categoriesRangeMap.insert("IsKatakanaPhoneticExtensions", qMakePair(0x31F0, 0x31FF));
2807  categoriesRangeMap.insert("IsEnclosedCJKLettersandMonths", qMakePair(0x3200, 0x32FF));
2808  categoriesRangeMap.insert("IsCJKCompatibility", qMakePair(0x3300, 0x33FF));
2809  categoriesRangeMap.insert("IsCJKUnifiedIdeographsExtensionA", qMakePair(0x3400, 0x4DB5));
2810  categoriesRangeMap.insert("IsYijingHexagramSymbols", qMakePair(0x4DC0, 0x4DFF));
2811  categoriesRangeMap.insert("IsCJKUnifiedIdeographs", qMakePair(0x4E00, 0x9FFF));
2812  categoriesRangeMap.insert("IsYiSyllables", qMakePair(0xA000, 0xA48F));
2813  categoriesRangeMap.insert("IsYiRadicals", qMakePair(0xA490, 0xA4CF));
2814  categoriesRangeMap.insert("IsModifierToneLetters", qMakePair(0xA700, 0xA71F));
2815  categoriesRangeMap.insert("IsSylotiNagri", qMakePair(0xA800, 0xA82F));
2816  categoriesRangeMap.insert("IsHangulSyllables", qMakePair(0xAC00, 0xD7A3));
2817  categoriesRangeMap.insert("IsPrivateUse", qMakePair(0xE000, 0xF8FF));
2818  categoriesRangeMap.insert("IsCJKCompatibilityIdeographs", qMakePair(0xF900, 0xFAFF));
2819  categoriesRangeMap.insert("IsAlphabeticPresentationForms", qMakePair(0xFB00, 0xFB4F));
2820  categoriesRangeMap.insert("IsArabicPresentationForms-A", qMakePair(0xFB50, 0xFDFF));
2821  categoriesRangeMap.insert("IsVariationSelectors", qMakePair(0xFE00, 0xFE0F));
2822  categoriesRangeMap.insert("IsVerticalForms", qMakePair(0xFE10, 0xFE1F));
2823  categoriesRangeMap.insert("IsCombiningHalfMarks", qMakePair(0xFE20, 0xFE2F));
2824  categoriesRangeMap.insert("IsCJKCompatibilityForms", qMakePair(0xFE30, 0xFE4F));
2825  categoriesRangeMap.insert("IsSmallFormVariants", qMakePair(0xFE50, 0xFE6F));
2826  categoriesRangeMap.insert("IsArabicPresentationForms-B", qMakePair(0xFE70, 0xFEFF));
2827  categoriesRangeMap.insert("IsHalfwidthandFullwidthForms", qMakePair(0xFF00, 0xFFEF));
2828  categoriesRangeMap.insert("IsSpecials", qMakePair(0xFFF0, 0xFFFF));
2829  categoriesRangeMap.insert("IsLinearBSyllabary", qMakePair(0x10000, 0x1007F));
2830  categoriesRangeMap.insert("IsLinearBIdeograms", qMakePair(0x10080, 0x100FF));
2831  categoriesRangeMap.insert("IsAegeanNumbers", qMakePair(0x10100, 0x1013F));
2832  categoriesRangeMap.insert("IsAncientGreekNumbers", qMakePair(0x10140, 0x1018F));
2833  categoriesRangeMap.insert("IsOldItalic", qMakePair(0x10300, 0x1032F));
2834  categoriesRangeMap.insert("IsGothic", qMakePair(0x10330, 0x1034F));
2835  categoriesRangeMap.insert("IsUgaritic", qMakePair(0x10380, 0x1039F));
2836  categoriesRangeMap.insert("IsOldPersian", qMakePair(0x103A0, 0x103DF));
2837  categoriesRangeMap.insert("IsDeseret", qMakePair(0x10400, 0x1044F));
2838  categoriesRangeMap.insert("IsShavian", qMakePair(0x10450, 0x1047F));
2839  categoriesRangeMap.insert("IsOsmanya", qMakePair(0x10480, 0x104AF));
2840  categoriesRangeMap.insert("IsCypriotSyllabary", qMakePair(0x10800, 0x1083F));
2841  categoriesRangeMap.insert("IsKharoshthi", qMakePair(0x10A00, 0x10A5F));
2842  categoriesRangeMap.insert("IsByzantineMusicalSymbols", qMakePair(0x1D000, 0x1D0FF));
2843  categoriesRangeMap.insert("IsMusicalSymbols", qMakePair(0x1D100, 0x1D1FF));
2844  categoriesRangeMap.insert("IsAncientGreekMusicalNotation", qMakePair(0x1D200, 0x1D24F));
2845  categoriesRangeMap.insert("IsTaiXuanJingSymbols", qMakePair(0x1D300, 0x1D35F));
2846  categoriesRangeMap.insert("IsMathematicalAlphanumericSymbols", qMakePair(0x1D400, 0x1D7FF));
2847  categoriesRangeMap.insert("IsCJKUnifiedIdeographsExtensionB", qMakePair(0x20000, 0x2A6DF));
2848  categoriesRangeMap.insert("IsCJKCompatibilityIdeographsSupplement", qMakePair(0x2F800, 0x2FA1F));
2849  categoriesRangeMap.insert("IsTags", qMakePair(0xE0000, 0xE007F));
2850  categoriesRangeMap.insert("IsVariationSelectorsSupplement", qMakePair(0xE0100, 0xE01EF));
2851  categoriesRangeMap.insert("IsSupplementaryPrivateUseArea-A", qMakePair(0xF0000, 0xFFFFF));
2852  categoriesRangeMap.insert("IsSupplementaryPrivateUseArea-B", qMakePair(0x100000, 0x10FFFF));
2853 }
QHash< QByteArray, QPair< int, int > > categoriesRangeMap
Definition: qregexp.cpp:1297
iterator insert(const Key &key, const T &value)
Inserts a new item with the key and a value of value.
Definition: qhash.h:753
Q_OUTOFLINE_TEMPLATE QPair< T1, T2 > qMakePair(const T1 &x, const T2 &y)
Definition: qpair.h:102

◆ setupState()

int QRegExpEngine::setupState ( int  match)
private

Definition at line 1731 of file qregexp.cpp.

Referenced by createState().

1732 {
1733 #ifndef QT_NO_REGEXP_CAPTURE
1734  s += QRegExpAutomatonState(cf, match);
1735 #else
1736  s += QRegExpAutomatonState(match);
1737 #endif
1738  return s.size() - 1;
1739 }
static bool match(const uchar *found, const char *target, uint len)
QVector< QRegExpAutomatonState > s
Definition: qregexp.cpp:1153
int size() const
Returns the number of items in the vector.
Definition: qvector.h:137

◆ skipChars()

void QRegExpEngine::skipChars ( int  n)
private

Definition at line 3172 of file qregexp.cpp.

Referenced by parseAtom().

3173 {
3174  if (n > 0) {
3175  yyPos += n - 1;
3176  yyCh = getChar();
3177  }
3178 }

◆ startAtom()

int QRegExpEngine::startAtom ( bool  officialCapture)
private

Definition at line 1747 of file qregexp.cpp.

Referenced by parse(), and parseFactor().

1748 {
1749  if ((nf & (nf + 1)) == 0 && nf + 1 >= f.size())
1750  f.resize((nf + 1) << 1);
1751  f[nf].parent = cf;
1752  cf = nf++;
1753  f[cf].capture = officialCapture ? QRegExpAtom::OfficialCapture : QRegExpAtom::NoCapture;
1754  return cf;
1755 }
void resize(int size)
Sets the size of the vector to size.
Definition: qvector.h:342
QVector< QRegExpAtom > f
Definition: qregexp.cpp:1155
int size() const
Returns the number of items in the vector.
Definition: qvector.h:137

◆ startTokenizer()

void QRegExpEngine::startTokenizer ( const QChar * rx,
int  len 
)
private

Definition at line 3187 of file qregexp.cpp.

Referenced by parse().

3188 {
3189  yyIn = rx;
3190  yyPos0 = 0;
3191  yyPos = 0;
3192  yyLen = len;
3193  yyCh = getChar();
3194  yyCharClass.reset(new QRegExpCharClass);
3195  yyMinRep = 0;
3196  yyMaxRep = 0;
3197  yyError = QString();
3198 }
The QString class provides a Unicode character string.
Definition: qstring.h:83
void reset(T *other=0)
Deletes the existing object it is pointing to if any, and sets its pointer to other.
QString yyError
Definition: qregexp.cpp:1284
QScopedPointer< QRegExpCharClass > yyCharClass
Definition: qregexp.cpp:1281
const QChar * yyIn
Definition: qregexp.cpp:1276

Friends and Related Functions

◆ Box

friend class Box
friend

Definition at line 1254 of file qregexp.cpp.

◆ QRegExpMatchState

friend struct QRegExpMatchState
friend

Definition at line 1299 of file qregexp.cpp.

Properties

◆ aa

QVector<QRegExpAnchorAlternation> QRegExpEngine::aa
private

Definition at line 1169 of file qregexp.cpp.

Referenced by anchorAlternation(), anchorConcatenation(), and dump().

◆ ahead

QVector<QRegExpLookahead *> QRegExpEngine::ahead
private

Definition at line 1166 of file qregexp.cpp.

Referenced by addLookahead(), QRegExpMatchState::testAnchor(), and ~QRegExpEngine().

◆ captureForOfficialCapture

QVector<int> QRegExpEngine::captureForOfficialCapture
private

Definition at line 1158 of file qregexp.cpp.

Referenced by dump(), and parse().

◆ caretAnchored

bool QRegExpEngine::caretAnchored
private

Definition at line 1172 of file qregexp.cpp.

Referenced by parse(), and setup().

◆ categoriesRangeMap

QHash<QByteArray, QPair<int, int> > QRegExpEngine::categoriesRangeMap
private

Definition at line 1297 of file qregexp.cpp.

Referenced by getEscape(), and setupCategoriesRangeMap().

◆ cf

int QRegExpEngine::cf
private

Definition at line 1157 of file qregexp.cpp.

Referenced by finishAtom(), setup(), setupState(), and startAtom().

◆ cl

QVector<QRegExpCharClass> QRegExpEngine::cl
private

Definition at line 1163 of file qregexp.cpp.

Referenced by createState(), and dump().

◆ cs

Qt::CaseSensitivity QRegExpEngine::cs
private

◆ f

QVector<QRegExpAtom> QRegExpEngine::f
private

Definition at line 1155 of file qregexp.cpp.

Referenced by addPlusTransitions(), dump(), finishAtom(), parse(), setup(), and startAtom().

◆ goodEarlyStart

int QRegExpEngine::goodEarlyStart
private

◆ goodLateStart

int QRegExpEngine::goodLateStart
private

◆ goodStr

QString QRegExpEngine::goodStr
private

◆ greedyQuantifiers

bool QRegExpEngine::greedyQuantifiers
private

Definition at line 1177 of file qregexp.cpp.

Referenced by finishAtom(), parse(), parseAtom(), and parseFactor().

◆ minl

int QRegExpEngine::minl
private

◆ nbrefs

int QRegExpEngine::nbrefs
private

Definition at line 1180 of file qregexp.cpp.

Referenced by createState(), parse(), and setup().

◆ ncap

int QRegExpEngine::ncap
private

◆ nf

int QRegExpEngine::nf
private

Definition at line 1156 of file qregexp.cpp.

Referenced by dump(), parse(), setup(), and startAtom().

◆ occ1

QVector<int> QRegExpEngine::occ1
private

◆ officialncap

int QRegExpEngine::officialncap
private

Definition at line 1160 of file qregexp.cpp.

Referenced by parse(), and setup().

◆ ref

QAtomicInt QRegExpEngine::ref

Definition at line 1115 of file qregexp.cpp.

Referenced by derefEngine(), QRegExp::operator=(), and prepareEngine_helper().

◆ s

QVector<QRegExpAutomatonState> QRegExpEngine::s
private

◆ trivial

bool QRegExpEngine::trivial
private

◆ useGoodStringHeuristic

bool QRegExpEngine::useGoodStringHeuristic
private

Definition at line 1184 of file qregexp.cpp.

Referenced by heuristicallyChooseHeuristic(), and setup().

◆ valid

bool QRegExpEngine::valid
private

Definition at line 1175 of file qregexp.cpp.

Referenced by parse(), QRegExpEngine(), and setup().

◆ xmlSchemaExtensions

bool QRegExpEngine::xmlSchemaExtensions
private

Definition at line 1178 of file qregexp.cpp.

Referenced by getEscape().

◆ yyCh

int QRegExpEngine::yyCh
private

Definition at line 1280 of file qregexp.cpp.

Referenced by getEscape(), getRep(), getToken(), parseFactor(), skipChars(), and startTokenizer().

◆ yyCharClass

QScopedPointer<QRegExpCharClass> QRegExpEngine::yyCharClass
private

Definition at line 1281 of file qregexp.cpp.

Referenced by getEscape(), getToken(), parse(), parseAtom(), parseFactor(), and startTokenizer().

◆ yyError

QString QRegExpEngine::yyError
private

Definition at line 1284 of file qregexp.cpp.

Referenced by error(), parse(), and startTokenizer().

◆ yyIn

const QChar* QRegExpEngine::yyIn
private

Definition at line 1276 of file qregexp.cpp.

Referenced by getChar(), parseAtom(), parseFactor(), and startTokenizer().

◆ yyLen

int QRegExpEngine::yyLen
private

Definition at line 1279 of file qregexp.cpp.

Referenced by getChar(), parseAtom(), parseFactor(), and startTokenizer().

◆ yyMaxRep

int QRegExpEngine::yyMaxRep
private

Definition at line 1283 of file qregexp.cpp.

Referenced by getToken(), parseFactor(), and startTokenizer().

◆ yyMayCapture

bool QRegExpEngine::yyMayCapture
private

Definition at line 1296 of file qregexp.cpp.

Referenced by parse(), and parseFactor().

◆ yyMinRep

int QRegExpEngine::yyMinRep
private

Definition at line 1282 of file qregexp.cpp.

Referenced by getToken(), parseFactor(), and startTokenizer().

◆ yyPos

int QRegExpEngine::yyPos
private

Definition at line 1278 of file qregexp.cpp.

Referenced by getChar(), getToken(), parseAtom(), parseFactor(), skipChars(), and startTokenizer().

◆ yyPos0

int QRegExpEngine::yyPos0
private

Definition at line 1277 of file qregexp.cpp.

Referenced by getToken(), parse(), parseFactor(), and startTokenizer().

◆ yyTok

int QRegExpEngine::yyTok
private

Definition at line 1295 of file qregexp.cpp.

Referenced by parse(), parseAtom(), parseExpression(), parseFactor(), and parseTerm().


The documentation for this class was generated from the following file: