Qt 4.8
qregexp.cpp
Go to the documentation of this file.
1 /****************************************************************************
2 **
3 ** Copyright (C) 2014 Digia Plc and/or its subsidiary(-ies).
4 ** Contact: http://www.qt-project.org/legal
5 **
6 ** This file is part of the QtCore module of the Qt Toolkit.
7 **
8 ** $QT_BEGIN_LICENSE:LGPL$
9 ** Commercial License Usage
10 ** Licensees holding valid commercial Qt licenses may use this file in
11 ** accordance with the commercial license agreement provided with the
12 ** Software or, alternatively, in accordance with the terms contained in
13 ** a written agreement between you and Digia. For licensing terms and
14 ** conditions see http://qt.digia.com/licensing. For further information
15 ** use the contact form at http://qt.digia.com/contact-us.
16 **
17 ** GNU Lesser General Public License Usage
18 ** Alternatively, this file may be used under the terms of the GNU Lesser
19 ** General Public License version 2.1 as published by the Free Software
20 ** Foundation and appearing in the file LICENSE.LGPL included in the
21 ** packaging of this file. Please review the following information to
22 ** ensure the GNU Lesser General Public License version 2.1 requirements
23 ** will be met: http://www.gnu.org/licenses/old-licenses/lgpl-2.1.html.
24 **
25 ** In addition, as a special exception, Digia gives you certain additional
26 ** rights. These rights are described in the Digia Qt LGPL Exception
27 ** version 1.1, included in the file LGPL_EXCEPTION.txt in this package.
28 **
29 ** GNU General Public License Usage
30 ** Alternatively, this file may be used under the terms of the GNU
31 ** General Public License version 3.0 as published by the Free Software
32 ** Foundation and appearing in the file LICENSE.GPL included in the
33 ** packaging of this file. Please review the following information to
34 ** ensure the GNU General Public License version 3.0 requirements will be
35 ** met: http://www.gnu.org/copyleft/gpl.html.
36 **
37 **
38 ** $QT_END_LICENSE$
39 **
40 ****************************************************************************/
41 
42 #include "qregexp.h"
43 
44 #include "qalgorithms.h"
45 #include "qbitarray.h"
46 #include "qcache.h"
47 #include "qdatastream.h"
48 #include "qlist.h"
49 #include "qmap.h"
50 #include "qmutex.h"
51 #include "qstring.h"
52 #include "qstringlist.h"
53 #include "qstringmatcher.h"
54 #include "qvector.h"
55 #include "private/qfunctions_p.h"
56 
57 #include <limits.h>
58 
60 
61 int qFindString(const QChar *haystack, int haystackLen, int from,
62  const QChar *needle, int needleLen, Qt::CaseSensitivity cs);
63 
64 // error strings for the regexp parser
65 #define RXERR_OK QT_TRANSLATE_NOOP("QRegExp", "no error occurred")
66 #define RXERR_DISABLED QT_TRANSLATE_NOOP("QRegExp", "disabled feature used")
67 #define RXERR_CHARCLASS QT_TRANSLATE_NOOP("QRegExp", "bad char class syntax")
68 #define RXERR_LOOKAHEAD QT_TRANSLATE_NOOP("QRegExp", "bad lookahead syntax")
69 #define RXERR_LOOKBEHIND QT_TRANSLATE_NOOP("QRegExp", "lookbehinds not supported, see QTBUG-2371")
70 #define RXERR_REPETITION QT_TRANSLATE_NOOP("QRegExp", "bad repetition syntax")
71 #define RXERR_OCTAL QT_TRANSLATE_NOOP("QRegExp", "invalid octal value")
72 #define RXERR_LEFTDELIM QT_TRANSLATE_NOOP("QRegExp", "missing left delim")
73 #define RXERR_END QT_TRANSLATE_NOOP("QRegExp", "unexpected end")
74 #define RXERR_LIMIT QT_TRANSLATE_NOOP("QRegExp", "met internal limit")
75 #define RXERR_INTERVAL QT_TRANSLATE_NOOP("QRegExp", "invalid interval")
76 #define RXERR_CATEGORY QT_TRANSLATE_NOOP("QRegExp", "invalid category")
77 
694 #if defined(Q_OS_VXWORKS) && defined(EOS)
695 # undef EOS
696 #endif
697 
698 const int NumBadChars = 64;
699 #define BadChar(ch) ((ch).unicode() % NumBadChars)
700 
701 const int NoOccurrence = INT_MAX;
702 const int EmptyCapture = INT_MAX;
703 const int InftyLen = INT_MAX;
704 const int InftyRep = 1025;
705 const int EOS = -1;
706 
707 static bool isWord(QChar ch)
708 {
709  return ch.isLetterOrNumber() || ch.isMark() || ch == QLatin1Char('_');
710 }
711 
712 /*
713  Merges two vectors of ints and puts the result into the first
714  one.
715 */
716 static void mergeInto(QVector<int> *a, const QVector<int> &b)
717 {
718  int asize = a->size();
719  int bsize = b.size();
720  if (asize == 0) {
721  *a = b;
722 #ifndef QT_NO_REGEXP_OPTIM
723  } else if (bsize == 1 && a->at(asize - 1) < b.at(0)) {
724  a->resize(asize + 1);
725  (*a)[asize] = b.at(0);
726 #endif
727  } else if (bsize >= 1) {
728  int csize = asize + bsize;
729  QVector<int> c(csize);
730  int i = 0, j = 0, k = 0;
731  while (i < asize) {
732  if (j < bsize) {
733  if (a->at(i) == b.at(j)) {
734  ++i;
735  --csize;
736  } else if (a->at(i) < b.at(j)) {
737  c[k++] = a->at(i++);
738  } else {
739  c[k++] = b.at(j++);
740  }
741  } else {
742  memcpy(c.data() + k, a->constData() + i, (asize - i) * sizeof(int));
743  break;
744  }
745  }
746  c.resize(csize);
747  if (j < bsize)
748  memcpy(c.data() + k, b.constData() + j, (bsize - j) * sizeof(int));
749  *a = c;
750  }
751 }
752 
753 #ifndef QT_NO_REGEXP_WILDCARD
754 /*
755  Translates a wildcard pattern to an equivalent regular expression
756  pattern (e.g., *.cpp to .*\.cpp).
757 
758  If enableEscaping is true, it is possible to escape the wildcard
759  characters with \
760 */
761 static QString wc2rx(const QString &wc_str, const bool enableEscaping)
762 {
763  const int wclen = wc_str.length();
764  QString rx;
765  int i = 0;
766  bool isEscaping = false; // the previous character is '\'
767  const QChar *wc = wc_str.unicode();
768 
769  while (i < wclen) {
770  const QChar c = wc[i++];
771  switch (c.unicode()) {
772  case '\\':
773  if (enableEscaping) {
774  if (isEscaping) {
775  rx += QLatin1String("\\\\");
776  } // we insert the \\ later if necessary
777  if (i == wclen) { // the end
778  rx += QLatin1String("\\\\");
779  }
780  } else {
781  rx += QLatin1String("\\\\");
782  }
783  isEscaping = true;
784  break;
785  case '*':
786  if (isEscaping) {
787  rx += QLatin1String("\\*");
788  isEscaping = false;
789  } else {
790  rx += QLatin1String(".*");
791  }
792  break;
793  case '?':
794  if (isEscaping) {
795  rx += QLatin1String("\\?");
796  isEscaping = false;
797  } else {
798  rx += QLatin1Char('.');
799  }
800 
801  break;
802  case '$':
803  case '(':
804  case ')':
805  case '+':
806  case '.':
807  case '^':
808  case '{':
809  case '|':
810  case '}':
811  if (isEscaping) {
812  isEscaping = false;
813  rx += QLatin1String("\\\\");
814  }
815  rx += QLatin1Char('\\');
816  rx += c;
817  break;
818  case '[':
819  if (isEscaping) {
820  isEscaping = false;
821  rx += QLatin1String("\\[");
822  } else {
823  rx += c;
824  if (wc[i] == QLatin1Char('^'))
825  rx += wc[i++];
826  if (i < wclen) {
827  if (rx[i] == QLatin1Char(']'))
828  rx += wc[i++];
829  while (i < wclen && wc[i] != QLatin1Char(']')) {
830  if (wc[i] == QLatin1Char('\\'))
831  rx += QLatin1Char('\\');
832  rx += wc[i++];
833  }
834  }
835  }
836  break;
837 
838  case ']':
839  if(isEscaping){
840  isEscaping = false;
841  rx += QLatin1String("\\");
842  }
843  rx += c;
844  break;
845 
846  default:
847  if(isEscaping){
848  isEscaping = false;
849  rx += QLatin1String("\\\\");
850  }
851  rx += c;
852  }
853  }
854  return rx;
855 }
856 #endif
857 
858 static int caretIndex(int offset, QRegExp::CaretMode caretMode)
859 {
860  if (caretMode == QRegExp::CaretAtZero) {
861  return 0;
862  } else if (caretMode == QRegExp::CaretAtOffset) {
863  return offset;
864  } else { // QRegExp::CaretWontMatch
865  return -1;
866  }
867 }
868 
869 /*
870  The QRegExpEngineKey struct uniquely identifies an engine.
871 */
873 {
877 
878  inline QRegExpEngineKey(const QString &pattern, QRegExp::PatternSyntax patternSyntax,
880  : pattern(pattern), patternSyntax(patternSyntax), cs(cs) {}
881 
882  inline void clear() {
883  pattern.clear();
884  patternSyntax = QRegExp::RegExp;
885  cs = Qt::CaseSensitive;
886  }
887 };
888 
890 {
891  return key1.pattern == key2.pattern && key1.patternSyntax == key2.patternSyntax
892  && key1.cs == key2.cs;
893 }
894 
895 class QRegExpEngine;
896 
897 //Q_DECLARE_TYPEINFO(QVector<int>, Q_MOVABLE_TYPE);
898 
899 /*
900  This is the engine state during matching.
901 */
903 {
904  const QChar *in; // a pointer to the input string data
905  int pos; // the current position in the string
906  int caretPos;
907  int len; // the length of the input string
908  bool minimal; // minimal matching?
909  int *bigArray; // big array holding the data for the next pointers
910  int *inNextStack; // is state is nextStack?
911  int *curStack; // stack of current states
912  int *nextStack; // stack of next states
913  int *curCapBegin; // start of current states' captures
914  int *nextCapBegin; // start of next states' captures
915  int *curCapEnd; // end of current states' captures
916  int *nextCapEnd; // end of next states' captures
917  int *tempCapBegin; // start of temporary captures
918  int *tempCapEnd; // end of temporary captures
919  int *capBegin; // start of captures for a next state
920  int *capEnd; // end of captures for a next state
921  int *slideTab; // bump-along slide table for bad-character heuristic
922  int *captured; // what match() returned last
923  int slideTabSize; // size of slide table
925 #ifndef QT_NO_REGEXP_BACKREF
926  QList<QVector<int> > sleeping; // list of back-reference sleepers
927 #endif
928  int matchLen; // length of match
929  int oneTestMatchedLen; // length of partial match
930 
932 
933  inline QRegExpMatchState() : bigArray(0), captured(0) {}
934  inline ~QRegExpMatchState() { free(bigArray); }
935 
936  void drain() { free(bigArray); bigArray = 0; captured = 0; } // to save memory
937  void prepareForMatch(QRegExpEngine *eng);
938  void match(const QChar *str, int len, int pos, bool minimal,
939  bool oneTest, int caretIndex);
940  bool matchHere();
941  bool testAnchor(int i, int a, const int *capBegin);
942 };
943 
944 /*
945  The struct QRegExpAutomatonState represents one state in a modified NFA. The
946  input characters matched are stored in the state instead of on
947  the transitions, something possible for an automaton
948  constructed from a regular expression.
949 */
951 {
952 #ifndef QT_NO_REGEXP_CAPTURE
953  int atom; // which atom does this state belong to?
954 #endif
955  int match; // what does it match? (see CharClassBit and BackRefBit)
956  QVector<int> outs; // out-transitions
957  QMap<int, int> reenter; // atoms reentered when transiting out
958  QMap<int, int> anchors; // anchors met when transiting out
959 
961 #ifndef QT_NO_REGEXP_CAPTURE
962  inline QRegExpAutomatonState(int a, int m)
963  : atom(a), match(m) { }
964 #else
965  inline QRegExpAutomatonState(int m)
966  : match(m) { }
967 #endif
968 };
969 
971 
972 /*
973  The struct QRegExpCharClassRange represents a range of characters (e.g.,
974  [0-9] denotes range 48 to 57).
975 */
977 {
978  ushort from; // 48
979  ushort len; // 10
980 };
981 
983 
984 #ifndef QT_NO_REGEXP_CAPTURE
985 /*
986  The struct QRegExpAtom represents one node in the hierarchy of regular
987  expression atoms.
988 */
990 {
991  enum { NoCapture = -1, OfficialCapture = -2, UnofficialCapture = -3 };
992 
993  int parent; // index of parent in array of atoms
994  int capture; // index of capture, from 1 to ncap - 1
995 };
996 
998 #endif
999 
1000 struct QRegExpLookahead;
1001 
1002 #ifndef QT_NO_REGEXP_ANCHOR_ALT
1003 /*
1004  The struct QRegExpAnchorAlternation represents a pair of anchors with
1005  OR semantics.
1006 */
1008 {
1009  int a; // this anchor...
1010  int b; // ...or this one
1011 };
1012 
1014 #endif
1015 
1016 #ifndef QT_NO_REGEXP_CCLASS
1017 /*
1018  The class QRegExpCharClass represents a set of characters, such as can
1019  be found in regular expressions (e.g., [a-z] denotes the set
1020  {a, b, ..., z}).
1021 */
1023 {
1024 public:
1025  QRegExpCharClass();
1026  inline QRegExpCharClass(const QRegExpCharClass &cc) { operator=(cc); }
1027 
1028  QRegExpCharClass &operator=(const QRegExpCharClass &cc);
1029 
1030  void clear();
1031  bool negative() const { return n; }
1032  void setNegative(bool negative);
1033  void addCategories(int cats);
1034  void addRange(ushort from, ushort to);
1035  void addSingleton(ushort ch) { addRange(ch, ch); }
1036 
1037  bool in(QChar ch) const;
1038 #ifndef QT_NO_REGEXP_OPTIM
1039  const QVector<int> &firstOccurrence() const { return occ1; }
1040 #endif
1041 
1042 #if defined(QT_DEBUG)
1043  void dump() const;
1044 #endif
1045 
1046 private:
1047  int c; // character classes
1048  QVector<QRegExpCharClassRange> r; // character ranges
1049  bool n; // negative?
1050 #ifndef QT_NO_REGEXP_OPTIM
1051  QVector<int> occ1; // first-occurrence array
1052 #endif
1053 };
1054 #else
1055 struct QRegExpCharClass
1056 {
1057  int dummy;
1058 
1059 #ifndef QT_NO_REGEXP_OPTIM
1060  QRegExpCharClass() { occ1.fill(0, NumBadChars); }
1061 
1062  const QVector<int> &firstOccurrence() const { return occ1; }
1063  QVector<int> occ1;
1064 #endif
1065 };
1066 #endif
1067 
1069 
1070 /*
1071  The QRegExpEngine class encapsulates a modified nondeterministic
1072  finite automaton (NFA).
1073 */
1075 {
1076 public:
1077  QRegExpEngine(Qt::CaseSensitivity cs, bool greedyQuantifiers)
1078  : cs(cs), greedyQuantifiers(greedyQuantifiers) { setup(); }
1079 
1081  ~QRegExpEngine();
1082 
1083  bool isValid() const { return valid; }
1084  const QString &errorString() const { return yyError; }
1085  int captureCount() const { return officialncap; }
1086 
1087  int createState(QChar ch);
1088  int createState(const QRegExpCharClass &cc);
1089 #ifndef QT_NO_REGEXP_BACKREF
1090  int createState(int bref);
1091 #endif
1092 
1093  void addCatTransitions(const QVector<int> &from, const QVector<int> &to);
1094 #ifndef QT_NO_REGEXP_CAPTURE
1095  void addPlusTransitions(const QVector<int> &from, const QVector<int> &to, int atom);
1096 #endif
1097 
1098 #ifndef QT_NO_REGEXP_ANCHOR_ALT
1099  int anchorAlternation(int a, int b);
1100  int anchorConcatenation(int a, int b);
1101 #else
1102  int anchorAlternation(int a, int b) { return a & b; }
1103  int anchorConcatenation(int a, int b) { return a | b; }
1104 #endif
1105  void addAnchors(int from, int to, int a);
1106 
1107 #ifndef QT_NO_REGEXP_OPTIM
1108  void heuristicallyChooseHeuristic();
1109 #endif
1110 
1111 #if defined(QT_DEBUG)
1112  void dump() const;
1113 #endif
1114 
1116 
1117 private:
1118  enum { CharClassBit = 0x10000, BackRefBit = 0x20000 };
1119  enum { InitialState = 0, FinalState = 1 };
1120 
1121  void setup();
1122  int setupState(int match);
1123 
1124  /*
1125  Let's hope that 13 lookaheads and 14 back-references are
1126  enough.
1127  */
1128  enum { MaxLookaheads = 13, MaxBackRefs = 14 };
1129  enum { Anchor_Dollar = 0x00000001, Anchor_Caret = 0x00000002, Anchor_Word = 0x00000004,
1130  Anchor_NonWord = 0x00000008, Anchor_FirstLookahead = 0x00000010,
1131  Anchor_BackRef1Empty = Anchor_FirstLookahead << MaxLookaheads,
1132  Anchor_BackRef0Empty = Anchor_BackRef1Empty >> 1,
1133  Anchor_Alternation = unsigned(Anchor_BackRef1Empty) << MaxBackRefs,
1134 
1135  Anchor_LookaheadMask = (Anchor_FirstLookahead - 1) ^
1136  ((Anchor_FirstLookahead << MaxLookaheads) - 1) };
1137 #ifndef QT_NO_REGEXP_CAPTURE
1138  int startAtom(bool officialCapture);
1139  void finishAtom(int atom, bool needCapture);
1140 #endif
1141 
1142 #ifndef QT_NO_REGEXP_LOOKAHEAD
1143  int addLookahead(QRegExpEngine *eng, bool negative);
1144 #endif
1145 
1146 #ifndef QT_NO_REGEXP_OPTIM
1147  bool goodStringMatch(QRegExpMatchState &matchState) const;
1148  bool badCharMatch(QRegExpMatchState &matchState) const;
1149 #else
1150  bool bruteMatch(QRegExpMatchState &matchState) const;
1151 #endif
1152 
1153  QVector<QRegExpAutomatonState> s; // array of states
1154 #ifndef QT_NO_REGEXP_CAPTURE
1155  QVector<QRegExpAtom> f; // atom hierarchy
1156  int nf; // number of atoms
1157  int cf; // current atom
1159 #endif
1160  int officialncap; // number of captures, seen from the outside
1161  int ncap; // number of captures, seen from the inside
1162 #ifndef QT_NO_REGEXP_CCLASS
1163  QVector<QRegExpCharClass> cl; // array of character classes
1164 #endif
1165 #ifndef QT_NO_REGEXP_LOOKAHEAD
1166  QVector<QRegExpLookahead *> ahead; // array of lookaheads
1167 #endif
1168 #ifndef QT_NO_REGEXP_ANCHOR_ALT
1169  QVector<QRegExpAnchorAlternation> aa; // array of (a, b) pairs of anchors
1170 #endif
1171 #ifndef QT_NO_REGEXP_OPTIM
1172  bool caretAnchored; // does the regexp start with ^?
1173  bool trivial; // is the good-string all that needs to match?
1174 #endif
1175  bool valid; // is the regular expression valid?
1176  Qt::CaseSensitivity cs; // case sensitive?
1177  bool greedyQuantifiers; // RegExp2?
1179 #ifndef QT_NO_REGEXP_BACKREF
1180  int nbrefs; // number of back-references
1181 #endif
1182 
1183 #ifndef QT_NO_REGEXP_OPTIM
1184  bool useGoodStringHeuristic; // use goodStringMatch? otherwise badCharMatch
1185 
1186  int goodEarlyStart; // the index where goodStr can first occur in a match
1187  int goodLateStart; // the index where goodStr can last occur in a match
1188  QString goodStr; // the string that any match has to contain
1189 
1190  int minl; // the minimum length of a match
1191  QVector<int> occ1; // first-occurrence array
1192 #endif
1193 
1194  /*
1195  The class Box is an abstraction for a regular expression
1196  fragment. It can also be seen as one node in the syntax tree of
1197  a regular expression with synthesized attributes.
1198 
1199  Its interface is ugly for performance reasons.
1200  */
1201  class Box
1202  {
1203  public:
1204  Box(QRegExpEngine *engine);
1205  Box(const Box &b) { operator=(b); }
1206 
1207  Box &operator=(const Box &b);
1208 
1209  void clear() { operator=(Box(eng)); }
1210  void set(QChar ch);
1211  void set(const QRegExpCharClass &cc);
1212 #ifndef QT_NO_REGEXP_BACKREF
1213  void set(int bref);
1214 #endif
1215 
1216  void cat(const Box &b);
1217  void orx(const Box &b);
1218  void plus(int atom);
1219  void opt();
1220  void catAnchor(int a);
1221 #ifndef QT_NO_REGEXP_OPTIM
1222  void setupHeuristics();
1223 #endif
1224 
1225 #if defined(QT_DEBUG)
1226  void dump() const;
1227 #endif
1228 
1229  private:
1230  void addAnchorsToEngine(const Box &to) const;
1231 
1232  QRegExpEngine *eng; // the automaton under construction
1233  QVector<int> ls; // the left states (firstpos)
1234  QVector<int> rs; // the right states (lastpos)
1235  QMap<int, int> lanchors; // the left anchors
1236  QMap<int, int> ranchors; // the right anchors
1237  int skipanchors; // the anchors to match if the box is skipped
1238 
1239 #ifndef QT_NO_REGEXP_OPTIM
1240  int earlyStart; // the index where str can first occur
1241  int lateStart; // the index where str can last occur
1242  QString str; // a string that has to occur in any match
1243  QString leftStr; // a string occurring at the left of this box
1244  QString rightStr; // a string occurring at the right of this box
1245  int maxl; // the maximum length of this box (possibly InftyLen)
1246 #endif
1247 
1248  int minl; // the minimum length of this box
1249 #ifndef QT_NO_REGEXP_OPTIM
1250  QVector<int> occ1; // first-occurrence array
1251 #endif
1252  };
1253 
1254  friend class Box;
1255 
1256  void setupCategoriesRangeMap();
1257 
1258  /*
1259  This is the lexical analyzer for regular expressions.
1260  */
1261  enum { Tok_Eos, Tok_Dollar, Tok_LeftParen, Tok_MagicLeftParen, Tok_PosLookahead,
1262  Tok_NegLookahead, Tok_RightParen, Tok_CharClass, Tok_Caret, Tok_Quantifier, Tok_Bar,
1263  Tok_Word, Tok_NonWord, Tok_Char = 0x10000, Tok_BackRef = 0x20000 };
1264  int getChar();
1265  int getEscape();
1266 #ifndef QT_NO_REGEXP_INTERVAL
1267  int getRep(int def);
1268 #endif
1269 #ifndef QT_NO_REGEXP_LOOKAHEAD
1270  void skipChars(int n);
1271 #endif
1272  void error(const char *msg);
1273  void startTokenizer(const QChar *rx, int len);
1274  int getToken();
1275 
1276  const QChar *yyIn; // a pointer to the input regular expression pattern
1277  int yyPos0; // the position of yyTok in the input pattern
1278  int yyPos; // the position of the next character to read
1279  int yyLen; // the length of yyIn
1280  int yyCh; // the last character read
1281  QScopedPointer<QRegExpCharClass> yyCharClass; // attribute for Tok_CharClass tokens
1282  int yyMinRep; // attribute for Tok_Quantifier
1283  int yyMaxRep; // ditto
1284  QString yyError; // syntax error or overflow during parsing?
1285 
1286  /*
1287  This is the syntactic analyzer for regular expressions.
1288  */
1289  int parse(const QChar *rx, int len);
1290  void parseAtom(Box *box);
1291  void parseFactor(Box *box);
1292  void parseTerm(Box *box);
1293  void parseExpression(Box *box);
1294 
1295  int yyTok; // the last token read
1296  bool yyMayCapture; // set this to false to disable capturing
1297  QHash<QByteArray, QPair<int, int> > categoriesRangeMap; // fast lookup hash for xml schema extensions
1298 
1299  friend struct QRegExpMatchState;
1300 };
1301 
1302 #ifndef QT_NO_REGEXP_LOOKAHEAD
1303 /*
1304  The struct QRegExpLookahead represents a lookahead a la Perl (e.g.,
1305  (?=foo) and (?!bar)).
1306 */
1308 {
1309  QRegExpEngine *eng; // NFA representing the embedded regular expression
1310  bool neg; // negative lookahead?
1311 
1312  inline QRegExpLookahead(QRegExpEngine *eng0, bool neg0)
1313  : eng(eng0), neg(neg0) { }
1314  inline ~QRegExpLookahead() { delete eng; }
1315 };
1316 #endif
1317 
1324 {
1325  switch (patternSyntax) {
1326 #ifndef QT_NO_REGEXP_WILDCARD
1327  case QRegExp::Wildcard:
1328  return wc2rx(pattern, false);
1329  break;
1330  case QRegExp::WildcardUnix:
1331  return wc2rx(pattern, true);
1332  break;
1333 #endif
1334  case QRegExp::FixedString:
1335  return QRegExp::escape(pattern);
1336  break;
1338  default:
1339  return pattern;
1340  }
1341 }
1342 
1344  : cs(key.cs), greedyQuantifiers(key.patternSyntax == QRegExp::RegExp2),
1345  xmlSchemaExtensions(key.patternSyntax == QRegExp::W3CXmlSchema11)
1346 {
1347  setup();
1348 
1350 
1351  valid = (parse(rx.unicode(), rx.length()) == rx.length());
1352  if (!valid) {
1353 #ifndef QT_NO_REGEXP_OPTIM
1354  trivial = false;
1355 #endif
1357  }
1358 }
1359 
1361 {
1362 #ifndef QT_NO_REGEXP_LOOKAHEAD
1363  qDeleteAll(ahead);
1364 #endif
1365 }
1366 
1368 {
1369  /*
1370  We use one QVector<int> for all the big data used a lot in
1371  matchHere() and friends.
1372  */
1373  int ns = eng->s.size(); // number of states
1374  int ncap = eng->ncap;
1375 #ifndef QT_NO_REGEXP_OPTIM
1376  int newSlideTabSize = qMax(eng->minl + 1, 16);
1377 #else
1378  int newSlideTabSize = 0;
1379 #endif
1380  int numCaptures = eng->captureCount();
1381  int newCapturedSize = 2 + 2 * numCaptures;
1382  bigArray = q_check_ptr((int *)realloc(bigArray, ((3 + 4 * ncap) * ns + 4 * ncap + newSlideTabSize + newCapturedSize)*sizeof(int)));
1383 
1384  // set all internal variables only _after_ bigArray is realloc'ed
1385  // to prevent a broken regexp in oom case
1386 
1387  slideTabSize = newSlideTabSize;
1388  capturedSize = newCapturedSize;
1389  inNextStack = bigArray;
1390  memset(inNextStack, -1, ns * sizeof(int));
1391  curStack = inNextStack + ns;
1392  nextStack = inNextStack + 2 * ns;
1393 
1394  curCapBegin = inNextStack + 3 * ns;
1395  nextCapBegin = curCapBegin + ncap * ns;
1396  curCapEnd = curCapBegin + 2 * ncap * ns;
1397  nextCapEnd = curCapBegin + 3 * ncap * ns;
1398 
1399  tempCapBegin = curCapBegin + 4 * ncap * ns;
1400  tempCapEnd = tempCapBegin + ncap;
1401  capBegin = tempCapBegin + 2 * ncap;
1402  capEnd = tempCapBegin + 3 * ncap;
1403 
1404  slideTab = tempCapBegin + 4 * ncap;
1405  captured = slideTab + slideTabSize;
1406  memset(captured, -1, capturedSize*sizeof(int));
1407  this->eng = eng;
1408 }
1409 
1410 /*
1411  Tries to match in str and returns an array of (begin, length) pairs
1412  for captured text. If there is no match, all pairs are (-1, -1).
1413 */
1414 void QRegExpMatchState::match(const QChar *str0, int len0, int pos0,
1415  bool minimal0, bool oneTest, int caretIndex)
1416 {
1417  bool matched = false;
1418  QChar char_null;
1419 
1420 #ifndef QT_NO_REGEXP_OPTIM
1421  if (eng->trivial && !oneTest) {
1422  pos = qFindString(str0, len0, pos0, eng->goodStr.unicode(), eng->goodStr.length(), eng->cs);
1423  matchLen = eng->goodStr.length();
1424  matched = (pos != -1);
1425  } else
1426 #endif
1427  {
1428  in = str0;
1429  if (in == 0)
1430  in = &char_null;
1431  pos = pos0;
1432  caretPos = caretIndex;
1433  len = len0;
1434  minimal = minimal0;
1435  matchLen = 0;
1436  oneTestMatchedLen = 0;
1437 
1438  if (eng->valid && pos >= 0 && pos <= len) {
1439 #ifndef QT_NO_REGEXP_OPTIM
1440  if (oneTest) {
1441  matched = matchHere();
1442  } else {
1443  if (pos <= len - eng->minl) {
1444  if (eng->caretAnchored) {
1445  matched = matchHere();
1446  } else if (eng->useGoodStringHeuristic) {
1447  matched = eng->goodStringMatch(*this);
1448  } else {
1449  matched = eng->badCharMatch(*this);
1450  }
1451  }
1452  }
1453 #else
1454  matched = oneTest ? matchHere() : eng->bruteMatch(*this);
1455 #endif
1456  }
1457  }
1458 
1459  if (matched) {
1460  int *c = captured;
1461  *c++ = pos;
1462  *c++ = matchLen;
1463 
1464  int numCaptures = (capturedSize - 2) >> 1;
1465 #ifndef QT_NO_REGEXP_CAPTURE
1466  for (int i = 0; i < numCaptures; ++i) {
1467  int j = eng->captureForOfficialCapture.at(i);
1468  if (capBegin[j] != EmptyCapture) {
1469  int len = capEnd[j] - capBegin[j];
1470  *c++ = (len > 0) ? pos + capBegin[j] : 0;
1471  *c++ = len;
1472  } else {
1473  *c++ = -1;
1474  *c++ = -1;
1475  }
1476  }
1477 #endif
1478  } else {
1479  // we rely on 2's complement here
1480  memset(captured, -1, capturedSize * sizeof(int));
1481  }
1482 }
1483 
1484 /*
1485  The three following functions add one state to the automaton and
1486  return the number of the state.
1487 */
1488 
1490 {
1491  return setupState(ch.unicode());
1492 }
1493 
1495 {
1496 #ifndef QT_NO_REGEXP_CCLASS
1497  int n = cl.size();
1498  cl += QRegExpCharClass(cc);
1499  return setupState(CharClassBit | n);
1500 #else
1501  Q_UNUSED(cc);
1502  return setupState(CharClassBit);
1503 #endif
1504 }
1505 
1506 #ifndef QT_NO_REGEXP_BACKREF
1508 {
1509  if (bref > nbrefs) {
1510  nbrefs = bref;
1511  if (nbrefs > MaxBackRefs) {
1512  error(RXERR_LIMIT);
1513  return 0;
1514  }
1515  }
1516  return setupState(BackRefBit | bref);
1517 }
1518 #endif
1519 
1520 /*
1521  The two following functions add a transition between all pairs of
1522  states (i, j) where i is found in from, and j is found in to.
1523 
1524  Cat-transitions are distinguished from plus-transitions for
1525  capturing.
1526 */
1527 
1529 {
1530  for (int i = 0; i < from.size(); i++)
1531  mergeInto(&s[from.at(i)].outs, to);
1532 }
1533 
1534 #ifndef QT_NO_REGEXP_CAPTURE
1535 void QRegExpEngine::addPlusTransitions(const QVector<int> &from, const QVector<int> &to, int atom)
1536 {
1537  for (int i = 0; i < from.size(); i++) {
1538  QRegExpAutomatonState &st = s[from.at(i)];
1539  const QVector<int> oldOuts = st.outs;
1540  mergeInto(&st.outs, to);
1541  if (f.at(atom).capture != QRegExpAtom::NoCapture) {
1542  for (int j = 0; j < to.size(); j++) {
1543  // ### st.reenter.contains(to.at(j)) check looks suspicious
1544  if (!st.reenter.contains(to.at(j)) &&
1545  qBinaryFind(oldOuts.constBegin(), oldOuts.constEnd(), to.at(j)) == oldOuts.end())
1546  st.reenter.insert(to.at(j), atom);
1547  }
1548  }
1549  }
1550 }
1551 #endif
1552 
1553 #ifndef QT_NO_REGEXP_ANCHOR_ALT
1554 /*
1555  Returns an anchor that means a OR b.
1556 */
1558 {
1559  if (((a & b) == a || (a & b) == b) && ((a | b) & Anchor_Alternation) == 0)
1560  return a & b;
1561 
1562  int n = aa.size();
1563 #ifndef QT_NO_REGEXP_OPTIM
1564  if (n > 0 && aa.at(n - 1).a == a && aa.at(n - 1).b == b)
1565  return Anchor_Alternation | (n - 1);
1566 #endif
1567 
1568  QRegExpAnchorAlternation element = {a, b};
1569  aa.append(element);
1570  return Anchor_Alternation | n;
1571 }
1572 
1573 /*
1574  Returns an anchor that means a AND b.
1575 */
1577 {
1578  if (((a | b) & Anchor_Alternation) == 0)
1579  return a | b;
1580  if ((b & Anchor_Alternation) != 0)
1581  qSwap(a, b);
1582 
1583  int aprime = anchorConcatenation(aa.at(a ^ Anchor_Alternation).a, b);
1584  int bprime = anchorConcatenation(aa.at(a ^ Anchor_Alternation).b, b);
1585  return anchorAlternation(aprime, bprime);
1586 }
1587 #endif
1588 
1589 /*
1590  Adds anchor a on a transition caracterised by its from state and
1591  its to state.
1592 */
1593 void QRegExpEngine::addAnchors(int from, int to, int a)
1594 {
1595  QRegExpAutomatonState &st = s[from];
1596  if (st.anchors.contains(to))
1597  a = anchorAlternation(st.anchors.value(to), a);
1598  st.anchors.insert(to, a);
1599 }
1600 
1601 #ifndef QT_NO_REGEXP_OPTIM
1602 /*
1603  This function chooses between the good-string and the bad-character
1604  heuristics. It computes two scores and chooses the heuristic with
1605  the highest score.
1606 
1607  Here are some common-sense constraints on the scores that should be
1608  respected if the formulas are ever modified: (1) If goodStr is
1609  empty, the good-string heuristic scores 0. (2) If the regular
1610  expression is trivial, the good-string heuristic should be used.
1611  (3) If the search is case insensitive, the good-string heuristic
1612  should be used, unless it scores 0. (Case insensitivity turns all
1613  entries of occ1 to 0.) (4) If (goodLateStart - goodEarlyStart) is
1614  big, the good-string heuristic should score less.
1615 */
1617 {
1618  if (minl == 0) {
1619  useGoodStringHeuristic = false;
1620  } else if (trivial) {
1621  useGoodStringHeuristic = true;
1622  } else {
1623  /*
1624  Magic formula: The good string has to constitute a good
1625  proportion of the minimum-length string, and appear at a
1626  more-or-less known index.
1627  */
1628  int goodStringScore = (64 * goodStr.length() / minl) -
1630  /*
1631  Less magic formula: We pick some characters at random, and
1632  check whether they are good or bad.
1633  */
1634  int badCharScore = 0;
1635  int step = qMax(1, NumBadChars / 32);
1636  for (int i = 1; i < NumBadChars; i += step) {
1637  if (occ1.at(i) == NoOccurrence)
1638  badCharScore += minl;
1639  else
1640  badCharScore += occ1.at(i);
1641  }
1642  badCharScore /= minl;
1643  useGoodStringHeuristic = (goodStringScore > badCharScore);
1644  }
1645 }
1646 #endif
1647 
1648 #if defined(QT_DEBUG)
1650 {
1651  int i, j;
1652  qDebug("Case %ssensitive engine", cs ? "" : "in");
1653  qDebug(" States");
1654  for (i = 0; i < s.size(); i++) {
1655  qDebug(" %d%s", i, i == InitialState ? " (initial)" : i == FinalState ? " (final)" : "");
1656 #ifndef QT_NO_REGEXP_CAPTURE
1657  if (nf > 0)
1658  qDebug(" in atom %d", s[i].atom);
1659 #endif
1660  int m = s[i].match;
1661  if ((m & CharClassBit) != 0) {
1662  qDebug(" match character class %d", m ^ CharClassBit);
1663 #ifndef QT_NO_REGEXP_CCLASS
1664  cl[m ^ CharClassBit].dump();
1665 #else
1666  qDebug(" negative character class");
1667 #endif
1668  } else if ((m & BackRefBit) != 0) {
1669  qDebug(" match back-reference %d", m ^ BackRefBit);
1670  } else if (m >= 0x20 && m <= 0x7e) {
1671  qDebug(" match 0x%.4x (%c)", m, m);
1672  } else {
1673  qDebug(" match 0x%.4x", m);
1674  }
1675  for (j = 0; j < s[i].outs.size(); j++) {
1676  int next = s[i].outs[j];
1677  qDebug(" -> %d", next);
1678  if (s[i].reenter.contains(next))
1679  qDebug(" [reenter %d]", s[i].reenter[next]);
1680  if (s[i].anchors.value(next) != 0)
1681  qDebug(" [anchors 0x%.8x]", s[i].anchors[next]);
1682  }
1683  }
1684 #ifndef QT_NO_REGEXP_CAPTURE
1685  if (nf > 0) {
1686  qDebug(" Atom Parent Capture");
1687  for (i = 0; i < nf; i++) {
1688  if (f[i].capture == QRegExpAtom::NoCapture) {
1689  qDebug(" %6d %6d nil", i, f[i].parent);
1690  } else {
1691  int cap = f[i].capture;
1692  bool official = captureForOfficialCapture.contains(cap);
1693  qDebug(" %6d %6d %6d %s", i, f[i].parent, f[i].capture,
1694  official ? "official" : "");
1695  }
1696  }
1697  }
1698 #endif
1699 #ifndef QT_NO_REGEXP_ANCHOR_ALT
1700  for (i = 0; i < aa.size(); i++)
1701  qDebug(" Anchor alternation 0x%.8x: 0x%.8x 0x%.9x", i, aa[i].a, aa[i].b);
1702 #endif
1703 }
1704 #endif
1705 
1707 {
1708  ref = 1;
1709 #ifndef QT_NO_REGEXP_CAPTURE
1710  f.resize(32);
1711  nf = 0;
1712  cf = -1;
1713 #endif
1714  officialncap = 0;
1715  ncap = 0;
1716 #ifndef QT_NO_REGEXP_OPTIM
1717  caretAnchored = true;
1718  trivial = true;
1719 #endif
1720  valid = false;
1721 #ifndef QT_NO_REGEXP_BACKREF
1722  nbrefs = 0;
1723 #endif
1724 #ifndef QT_NO_REGEXP_OPTIM
1725  useGoodStringHeuristic = true;
1726  minl = 0;
1727  occ1.fill(0, NumBadChars);
1728 #endif
1729 }
1730 
1732 {
1733 #ifndef QT_NO_REGEXP_CAPTURE
1734  s += QRegExpAutomatonState(cf, match);
1735 #else
1736  s += QRegExpAutomatonState(match);
1737 #endif
1738  return s.size() - 1;
1739 }
1740 
1741 #ifndef QT_NO_REGEXP_CAPTURE
1742 /*
1743  Functions startAtom() and finishAtom() should be called to delimit
1744  atoms. When a state is created, it is assigned to the current atom.
1745  The information is later used for capturing.
1746 */
1747 int QRegExpEngine::startAtom(bool officialCapture)
1748 {
1749  if ((nf & (nf + 1)) == 0 && nf + 1 >= f.size())
1750  f.resize((nf + 1) << 1);
1751  f[nf].parent = cf;
1752  cf = nf++;
1753  f[cf].capture = officialCapture ? QRegExpAtom::OfficialCapture : QRegExpAtom::NoCapture;
1754  return cf;
1755 }
1756 
1757 void QRegExpEngine::finishAtom(int atom, bool needCapture)
1758 {
1759  if (greedyQuantifiers && needCapture && f[atom].capture == QRegExpAtom::NoCapture)
1760  f[atom].capture = QRegExpAtom::UnofficialCapture;
1761  cf = f.at(atom).parent;
1762 }
1763 #endif
1764 
1765 #ifndef QT_NO_REGEXP_LOOKAHEAD
1766 /*
1767  Creates a lookahead anchor.
1768 */
1770 {
1771  int n = ahead.size();
1772  if (n == MaxLookaheads) {
1773  error(RXERR_LIMIT);
1774  return 0;
1775  }
1776  ahead += new QRegExpLookahead(eng, negative);
1777  return Anchor_FirstLookahead << n;
1778 }
1779 #endif
1780 
1781 #ifndef QT_NO_REGEXP_CAPTURE
/*
  We want the longest leftmost captures.

  Compares two sets of \a ncap capture zones, given as begin/end
  arrays, zone by zone. The first zone in which the two sets differ
  decides: set 1 is better if it starts earlier or, when both start
  at the same place, if it ends later. Returns true if set 1 is
  better than set 2, and false if it is worse or if all zones are
  identical.
*/
static bool isBetterCapture(int ncap, const int *begin1, const int *end1, const int *begin2,
                            const int *end2)
{
    for (int i = 0; i < ncap; i++) {
        int delta = begin2[i] - begin1[i]; // it has to start early...
        if (delta == 0)
            delta = end1[i] - end2[i]; // ...and end late

        if (delta != 0)
            return delta > 0;
    }
    return false;
}
1798 #endif
1799 
1800 /*
1801  Returns true if anchor a matches at position pos + i in the input
1802  string, otherwise false.
1803 */
1804 bool QRegExpMatchState::testAnchor(int i, int a, const int *capBegin)
1805 {
1806  int j;
1807 
1808 #ifndef QT_NO_REGEXP_ANCHOR_ALT
1809  if ((a & QRegExpEngine::Anchor_Alternation) != 0)
1810  return testAnchor(i, eng->aa.at(a ^ QRegExpEngine::Anchor_Alternation).a, capBegin)
1811  || testAnchor(i, eng->aa.at(a ^ QRegExpEngine::Anchor_Alternation).b, capBegin);
1812 #endif
1813 
1814  if ((a & QRegExpEngine::Anchor_Caret) != 0) {
1815  if (pos + i != caretPos)
1816  return false;
1817  }
1818  if ((a & QRegExpEngine::Anchor_Dollar) != 0) {
1819  if (pos + i != len)
1820  return false;
1821  }
1822 #ifndef QT_NO_REGEXP_ESCAPE
1824  bool before = false;
1825  bool after = false;
1826  if (pos + i != 0)
1827  before = isWord(in[pos + i - 1]);
1828  if (pos + i != len)
1829  after = isWord(in[pos + i]);
1830  if ((a & QRegExpEngine::Anchor_Word) != 0 && (before == after))
1831  return false;
1832  if ((a & QRegExpEngine::Anchor_NonWord) != 0 && (before != after))
1833  return false;
1834  }
1835 #endif
1836 #ifndef QT_NO_REGEXP_LOOKAHEAD
1837  if ((a & QRegExpEngine::Anchor_LookaheadMask) != 0) {
1838  const QVector<QRegExpLookahead *> &ahead = eng->ahead;
1839  for (j = 0; j < ahead.size(); j++) {
1840  if ((a & (QRegExpEngine::Anchor_FirstLookahead << j)) != 0) {
1841  QRegExpMatchState matchState;
1842  matchState.prepareForMatch(ahead[j]->eng);
1843  matchState.match(in + pos + i, len - pos - i, 0,
1844  true, true, caretPos - pos - i);
1845  if ((matchState.captured[0] == 0) == ahead[j]->neg)
1846  return false;
1847  }
1848  }
1849  }
1850 #endif
1851 #ifndef QT_NO_REGEXP_CAPTURE
1852 #ifndef QT_NO_REGEXP_BACKREF
1853  for (j = 0; j < eng->nbrefs; j++) {
1854  if ((a & (QRegExpEngine::Anchor_BackRef1Empty << j)) != 0) {
1855  int i = eng->captureForOfficialCapture.at(j);
1856  if (capBegin[i] != EmptyCapture)
1857  return false;
1858  }
1859  }
1860 #endif
1861 #endif
1862  return true;
1863 }
1864 
1865 #ifndef QT_NO_REGEXP_OPTIM
1866 /*
1867  The three following functions are what Jeffrey Friedl would call
1868  transmissions (or bump-alongs). Using one or the other should make
1869  no difference except in performance.
1870 */
1871 
1873 {
1874  int k = matchState.pos + goodEarlyStart;
1875  QStringMatcher matcher(goodStr.unicode(), goodStr.length(), cs);
1876  while ((k = matcher.indexIn(matchState.in, matchState.len, k)) != -1) {
1877  int from = k - goodLateStart;
1878  int to = k - goodEarlyStart;
1879  if (from > matchState.pos)
1880  matchState.pos = from;
1881 
1882  while (matchState.pos <= to) {
1883  if (matchState.matchHere())
1884  return true;
1885  ++matchState.pos;
1886  }
1887  ++k;
1888  }
1889  return false;
1890 }
1891 
1893 {
1894  int slideHead = 0;
1895  int slideNext = 0;
1896  int i;
1897  int lastPos = matchState.len - minl;
1898  memset(matchState.slideTab, 0, matchState.slideTabSize * sizeof(int));
1899 
1900  /*
1901  Set up the slide table, used for the bad-character heuristic,
1902  using the table of first occurrence of each character.
1903  */
1904  for (i = 0; i < minl; i++) {
1905  int sk = occ1[BadChar(matchState.in[matchState.pos + i])];
1906  if (sk == NoOccurrence)
1907  sk = i + 1;
1908  if (sk > 0) {
1909  int k = i + 1 - sk;
1910  if (k < 0) {
1911  sk = i + 1;
1912  k = 0;
1913  }
1914  if (sk > matchState.slideTab[k])
1915  matchState.slideTab[k] = sk;
1916  }
1917  }
1918 
1919  if (matchState.pos > lastPos)
1920  return false;
1921 
1922  for (;;) {
1923  if (++slideNext >= matchState.slideTabSize)
1924  slideNext = 0;
1925  if (matchState.slideTab[slideHead] > 0) {
1926  if (matchState.slideTab[slideHead] - 1 > matchState.slideTab[slideNext])
1927  matchState.slideTab[slideNext] = matchState.slideTab[slideHead] - 1;
1928  matchState.slideTab[slideHead] = 0;
1929  } else {
1930  if (matchState.matchHere())
1931  return true;
1932  }
1933 
1934  if (matchState.pos == lastPos)
1935  break;
1936 
1937  /*
1938  Update the slide table. This code has much in common with
1939  the initialization code.
1940  */
1941  int sk = occ1[BadChar(matchState.in[matchState.pos + minl])];
1942  if (sk == NoOccurrence) {
1943  matchState.slideTab[slideNext] = minl;
1944  } else if (sk > 0) {
1945  int k = slideNext + minl - sk;
1946  if (k >= matchState.slideTabSize)
1947  k -= matchState.slideTabSize;
1948  if (sk > matchState.slideTab[k])
1949  matchState.slideTab[k] = sk;
1950  }
1951  slideHead = slideNext;
1952  ++matchState.pos;
1953  }
1954  return false;
1955 }
1956 #else
1957 bool QRegExpEngine::bruteMatch(QRegExpMatchState &matchState) const
1958 {
1959  while (matchState.pos <= matchState.len) {
1960  if (matchState.matchHere())
1961  return true;
1962  ++matchState.pos;
1963  }
1964  return false;
1965 }
1966 #endif
1967 
1968 /*
1969  Here's the core of the engine. It tries to do a match here and now.
1970 */
1972 {
1973  int ncur = 1, nnext = 0;
1974  int i = 0, j, k, m;
1975  bool stop = false;
1976 
1977  matchLen = -1;
1978  oneTestMatchedLen = -1;
1979  curStack[0] = QRegExpEngine::InitialState;
1980 
1981  int ncap = eng->ncap;
1982 #ifndef QT_NO_REGEXP_CAPTURE
1983  if (ncap > 0) {
1984  for (j = 0; j < ncap; j++) {
1985  curCapBegin[j] = EmptyCapture;
1986  curCapEnd[j] = EmptyCapture;
1987  }
1988  }
1989 #endif
1990 
1991 #ifndef QT_NO_REGEXP_BACKREF
1992  while ((ncur > 0 || !sleeping.isEmpty()) && i <= len - pos && !stop)
1993 #else
1994  while (ncur > 0 && i <= len - pos && !stop)
1995 #endif
1996  {
1997  int ch = (i < len - pos) ? in[pos + i].unicode() : 0;
1998  for (j = 0; j < ncur; j++) {
1999  int cur = curStack[j];
2000  const QRegExpAutomatonState &scur = eng->s.at(cur);
2001  const QVector<int> &outs = scur.outs;
2002  for (k = 0; k < outs.size(); k++) {
2003  int next = outs.at(k);
2004  const QRegExpAutomatonState &snext = eng->s.at(next);
2005  bool inside = true;
2006 #if !defined(QT_NO_REGEXP_BACKREF) && !defined(QT_NO_REGEXP_CAPTURE)
2007  int needSomeSleep = 0;
2008 #endif
2009 
2010  /*
2011  First, check if the anchors are anchored properly.
2012  */
2013  int a = scur.anchors.value(next);
2014  if (a != 0 && !testAnchor(i, a, curCapBegin + j * ncap))
2015  inside = false;
2016 
2017  /*
2018  If indeed they are, check if the input character is
2019  correct for this transition.
2020  */
2021  if (inside) {
2022  m = snext.match;
2024  if (eng->cs)
2025  inside = (m == ch);
2026  else
2027  inside = (QChar(m).toLower() == QChar(ch).toLower());
2028  } else if (next == QRegExpEngine::FinalState) {
2029  matchLen = i;
2030  stop = minimal;
2031  inside = true;
2032  } else if ((m & QRegExpEngine::CharClassBit) != 0) {
2033 #ifndef QT_NO_REGEXP_CCLASS
2034  const QRegExpCharClass &cc = eng->cl.at(m ^ QRegExpEngine::CharClassBit);
2035  if (eng->cs)
2036  inside = cc.in(ch);
2037  else if (cc.negative())
2038  inside = cc.in(QChar(ch).toLower()) &&
2039  cc.in(QChar(ch).toUpper());
2040  else
2041  inside = cc.in(QChar(ch).toLower()) ||
2042  cc.in(QChar(ch).toUpper());
2043 #endif
2044 #if !defined(QT_NO_REGEXP_BACKREF) && !defined(QT_NO_REGEXP_CAPTURE)
2045  } else { /* ((m & QRegExpEngine::BackRefBit) != 0) */
2046  int bref = m ^ QRegExpEngine::BackRefBit;
2047  int ell = j * ncap + eng->captureForOfficialCapture.at(bref - 1);
2048 
2049  inside = bref <= ncap && curCapBegin[ell] != EmptyCapture;
2050  if (inside) {
2051  if (eng->cs)
2052  inside = (in[pos + curCapBegin[ell]] == QChar(ch));
2053  else
2054  inside = (in[pos + curCapBegin[ell]].toLower()
2055  == QChar(ch).toLower());
2056  }
2057 
2058  if (inside) {
2059  int delta;
2060  if (curCapEnd[ell] == EmptyCapture)
2061  delta = i - curCapBegin[ell];
2062  else
2063  delta = curCapEnd[ell] - curCapBegin[ell];
2064 
2065  inside = (delta <= len - (pos + i));
2066  if (inside && delta > 1) {
2067  int n = 1;
2068  if (eng->cs) {
2069  while (n < delta) {
2070  if (in[pos + curCapBegin[ell] + n]
2071  != in[pos + i + n])
2072  break;
2073  ++n;
2074  }
2075  } else {
2076  while (n < delta) {
2077  QChar a = in[pos + curCapBegin[ell] + n];
2078  QChar b = in[pos + i + n];
2079  if (a.toLower() != b.toLower())
2080  break;
2081  ++n;
2082  }
2083  }
2084  inside = (n == delta);
2085  if (inside)
2086  needSomeSleep = delta - 1;
2087  }
2088  }
2089 #endif
2090  }
2091  }
2092 
2093  /*
2094  We must now update our data structures.
2095  */
2096  if (inside) {
2097 #ifndef QT_NO_REGEXP_CAPTURE
2098  int *capBegin, *capEnd;
2099 #endif
2100  /*
2101  If the next state was not encountered yet, all
2102  is fine.
2103  */
2104  if ((m = inNextStack[next]) == -1) {
2105  m = nnext++;
2106  nextStack[m] = next;
2107  inNextStack[next] = m;
2108 #ifndef QT_NO_REGEXP_CAPTURE
2109  capBegin = nextCapBegin + m * ncap;
2110  capEnd = nextCapEnd + m * ncap;
2111 
2112  /*
2113  Otherwise, we'll first maintain captures in
2114  temporary arrays, and decide at the end whether
2115  it's best to keep the previous capture zones or
2116  the new ones.
2117  */
2118  } else {
2119  capBegin = tempCapBegin;
2120  capEnd = tempCapEnd;
2121 #endif
2122  }
2123 
2124 #ifndef QT_NO_REGEXP_CAPTURE
2125  /*
2126  Updating the capture zones is much of a task.
2127  */
2128  if (ncap > 0) {
2129  memcpy(capBegin, curCapBegin + j * ncap, ncap * sizeof(int));
2130  memcpy(capEnd, curCapEnd + j * ncap, ncap * sizeof(int));
2131  int c = scur.atom, n = snext.atom;
2132  int p = -1, q = -1;
2133  int cap;
2134 
2135  /*
2136  Lemma 1. For any x in the range [0..nf), we
2137  have f[x].parent < x.
2138 
2139  Proof. By looking at startAtom(), it is
2140  clear that cf < nf holds all the time, and
2141  thus that f[nf].parent < nf.
2142  */
2143 
2144  /*
2145  If we are reentering an atom, we empty all
2146  capture zones inside it.
2147  */
2148  if ((q = scur.reenter.value(next)) != 0) {
2149  QBitArray b(eng->nf, false);
2150  b.setBit(q, true);
2151  for (int ell = q + 1; ell < eng->nf; ell++) {
2152  if (b.testBit(eng->f.at(ell).parent)) {
2153  b.setBit(ell, true);
2154  cap = eng->f.at(ell).capture;
2155  if (cap >= 0) {
2156  capBegin[cap] = EmptyCapture;
2157  capEnd[cap] = EmptyCapture;
2158  }
2159  }
2160  }
2161  p = eng->f.at(q).parent;
2162 
2163  /*
2164  Otherwise, close the capture zones we are
2165  leaving. We are leaving f[c].capture,
2166  f[f[c].parent].capture,
2167  f[f[f[c].parent].parent].capture, ...,
2168  until f[x].capture, with x such that
2169  f[x].parent is the youngest common ancestor
2170  for c and n.
2171 
2172  We go up along c's and n's ancestry until
2173  we find x.
2174  */
2175  } else {
2176  p = c;
2177  q = n;
2178  while (p != q) {
2179  if (p > q) {
2180  cap = eng->f.at(p).capture;
2181  if (cap >= 0) {
2182  if (capBegin[cap] == i) {
2183  capBegin[cap] = EmptyCapture;
2184  capEnd[cap] = EmptyCapture;
2185  } else {
2186  capEnd[cap] = i;
2187  }
2188  }
2189  p = eng->f.at(p).parent;
2190  } else {
2191  q = eng->f.at(q).parent;
2192  }
2193  }
2194  }
2195 
2196  /*
2197  In any case, we now open the capture zones
2198  we are entering. We work upwards from n
2199  until we reach p (the parent of the atom we
2200  reenter or the youngest common ancestor).
2201  */
2202  while (n > p) {
2203  cap = eng->f.at(n).capture;
2204  if (cap >= 0) {
2205  capBegin[cap] = i;
2206  capEnd[cap] = EmptyCapture;
2207  }
2208  n = eng->f.at(n).parent;
2209  }
2210  /*
2211  If the next state was already in
2212  nextStack, we must choose carefully which
2213  capture zones we want to keep.
2214  */
2215  if (capBegin == tempCapBegin &&
2216  isBetterCapture(ncap, capBegin, capEnd, nextCapBegin + m * ncap,
2217  nextCapEnd + m * ncap)) {
2218  memcpy(nextCapBegin + m * ncap, capBegin, ncap * sizeof(int));
2219  memcpy(nextCapEnd + m * ncap, capEnd, ncap * sizeof(int));
2220  }
2221  }
2222 #ifndef QT_NO_REGEXP_BACKREF
2223  /*
2224  We are done with updating the capture zones.
2225  It's now time to put the next state to sleep,
2226  if it needs to, and to remove it from
2227  nextStack.
2228  */
2229  if (needSomeSleep > 0) {
2230  QVector<int> zzZ(2 + 2 * ncap);
2231  zzZ[0] = i + needSomeSleep;
2232  zzZ[1] = next;
2233  if (ncap > 0) {
2234  memcpy(zzZ.data() + 2, capBegin, ncap * sizeof(int));
2235  memcpy(zzZ.data() + 2 + ncap, capEnd, ncap * sizeof(int));
2236  }
2237  inNextStack[nextStack[--nnext]] = -1;
2238  sleeping.append(zzZ);
2239  }
2240 #endif
2241 #endif
2242  }
2243  }
2244  }
2245 #ifndef QT_NO_REGEXP_CAPTURE
2246  /*
2247  If we reached the final state, hurray! Copy the captured
2248  zone.
2249  */
2250  if (ncap > 0 && (m = inNextStack[QRegExpEngine::FinalState]) != -1) {
2251  memcpy(capBegin, nextCapBegin + m * ncap, ncap * sizeof(int));
2252  memcpy(capEnd, nextCapEnd + m * ncap, ncap * sizeof(int));
2253  }
2254 #ifndef QT_NO_REGEXP_BACKREF
2255  /*
2256  It's time to wake up the sleepers.
2257  */
2258  j = 0;
2259  while (j < sleeping.count()) {
2260  if (sleeping.at(j)[0] == i) {
2261  const QVector<int> &zzZ = sleeping.at(j);
2262  int next = zzZ[1];
2263  const int *capBegin = zzZ.data() + 2;
2264  const int *capEnd = zzZ.data() + 2 + ncap;
2265  bool copyOver = true;
2266 
2267  if ((m = inNextStack[next]) == -1) {
2268  m = nnext++;
2269  nextStack[m] = next;
2270  inNextStack[next] = m;
2271  } else {
2272  copyOver = isBetterCapture(ncap, nextCapBegin + m * ncap, nextCapEnd + m * ncap,
2273  capBegin, capEnd);
2274  }
2275  if (copyOver) {
2276  memcpy(nextCapBegin + m * ncap, capBegin, ncap * sizeof(int));
2277  memcpy(nextCapEnd + m * ncap, capEnd, ncap * sizeof(int));
2278  }
2279 
2280  sleeping.removeAt(j);
2281  } else {
2282  ++j;
2283  }
2284  }
2285 #endif
2286 #endif
2287  for (j = 0; j < nnext; j++)
2288  inNextStack[nextStack[j]] = -1;
2289 
2290  // avoid needless iteration that confuses oneTestMatchedLen
2291  if (nnext == 1 && nextStack[0] == QRegExpEngine::FinalState
2292 #ifndef QT_NO_REGEXP_BACKREF
2293  && sleeping.isEmpty()
2294 #endif
2295  )
2296  stop = true;
2297 
2298  qSwap(curStack, nextStack);
2299 #ifndef QT_NO_REGEXP_CAPTURE
2300  qSwap(curCapBegin, nextCapBegin);
2301  qSwap(curCapEnd, nextCapEnd);
2302 #endif
2303  ncur = nnext;
2304  nnext = 0;
2305  ++i;
2306  }
2307 
2308 #ifndef QT_NO_REGEXP_BACKREF
2309  /*
2310  If minimal matching is enabled, we might have some sleepers
2311  left.
2312  */
2313  if (!sleeping.isEmpty())
2314  sleeping.clear();
2315 #endif
2316 
2317  oneTestMatchedLen = i - 1;
2318  return (matchLen >= 0);
2319 }
2320 
2321 #ifndef QT_NO_REGEXP_CCLASS
2322 
2324  : c(0), n(false)
2325 {
2326 #ifndef QT_NO_REGEXP_OPTIM
2328 #endif
2329 }
2330 
2332 {
2333  c = cc.c;
2334  r = cc.r;
2335  n = cc.n;
2336 #ifndef QT_NO_REGEXP_OPTIM
2337  occ1 = cc.occ1;
2338 #endif
2339  return *this;
2340 }
2341 
2343 {
2344  c = 0;
2345  r.resize(0);
2346  n = false;
2347 }
2348 
2350 {
2351  n = negative;
2352 #ifndef QT_NO_REGEXP_OPTIM
2353  occ1.fill(0, NumBadChars);
2354 #endif
2355 }
2356 
2358 {
2359  c |= cats;
2360 #ifndef QT_NO_REGEXP_OPTIM
2361  occ1.fill(0, NumBadChars);
2362 #endif
2363 }
2364 
2366 {
2367  if (from > to)
2368  qSwap(from, to);
2369  int m = r.size();
2370  r.resize(m + 1);
2371  r[m].from = from;
2372  r[m].len = to - from + 1;
2373 
2374 #ifndef QT_NO_REGEXP_OPTIM
2375  int i;
2376 
2377  if (to - from < NumBadChars) {
2378  if (from % NumBadChars <= to % NumBadChars) {
2379  for (i = from % NumBadChars; i <= to % NumBadChars; i++)
2380  occ1[i] = 0;
2381  } else {
2382  for (i = 0; i <= to % NumBadChars; i++)
2383  occ1[i] = 0;
2384  for (i = from % NumBadChars; i < NumBadChars; i++)
2385  occ1[i] = 0;
2386  }
2387  } else {
2388  occ1.fill(0, NumBadChars);
2389  }
2390 #endif
2391 }
2392 
2394 {
2395 #ifndef QT_NO_REGEXP_OPTIM
2396  if (occ1.at(BadChar(ch)) == NoOccurrence)
2397  return n;
2398 #endif
2399 
2400  if (c != 0 && (c & (1 << (int)ch.category())) != 0)
2401  return !n;
2402 
2403  const int uc = ch.unicode();
2404  int size = r.size();
2405 
2406  for (int i = 0; i < size; ++i) {
2407  const QRegExpCharClassRange &range = r.at(i);
2408  if (uint(uc - range.from) < uint(r.at(i).len))
2409  return !n;
2410  }
2411  return n;
2412 }
2413 
2414 #if defined(QT_DEBUG)
2416 {
2417  int i;
2418  qDebug(" %stive character class", n ? "nega" : "posi");
2419 #ifndef QT_NO_REGEXP_CCLASS
2420  if (c != 0)
2421  qDebug(" categories 0x%.8x", c);
2422 #endif
2423  for (i = 0; i < r.size(); i++)
2424  qDebug(" 0x%.4x through 0x%.4x", r[i].from, r[i].from + r[i].len - 1);
2425 }
2426 #endif
2427 #endif
2428 
2430  : eng(engine), skipanchors(0)
2431 #ifndef QT_NO_REGEXP_OPTIM
2432  , earlyStart(0), lateStart(0), maxl(0)
2433 #endif
2434 {
2435 #ifndef QT_NO_REGEXP_OPTIM
2437 #endif
2438  minl = 0;
2439 }
2440 
2442 {
2443  eng = b.eng;
2444  ls = b.ls;
2445  rs = b.rs;
2446  lanchors = b.lanchors;
2447  ranchors = b.ranchors;
2449 #ifndef QT_NO_REGEXP_OPTIM
2450  earlyStart = b.earlyStart;
2451  lateStart = b.lateStart;
2452  str = b.str;
2453  leftStr = b.leftStr;
2454  rightStr = b.rightStr;
2455  maxl = b.maxl;
2456  occ1 = b.occ1;
2457 #endif
2458  minl = b.minl;
2459  return *this;
2460 }
2461 
2463 {
2464  ls.resize(1);
2465  ls[0] = eng->createState(ch);
2466  rs = ls;
2467 #ifndef QT_NO_REGEXP_OPTIM
2468  str = ch;
2469  leftStr = ch;
2470  rightStr = ch;
2471  maxl = 1;
2472  occ1[BadChar(ch)] = 0;
2473 #endif
2474  minl = 1;
2475 }
2476 
2478 {
2479  ls.resize(1);
2480  ls[0] = eng->createState(cc);
2481  rs = ls;
2482 #ifndef QT_NO_REGEXP_OPTIM
2483  maxl = 1;
2484  occ1 = cc.firstOccurrence();
2485 #endif
2486  minl = 1;
2487 }
2488 
2489 #ifndef QT_NO_REGEXP_BACKREF
2491 {
2492  ls.resize(1);
2493  ls[0] = eng->createState(bref);
2494  rs = ls;
2495  if (bref >= 1 && bref <= MaxBackRefs)
2497 #ifndef QT_NO_REGEXP_OPTIM
2498  maxl = InftyLen;
2499 #endif
2500  minl = 0;
2501 }
2502 #endif
2503 
2505 {
2506  eng->addCatTransitions(rs, b.ls);
2507  addAnchorsToEngine(b);
2508  if (minl == 0) {
2509  lanchors.unite(b.lanchors);
2510  if (skipanchors != 0) {
2511  for (int i = 0; i < b.ls.size(); i++) {
2512  int a = eng->anchorConcatenation(lanchors.value(b.ls.at(i), 0), skipanchors);
2513  lanchors.insert(b.ls.at(i), a);
2514  }
2515  }
2516  mergeInto(&ls, b.ls);
2517  }
2518  if (b.minl == 0) {
2519  ranchors.unite(b.ranchors);
2520  if (b.skipanchors != 0) {
2521  for (int i = 0; i < rs.size(); i++) {
2522  int a = eng->anchorConcatenation(ranchors.value(rs.at(i), 0), b.skipanchors);
2523  ranchors.insert(rs.at(i), a);
2524  }
2525  }
2526  mergeInto(&rs, b.rs);
2527  } else {
2528  ranchors = b.ranchors;
2529  rs = b.rs;
2530  }
2531 
2532 #ifndef QT_NO_REGEXP_OPTIM
2533  if (maxl != InftyLen) {
2534  if (rightStr.length() + b.leftStr.length() >
2535  qMax(str.length(), b.str.length())) {
2537  lateStart = maxl - rightStr.length();
2538  str = rightStr + b.leftStr;
2539  } else if (b.str.length() > str.length()) {
2540  earlyStart = minl + b.earlyStart;
2541  lateStart = maxl + b.lateStart;
2542  str = b.str;
2543  }
2544  }
2545 
2546  if (leftStr.length() == maxl)
2547  leftStr += b.leftStr;
2548 
2549  if (b.rightStr.length() == b.maxl) {
2550  rightStr += b.rightStr;
2551  } else {
2552  rightStr = b.rightStr;
2553  }
2554 
2555  if (maxl == InftyLen || b.maxl == InftyLen) {
2556  maxl = InftyLen;
2557  } else {
2558  maxl += b.maxl;
2559  }
2560 
2561  for (int i = 0; i < NumBadChars; i++) {
2562  if (b.occ1.at(i) != NoOccurrence && minl + b.occ1.at(i) < occ1.at(i))
2563  occ1[i] = minl + b.occ1.at(i);
2564  }
2565 #endif
2566 
2567  minl += b.minl;
2568  if (minl == 0)
2570  else
2571  skipanchors = 0;
2572 }
2573 
2575 {
2576  mergeInto(&ls, b.ls);
2577  lanchors.unite(b.lanchors);
2578  mergeInto(&rs, b.rs);
2579  ranchors.unite(b.ranchors);
2580 
2581  if (b.minl == 0) {
2582  if (minl == 0)
2584  else
2586  }
2587 
2588 #ifndef QT_NO_REGEXP_OPTIM
2589  for (int i = 0; i < NumBadChars; i++) {
2590  if (occ1.at(i) > b.occ1.at(i))
2591  occ1[i] = b.occ1.at(i);
2592  }
2593  earlyStart = 0;
2594  lateStart = 0;
2595  str = QString();
2596  leftStr = QString();
2597  rightStr = QString();
2598  if (b.maxl > maxl)
2599  maxl = b.maxl;
2600 #endif
2601  if (b.minl < minl)
2602  minl = b.minl;
2603 }
2604 
2606 {
2607 #ifndef QT_NO_REGEXP_CAPTURE
2608  eng->addPlusTransitions(rs, ls, atom);
2609 #else
2610  Q_UNUSED(atom);
2612 #endif
2613  addAnchorsToEngine(*this);
2614 #ifndef QT_NO_REGEXP_OPTIM
2615  maxl = InftyLen;
2616 #endif
2617 }
2618 
2620 {
2621 #ifndef QT_NO_REGEXP_OPTIM
2622  earlyStart = 0;
2623  lateStart = 0;
2624  str = QString();
2625  leftStr = QString();
2626  rightStr = QString();
2627 #endif
2628  skipanchors = 0;
2629  minl = 0;
2630 }
2631 
2633 {
2634  if (a != 0) {
2635  for (int i = 0; i < rs.size(); i++) {
2636  a = eng->anchorConcatenation(ranchors.value(rs.at(i), 0), a);
2637  ranchors.insert(rs.at(i), a);
2638  }
2639  if (minl == 0)
2641  }
2642 }
2643 
2644 #ifndef QT_NO_REGEXP_OPTIM
2646 {
2649  eng->goodStr = eng->cs ? str : str.toLower();
2650 
2651  eng->minl = minl;
2652  if (eng->cs) {
2653  /*
2654  A regular expression such as 112|1 has occ1['2'] = 2 and minl =
2655  1 at this point. An entry of occ1 has to be at most minl or
2656  infinity for the rest of the algorithm to go well.
2657 
2658  We waited until here before normalizing these cases (instead of
2659  doing it in Box::orx()) because sometimes things improve by
2660  themselves. Consider for example (112|1)34.
2661  */
2662  for (int i = 0; i < NumBadChars; i++) {
2663  if (occ1.at(i) != NoOccurrence && occ1.at(i) >= minl)
2664  occ1[i] = minl;
2665  }
2666  eng->occ1 = occ1;
2667  } else {
2668  eng->occ1.fill(0, NumBadChars);
2669  }
2670 
2672 }
2673 #endif
2674 
2675 #if defined(QT_DEBUG)
2677 {
2678  int i;
2679  qDebug("Box of at least %d character%s", minl, minl == 1 ? "" : "s");
2680  qDebug(" Left states:");
2681  for (i = 0; i < ls.size(); i++) {
2682  if (lanchors.value(ls[i], 0) == 0)
2683  qDebug(" %d", ls[i]);
2684  else
2685  qDebug(" %d [anchors 0x%.8x]", ls[i], lanchors[ls[i]]);
2686  }
2687  qDebug(" Right states:");
2688  for (i = 0; i < rs.size(); i++) {
2689  if (ranchors.value(rs[i], 0) == 0)
2690  qDebug(" %d", rs[i]);
2691  else
2692  qDebug(" %d [anchors 0x%.8x]", rs[i], ranchors[rs[i]]);
2693  }
2694  qDebug(" Skip anchors: 0x%.8x", skipanchors);
2695 }
2696 #endif
2697 
2699 {
2700  for (int i = 0; i < to.ls.size(); i++) {
2701  for (int j = 0; j < rs.size(); j++) {
2702  int a = eng->anchorConcatenation(ranchors.value(rs.at(j), 0),
2703  to.lanchors.value(to.ls.at(i), 0));
2704  eng->addAnchors(rs[j], to.ls[i], a);
2705  }
2706  }
2707 }
2708 
2710 {
2711  categoriesRangeMap.insert("IsBasicLatin", qMakePair(0x0000, 0x007F));
2712  categoriesRangeMap.insert("IsLatin-1Supplement", qMakePair(0x0080, 0x00FF));
2713  categoriesRangeMap.insert("IsLatinExtended-A", qMakePair(0x0100, 0x017F));
2714  categoriesRangeMap.insert("IsLatinExtended-B", qMakePair(0x0180, 0x024F));
2715  categoriesRangeMap.insert("IsIPAExtensions", qMakePair(0x0250, 0x02AF));
2716  categoriesRangeMap.insert("IsSpacingModifierLetters", qMakePair(0x02B0, 0x02FF));
2717  categoriesRangeMap.insert("IsCombiningDiacriticalMarks", qMakePair(0x0300, 0x036F));
2718  categoriesRangeMap.insert("IsGreek", qMakePair(0x0370, 0x03FF));
2719  categoriesRangeMap.insert("IsCyrillic", qMakePair(0x0400, 0x04FF));
2720  categoriesRangeMap.insert("IsCyrillicSupplement", qMakePair(0x0500, 0x052F));
2721  categoriesRangeMap.insert("IsArmenian", qMakePair(0x0530, 0x058F));
2722  categoriesRangeMap.insert("IsHebrew", qMakePair(0x0590, 0x05FF));
2723  categoriesRangeMap.insert("IsArabic", qMakePair(0x0600, 0x06FF));
2724  categoriesRangeMap.insert("IsSyriac", qMakePair(0x0700, 0x074F));
2725  categoriesRangeMap.insert("IsArabicSupplement", qMakePair(0x0750, 0x077F));
2726  categoriesRangeMap.insert("IsThaana", qMakePair(0x0780, 0x07BF));
2727  categoriesRangeMap.insert("IsDevanagari", qMakePair(0x0900, 0x097F));
2728  categoriesRangeMap.insert("IsBengali", qMakePair(0x0980, 0x09FF));
2729  categoriesRangeMap.insert("IsGurmukhi", qMakePair(0x0A00, 0x0A7F));
2730  categoriesRangeMap.insert("IsGujarati", qMakePair(0x0A80, 0x0AFF));
2731  categoriesRangeMap.insert("IsOriya", qMakePair(0x0B00, 0x0B7F));
2732  categoriesRangeMap.insert("IsTamil", qMakePair(0x0B80, 0x0BFF));
2733  categoriesRangeMap.insert("IsTelugu", qMakePair(0x0C00, 0x0C7F));
2734  categoriesRangeMap.insert("IsKannada", qMakePair(0x0C80, 0x0CFF));
2735  categoriesRangeMap.insert("IsMalayalam", qMakePair(0x0D00, 0x0D7F));
2736  categoriesRangeMap.insert("IsSinhala", qMakePair(0x0D80, 0x0DFF));
2737  categoriesRangeMap.insert("IsThai", qMakePair(0x0E00, 0x0E7F));
2738  categoriesRangeMap.insert("IsLao", qMakePair(0x0E80, 0x0EFF));
2739  categoriesRangeMap.insert("IsTibetan", qMakePair(0x0F00, 0x0FFF));
2740  categoriesRangeMap.insert("IsMyanmar", qMakePair(0x1000, 0x109F));
2741  categoriesRangeMap.insert("IsGeorgian", qMakePair(0x10A0, 0x10FF));
2742  categoriesRangeMap.insert("IsHangulJamo", qMakePair(0x1100, 0x11FF));
2743  categoriesRangeMap.insert("IsEthiopic", qMakePair(0x1200, 0x137F));
2744  categoriesRangeMap.insert("IsEthiopicSupplement", qMakePair(0x1380, 0x139F));
2745  categoriesRangeMap.insert("IsCherokee", qMakePair(0x13A0, 0x13FF));
2746  categoriesRangeMap.insert("IsUnifiedCanadianAboriginalSyllabics", qMakePair(0x1400, 0x167F));
2747  categoriesRangeMap.insert("IsOgham", qMakePair(0x1680, 0x169F));
2748  categoriesRangeMap.insert("IsRunic", qMakePair(0x16A0, 0x16FF));
2749  categoriesRangeMap.insert("IsTagalog", qMakePair(0x1700, 0x171F));
2750  categoriesRangeMap.insert("IsHanunoo", qMakePair(0x1720, 0x173F));
2751  categoriesRangeMap.insert("IsBuhid", qMakePair(0x1740, 0x175F));
2752  categoriesRangeMap.insert("IsTagbanwa", qMakePair(0x1760, 0x177F));
2753  categoriesRangeMap.insert("IsKhmer", qMakePair(0x1780, 0x17FF));
2754  categoriesRangeMap.insert("IsMongolian", qMakePair(0x1800, 0x18AF));
2755  categoriesRangeMap.insert("IsLimbu", qMakePair(0x1900, 0x194F));
2756  categoriesRangeMap.insert("IsTaiLe", qMakePair(0x1950, 0x197F));
2757  categoriesRangeMap.insert("IsNewTaiLue", qMakePair(0x1980, 0x19DF));
2758  categoriesRangeMap.insert("IsKhmerSymbols", qMakePair(0x19E0, 0x19FF));
2759  categoriesRangeMap.insert("IsBuginese", qMakePair(0x1A00, 0x1A1F));
2760  categoriesRangeMap.insert("IsPhoneticExtensions", qMakePair(0x1D00, 0x1D7F));
2761  categoriesRangeMap.insert("IsPhoneticExtensionsSupplement", qMakePair(0x1D80, 0x1DBF));
2762  categoriesRangeMap.insert("IsCombiningDiacriticalMarksSupplement", qMakePair(0x1DC0, 0x1DFF));
2763  categoriesRangeMap.insert("IsLatinExtendedAdditional", qMakePair(0x1E00, 0x1EFF));
2764  categoriesRangeMap.insert("IsGreekExtended", qMakePair(0x1F00, 0x1FFF));
2765  categoriesRangeMap.insert("IsGeneralPunctuation", qMakePair(0x2000, 0x206F));
2766  categoriesRangeMap.insert("IsSuperscriptsandSubscripts", qMakePair(0x2070, 0x209F));
2767  categoriesRangeMap.insert("IsCurrencySymbols", qMakePair(0x20A0, 0x20CF));
2768  categoriesRangeMap.insert("IsCombiningMarksforSymbols", qMakePair(0x20D0, 0x20FF));
2769  categoriesRangeMap.insert("IsLetterlikeSymbols", qMakePair(0x2100, 0x214F));
2770  categoriesRangeMap.insert("IsNumberForms", qMakePair(0x2150, 0x218F));
2771  categoriesRangeMap.insert("IsArrows", qMakePair(0x2190, 0x21FF));
2772  categoriesRangeMap.insert("IsMathematicalOperators", qMakePair(0x2200, 0x22FF));
2773  categoriesRangeMap.insert("IsMiscellaneousTechnical", qMakePair(0x2300, 0x23FF));
2774  categoriesRangeMap.insert("IsControlPictures", qMakePair(0x2400, 0x243F));
2775  categoriesRangeMap.insert("IsOpticalCharacterRecognition", qMakePair(0x2440, 0x245F));
2776  categoriesRangeMap.insert("IsEnclosedAlphanumerics", qMakePair(0x2460, 0x24FF));
2777  categoriesRangeMap.insert("IsBoxDrawing", qMakePair(0x2500, 0x257F));
2778  categoriesRangeMap.insert("IsBlockElements", qMakePair(0x2580, 0x259F));
2779  categoriesRangeMap.insert("IsGeometricShapes", qMakePair(0x25A0, 0x25FF));
2780  categoriesRangeMap.insert("IsMiscellaneousSymbols", qMakePair(0x2600, 0x26FF));
2781  categoriesRangeMap.insert("IsDingbats", qMakePair(0x2700, 0x27BF));
2782  categoriesRangeMap.insert("IsMiscellaneousMathematicalSymbols-A", qMakePair(0x27C0, 0x27EF));
2783  categoriesRangeMap.insert("IsSupplementalArrows-A", qMakePair(0x27F0, 0x27FF));
2784  categoriesRangeMap.insert("IsBraillePatterns", qMakePair(0x2800, 0x28FF));
2785  categoriesRangeMap.insert("IsSupplementalArrows-B", qMakePair(0x2900, 0x297F));
2786  categoriesRangeMap.insert("IsMiscellaneousMathematicalSymbols-B", qMakePair(0x2980, 0x29FF));
2787  categoriesRangeMap.insert("IsSupplementalMathematicalOperators", qMakePair(0x2A00, 0x2AFF));
2788  categoriesRangeMap.insert("IsMiscellaneousSymbolsandArrows", qMakePair(0x2B00, 0x2BFF));
2789  categoriesRangeMap.insert("IsGlagolitic", qMakePair(0x2C00, 0x2C5F));
2790  categoriesRangeMap.insert("IsCoptic", qMakePair(0x2C80, 0x2CFF));
2791  categoriesRangeMap.insert("IsGeorgianSupplement", qMakePair(0x2D00, 0x2D2F));
2792  categoriesRangeMap.insert("IsTifinagh", qMakePair(0x2D30, 0x2D7F));
2793  categoriesRangeMap.insert("IsEthiopicExtended", qMakePair(0x2D80, 0x2DDF));
2794  categoriesRangeMap.insert("IsSupplementalPunctuation", qMakePair(0x2E00, 0x2E7F));
2795  categoriesRangeMap.insert("IsCJKRadicalsSupplement", qMakePair(0x2E80, 0x2EFF));
2796  categoriesRangeMap.insert("IsKangxiRadicals", qMakePair(0x2F00, 0x2FDF));
2797  categoriesRangeMap.insert("IsIdeographicDescriptionCharacters", qMakePair(0x2FF0, 0x2FFF));
2798  categoriesRangeMap.insert("IsCJKSymbolsandPunctuation", qMakePair(0x3000, 0x303F));
2799  categoriesRangeMap.insert("IsHiragana", qMakePair(0x3040, 0x309F));
2800  categoriesRangeMap.insert("IsKatakana", qMakePair(0x30A0, 0x30FF));
2801  categoriesRangeMap.insert("IsBopomofo", qMakePair(0x3100, 0x312F));
2802  categoriesRangeMap.insert("IsHangulCompatibilityJamo", qMakePair(0x3130, 0x318F));
2803  categoriesRangeMap.insert("IsKanbun", qMakePair(0x3190, 0x319F));
2804  categoriesRangeMap.insert("IsBopomofoExtended", qMakePair(0x31A0, 0x31BF));
2805  categoriesRangeMap.insert("IsCJKStrokes", qMakePair(0x31C0, 0x31EF));
2806  categoriesRangeMap.insert("IsKatakanaPhoneticExtensions", qMakePair(0x31F0, 0x31FF));
2807  categoriesRangeMap.insert("IsEnclosedCJKLettersandMonths", qMakePair(0x3200, 0x32FF));
2808  categoriesRangeMap.insert("IsCJKCompatibility", qMakePair(0x3300, 0x33FF));
2809  categoriesRangeMap.insert("IsCJKUnifiedIdeographsExtensionA", qMakePair(0x3400, 0x4DB5));
2810  categoriesRangeMap.insert("IsYijingHexagramSymbols", qMakePair(0x4DC0, 0x4DFF));
2811  categoriesRangeMap.insert("IsCJKUnifiedIdeographs", qMakePair(0x4E00, 0x9FFF));
2812  categoriesRangeMap.insert("IsYiSyllables", qMakePair(0xA000, 0xA48F));
2813  categoriesRangeMap.insert("IsYiRadicals", qMakePair(0xA490, 0xA4CF));
2814  categoriesRangeMap.insert("IsModifierToneLetters", qMakePair(0xA700, 0xA71F));
2815  categoriesRangeMap.insert("IsSylotiNagri", qMakePair(0xA800, 0xA82F));
2816  categoriesRangeMap.insert("IsHangulSyllables", qMakePair(0xAC00, 0xD7A3));
2817  categoriesRangeMap.insert("IsPrivateUse", qMakePair(0xE000, 0xF8FF));
2818  categoriesRangeMap.insert("IsCJKCompatibilityIdeographs", qMakePair(0xF900, 0xFAFF));
2819  categoriesRangeMap.insert("IsAlphabeticPresentationForms", qMakePair(0xFB00, 0xFB4F));
2820  categoriesRangeMap.insert("IsArabicPresentationForms-A", qMakePair(0xFB50, 0xFDFF));
2821  categoriesRangeMap.insert("IsVariationSelectors", qMakePair(0xFE00, 0xFE0F));
2822  categoriesRangeMap.insert("IsVerticalForms", qMakePair(0xFE10, 0xFE1F));
2823  categoriesRangeMap.insert("IsCombiningHalfMarks", qMakePair(0xFE20, 0xFE2F));
2824  categoriesRangeMap.insert("IsCJKCompatibilityForms", qMakePair(0xFE30, 0xFE4F));
2825  categoriesRangeMap.insert("IsSmallFormVariants", qMakePair(0xFE50, 0xFE6F));
2826  categoriesRangeMap.insert("IsArabicPresentationForms-B", qMakePair(0xFE70, 0xFEFF));
2827  categoriesRangeMap.insert("IsHalfwidthandFullwidthForms", qMakePair(0xFF00, 0xFFEF));
2828  categoriesRangeMap.insert("IsSpecials", qMakePair(0xFFF0, 0xFFFF));
2829  categoriesRangeMap.insert("IsLinearBSyllabary", qMakePair(0x10000, 0x1007F));
2830  categoriesRangeMap.insert("IsLinearBIdeograms", qMakePair(0x10080, 0x100FF));
2831  categoriesRangeMap.insert("IsAegeanNumbers", qMakePair(0x10100, 0x1013F));
2832  categoriesRangeMap.insert("IsAncientGreekNumbers", qMakePair(0x10140, 0x1018F));
2833  categoriesRangeMap.insert("IsOldItalic", qMakePair(0x10300, 0x1032F));
2834  categoriesRangeMap.insert("IsGothic", qMakePair(0x10330, 0x1034F));
2835  categoriesRangeMap.insert("IsUgaritic", qMakePair(0x10380, 0x1039F));
2836  categoriesRangeMap.insert("IsOldPersian", qMakePair(0x103A0, 0x103DF));
2837  categoriesRangeMap.insert("IsDeseret", qMakePair(0x10400, 0x1044F));
2838  categoriesRangeMap.insert("IsShavian", qMakePair(0x10450, 0x1047F));
2839  categoriesRangeMap.insert("IsOsmanya", qMakePair(0x10480, 0x104AF));
2840  categoriesRangeMap.insert("IsCypriotSyllabary", qMakePair(0x10800, 0x1083F));
2841  categoriesRangeMap.insert("IsKharoshthi", qMakePair(0x10A00, 0x10A5F));
2842  categoriesRangeMap.insert("IsByzantineMusicalSymbols", qMakePair(0x1D000, 0x1D0FF));
2843  categoriesRangeMap.insert("IsMusicalSymbols", qMakePair(0x1D100, 0x1D1FF));
2844  categoriesRangeMap.insert("IsAncientGreekMusicalNotation", qMakePair(0x1D200, 0x1D24F));
2845  categoriesRangeMap.insert("IsTaiXuanJingSymbols", qMakePair(0x1D300, 0x1D35F));
2846  categoriesRangeMap.insert("IsMathematicalAlphanumericSymbols", qMakePair(0x1D400, 0x1D7FF));
2847  categoriesRangeMap.insert("IsCJKUnifiedIdeographsExtensionB", qMakePair(0x20000, 0x2A6DF));
2848  categoriesRangeMap.insert("IsCJKCompatibilityIdeographsSupplement", qMakePair(0x2F800, 0x2FA1F));
2849  categoriesRangeMap.insert("IsTags", qMakePair(0xE0000, 0xE007F));
2850  categoriesRangeMap.insert("IsVariationSelectorsSupplement", qMakePair(0xE0100, 0xE01EF));
2851  categoriesRangeMap.insert("IsSupplementaryPrivateUseArea-A", qMakePair(0xF0000, 0xFFFFF));
2852  categoriesRangeMap.insert("IsSupplementaryPrivateUseArea-B", qMakePair(0x100000, 0x10FFFF));
2853 }
2854 
// NOTE(review): the signature line is absent from this listing; the other
// routines in this file call this as getChar() - confirm against the original.
// Returns EOS once yyPos has reached yyLen; otherwise returns the ushort value
// of yyIn[yyPos] and advances yyPos by one.
2856 {
2857  return (yyPos == yyLen) ? EOS : yyIn[yyPos++].unicode();
2858 }
2859 
// NOTE(review): the signature line is absent from this listing; the tokenizer
// calls this as getEscape() - confirm against the original.
// Translates one backslash escape into a token. On entry yyCh holds the
// character that follows the backslash; on exit yyCh holds the first character
// after the escape. Returns Tok_Char | code for a literal, Tok_CharClass after
// filling yyCharClass, Tok_Word / Tok_NonWord for \b / \B, or Tok_BackRef | n.
// NOTE(review): the listing numbers skip in several places below (2949, 2975,
// 3007, 3013, 3104-3105, 3111, 3114, 3144), so statements are missing there.
2861 {
2862 #ifndef QT_NO_REGEXP_ESCAPE
2863  const char tab[] = "afnrtv"; // no b, as \b means word boundary
2864  const char backTab[] = "\a\f\n\r\t\v";
2865  ushort low;
2866  int i;
2867 #endif
2868  ushort val;
2869  int prevCh = yyCh;
2870 
// A backslash at the very end of the pattern: record an error and return a
// literal backslash.
2871  if (prevCh == EOS) {
2872  error(RXERR_END);
2873  return Tok_Char | '\\';
2874  }
2875  yyCh = getChar();
2876 #ifndef QT_NO_REGEXP_ESCAPE
// Simple C-style escapes: tab[] and backTab[] are parallel, so the index of
// the letter in tab[] selects the control character in backTab[]. The mask
// test keeps values above 0xff away from strchr().
2877  if ((prevCh & ~0xff) == 0) {
2878  const char *p = strchr(tab, prevCh);
2879  if (p != 0)
2880  return Tok_Char | backTab[p - tab];
2881  }
2882 #endif
2883 
2884  switch (prevCh) {
2885 #ifndef QT_NO_REGEXP_ESCAPE
2886  case '0':
// \0 followed by up to three octal digits; values above 0377 are an error.
2887  val = 0;
2888  for (i = 0; i < 3; i++) {
2889  if (yyCh >= '0' && yyCh <= '7')
2890  val = (val << 3) | (yyCh - '0');
2891  else
2892  break;
2893  yyCh = getChar();
2894  }
2895  if ((val & ~0377) != 0)
2896  error(RXERR_OCTAL);
2897  return Tok_Char | val;
2898 #endif
2899 #ifndef QT_NO_REGEXP_ESCAPE
2900  case 'B':
2901  return Tok_NonWord;
2902 #endif
2903 #ifndef QT_NO_REGEXP_CCLASS
// The upper-case classes \D, \S, \W pass the bitwise complement (within the
// low 31 bits) of the category masks used by \d, \s, \w below.
2904  case 'D':
2905  // see QChar::isDigit()
2906  yyCharClass->addCategories(0x7fffffef);
2907  return Tok_CharClass;
2908  case 'S':
2909  // see QChar::isSpace()
2910  yyCharClass->addCategories(0x7ffff87f);
2911  yyCharClass->addRange(0x0000, 0x0008);
2912  yyCharClass->addRange(0x000e, 0x001f);
2913  yyCharClass->addRange(0x007f, 0x009f);
2914  return Tok_CharClass;
2915  case 'W':
2916  // see QChar::isLetterOrNumber() and QChar::isMark()
2917  yyCharClass->addCategories(0x7fe07f81);
// NOTE(review): 0x2040 is already the upper bound of the range on the next
// line, so the addSingleton(0x2040) after it looks redundant - confirm.
2918  yyCharClass->addRange(0x203f, 0x2040);
2919  yyCharClass->addSingleton(0x2040);
2920  yyCharClass->addSingleton(0x2054);
2921  yyCharClass->addSingleton(0x30fb);
2922  yyCharClass->addRange(0xfe33, 0xfe34);
2923  yyCharClass->addRange(0xfe4d, 0xfe4f);
2924  yyCharClass->addSingleton(0xff3f);
2925  yyCharClass->addSingleton(0xff65);
2926  return Tok_CharClass;
2927 #endif
2928 #ifndef QT_NO_REGEXP_ESCAPE
2929  case 'b':
2930  return Tok_Word;
2931 #endif
2932 #ifndef QT_NO_REGEXP_CCLASS
2933  case 'd':
2934  // see QChar::isDigit()
2935  yyCharClass->addCategories(0x00000010);
2936  return Tok_CharClass;
2937  case 's':
2938  // see QChar::isSpace()
2939  yyCharClass->addCategories(0x00000380);
2940  yyCharClass->addRange(0x0009, 0x000d);
2941  return Tok_CharClass;
2942  case 'w':
2943  // see QChar::isLetterOrNumber() and QChar::isMark()
2944  yyCharClass->addCategories(0x000f807e);
2945  yyCharClass->addSingleton(0x005f); // '_'
2946  return Tok_CharClass;
// \I, \C and \P share the code of their lower-case forms by falling through;
// the extra classes are only populated when xmlSchemaExtensions is set.
// NOTE(review): listing line 2949 is absent inside the block below.
2947  case 'I':
2948  if (xmlSchemaExtensions) {
2950  // fall through
2951  }
2952  case 'i':
2953  if (xmlSchemaExtensions) {
2954  yyCharClass->addCategories(0x000f807e);
2955  yyCharClass->addSingleton(0x003a); // ':'
2956  yyCharClass->addSingleton(0x005f); // '_'
2957  yyCharClass->addRange(0x0041, 0x005a); // [A-Z]
2958  yyCharClass->addRange(0x0061, 0x007a); // [a-z]
2959  yyCharClass->addRange(0xc0, 0xd6);
2960  yyCharClass->addRange(0xd8, 0xf6);
2961  yyCharClass->addRange(0xf8, 0x2ff);
2962  yyCharClass->addRange(0x370, 0x37d);
2963  yyCharClass->addRange(0x37f, 0x1fff);
2964  yyCharClass->addRange(0x200c, 0x200d);
2965  yyCharClass->addRange(0x2070, 0x218f);
2966  yyCharClass->addRange(0x2c00, 0x2fef);
2967  yyCharClass->addRange(0x3001, 0xd7ff);
2968  yyCharClass->addRange(0xf900, 0xfdcf);
2969  yyCharClass->addRange(0xfdf0, 0xfffd);
// NOTE(review): assuming ushort is 16 bits wide, (ushort)0x10000 is 0x0000 and
// (ushort)0xeffff is 0xffff, so this call would add 0x0000-0xffff rather than
// the supplementary planes - confirm whether that is intended.
2970  yyCharClass->addRange((ushort)0x10000, (ushort)0xeffff);
2971  }
2972  return Tok_CharClass;
// NOTE(review): listing line 2975 is absent inside the block below.
2973  case 'C':
2974  if (xmlSchemaExtensions) {
2976  // fall through
2977  }
2978  case 'c':
2979  if (xmlSchemaExtensions) {
2980  yyCharClass->addCategories(0x000f807e);
2981  yyCharClass->addSingleton(0x002d); // '-'
2982  yyCharClass->addSingleton(0x002e); // '.'
2983  yyCharClass->addSingleton(0x003a); // ':'
2984  yyCharClass->addSingleton(0x005f); // '_'
2985  yyCharClass->addSingleton(0xb7);
2986  yyCharClass->addRange(0x0030, 0x0039); // [0-9]
2987  yyCharClass->addRange(0x0041, 0x005a); // [A-Z]
2988  yyCharClass->addRange(0x0061, 0x007a); // [a-z]
2989  yyCharClass->addRange(0xc0, 0xd6);
2990  yyCharClass->addRange(0xd8, 0xf6);
2991  yyCharClass->addRange(0xf8, 0x2ff);
2992  yyCharClass->addRange(0x370, 0x37d);
2993  yyCharClass->addRange(0x37f, 0x1fff);
2994  yyCharClass->addRange(0x200c, 0x200d);
2995  yyCharClass->addRange(0x2070, 0x218f);
2996  yyCharClass->addRange(0x2c00, 0x2fef);
2997  yyCharClass->addRange(0x3001, 0xd7ff);
2998  yyCharClass->addRange(0xf900, 0xfdcf);
2999  yyCharClass->addRange(0xfdf0, 0xfffd);
// NOTE(review): same 16-bit truncation concern as in the 'i' case above.
3000  yyCharClass->addRange((ushort)0x10000, (ushort)0xeffff);
3001  yyCharClass->addRange(0x0300, 0x036f);
3002  yyCharClass->addRange(0x203f, 0x2040);
3003  }
3004  return Tok_CharClass;
// NOTE(review): listing line 3007 is absent inside the block below.
3005  case 'P':
3006  if (xmlSchemaExtensions) {
3008  // fall through
3009  }
3010  case 'p':
3011  if (xmlSchemaExtensions) {
// \p must be followed by a braced name. NOTE(review): listing line 3013 is
// absent before the early return.
3012  if (yyCh != '{') {
3014  return Tok_CharClass;
3015  }
3016 
// Collect everything up to the closing '}' into `category`; reaching the end
// of the pattern first is an error.
3017  QByteArray category;
3018  yyCh = getChar();
3019  while (yyCh != '}') {
3020  if (yyCh == EOS) {
3021  error(RXERR_END);
3022  return Tok_CharClass;
3023  }
3024  category.append(yyCh);
3025  yyCh = getChar();
3026  }
3027  yyCh = getChar(); // skip closing '}'
3028 
// Map the category name to a category bit mask. Each one-letter name uses the
// bitwise OR of the masks of its two-letter sub-categories listed after it.
3029  if (category == "M") {
3030  yyCharClass->addCategories(0x0000000e);
3031  } else if (category == "Mn") {
3032  yyCharClass->addCategories(0x00000002);
3033  } else if (category == "Mc") {
3034  yyCharClass->addCategories(0x00000004);
3035  } else if (category == "Me") {
3036  yyCharClass->addCategories(0x00000008);
3037  } else if (category == "N") {
3038  yyCharClass->addCategories(0x00000070);
3039  } else if (category == "Nd") {
3040  yyCharClass->addCategories(0x00000010);
3041  } else if (category == "Nl") {
3042  yyCharClass->addCategories(0x00000020);
3043  } else if (category == "No") {
3044  yyCharClass->addCategories(0x00000040);
3045  } else if (category == "Z") {
3046  yyCharClass->addCategories(0x00000380);
3047  } else if (category == "Zs") {
3048  yyCharClass->addCategories(0x00000080);
3049  } else if (category == "Zl") {
3050  yyCharClass->addCategories(0x00000100);
3051  } else if (category == "Zp") {
3052  yyCharClass->addCategories(0x00000200);
3053  } else if (category == "C") {
3054  yyCharClass->addCategories(0x00006c00);
3055  } else if (category == "Cc") {
3056  yyCharClass->addCategories(0x00000400);
3057  } else if (category == "Cf") {
3058  yyCharClass->addCategories(0x00000800);
3059  } else if (category == "Cs") {
3060  yyCharClass->addCategories(0x00001000);
3061  } else if (category == "Co") {
3062  yyCharClass->addCategories(0x00002000);
3063  } else if (category == "Cn") {
3064  yyCharClass->addCategories(0x00004000);
3065  } else if (category == "L") {
3066  yyCharClass->addCategories(0x000f8000);
3067  } else if (category == "Lu") {
3068  yyCharClass->addCategories(0x00008000);
3069  } else if (category == "Ll") {
3070  yyCharClass->addCategories(0x00010000);
3071  } else if (category == "Lt") {
3072  yyCharClass->addCategories(0x00020000);
3073  } else if (category == "Lm") {
3074  yyCharClass->addCategories(0x00040000);
3075  } else if (category == "Lo") {
3076  yyCharClass->addCategories(0x00080000);
// NOTE(review): unlike the other one-letter names, this mask is not the OR of
// the Pc..Po masks below (which would be 0x07f00000) - confirm the value.
3077  } else if (category == "P") {
3078  yyCharClass->addCategories(0x4f580780);
3079  } else if (category == "Pc") {
3080  yyCharClass->addCategories(0x00100000);
3081  } else if (category == "Pd") {
3082  yyCharClass->addCategories(0x00200000);
3083  } else if (category == "Ps") {
3084  yyCharClass->addCategories(0x00400000);
3085  } else if (category == "Pe") {
3086  yyCharClass->addCategories(0x00800000);
3087  } else if (category == "Pi") {
3088  yyCharClass->addCategories(0x01000000);
3089  } else if (category == "Pf") {
3090  yyCharClass->addCategories(0x02000000);
3091  } else if (category == "Po") {
3092  yyCharClass->addCategories(0x04000000);
3093  } else if (category == "S") {
3094  yyCharClass->addCategories(0x78000000);
3095  } else if (category == "Sm") {
3096  yyCharClass->addCategories(0x08000000);
3097  } else if (category == "Sc") {
3098  yyCharClass->addCategories(0x10000000);
3099  } else if (category == "Sk") {
3100  yyCharClass->addCategories(0x20000000);
3101  } else if (category == "So") {
3102  yyCharClass->addCategories(0x40000000);
// "Is..." names are looked up in categoriesRangeMap. NOTE(review): listing
// lines 3104-3105, 3111 and 3114 are absent in the branches below.
3103  } else if (category.startsWith("Is")) {
3106 
3107  if (categoriesRangeMap.contains(category)) {
3108  const QPair<int, int> range = categoriesRangeMap.value(category);
// NOTE(review): the bounds are int and several map entries exceed 0xFFFF; if
// addRange() takes ushort bounds they would be truncated here - confirm.
3109  yyCharClass->addRange(range.first, range.second);
3110  } else {
3112  }
3113  } else {
3115  }
3116  }
3117  return Tok_CharClass;
3118 #endif
3119 #ifndef QT_NO_REGEXP_ESCAPE
3120  case 'x':
// \x followed by up to four hexadecimal digits (either case); with no digits
// the result is Tok_Char | 0.
3121  val = 0;
3122  for (i = 0; i < 4; i++) {
3123  low = QChar(yyCh).toLower().unicode();
3124  if (low >= '0' && low <= '9')
3125  val = (val << 4) | (low - '0');
3126  else if (low >= 'a' && low <= 'f')
3127  val = (val << 4) | (low - 'a' + 10);
3128  else
3129  break;
3130  yyCh = getChar();
3131  }
3132  return Tok_Char | val;
3133 #endif
3134  default:
// \1..\9 start a decimal back-reference number; any following digits extend
// it. NOTE(review): val is a ushort, so very long digit runs wrap - confirm
// whether that matters. Listing line 3144 (the #else branch) is absent.
3135  if (prevCh >= '1' && prevCh <= '9') {
3136 #ifndef QT_NO_REGEXP_BACKREF
3137  val = prevCh - '0';
3138  while (yyCh >= '0' && yyCh <= '9') {
3139  val = (val * 10) + (yyCh - '0');
3140  yyCh = getChar();
3141  }
3142  return Tok_BackRef | val;
3143 #else
3145 #endif
3146  }
// Any other escaped character stands for itself.
3147  return Tok_Char | prevCh;
3148  }
3149 }
3150 
3151 #ifndef QT_NO_REGEXP_INTERVAL
// NOTE(review): the signature line is absent from this listing; the tokenizer
// calls this as getRep(0) - confirm against the original.
// Parses a run of decimal digits starting at yyCh and returns its value,
// leaving yyCh on the first non-digit. Returns `def` without consuming
// anything when yyCh is not a digit.
3153 {
3154  if (yyCh >= '0' && yyCh <= '9') {
3155  int rep = 0;
3156  do {
3157  rep = 10 * rep + yyCh - '0';
// A value of InftyRep or more is replaced by `def`; parsing then continues
// from that value if more digits follow.
// NOTE(review): listing line 3159 is absent inside this branch.
3158  if (rep >= InftyRep) {
3160  rep = def;
3161  }
3162  yyCh = getChar();
3163  } while (yyCh >= '0' && yyCh <= '9');
3164  return rep;
3165  } else {
3166  return def;
3167  }
3168 }
3169 #endif
3170 
3171 #ifndef QT_NO_REGEXP_LOOKAHEAD
// NOTE(review): the signature line is absent from this listing; the parser
// calls this as skipChars(len) - confirm against the original.
// Advances the tokenizer by n characters: yyPos grows by n in total and yyCh
// is reloaded through getChar() (so it becomes EOS if that reaches yyLen).
// Does nothing when n <= 0.
3173 {
3174  if (n > 0) {
// getChar() itself advances by one, hence only n - 1 is added here.
3175  yyPos += n - 1;
3176  yyCh = getChar();
3177  }
3178 }
3179 #endif
3180 
// Records a parse error message in yyError. Only the first error is kept:
// once yyError is non-empty, later calls leave it unchanged.
3181 void QRegExpEngine::error(const char *msg)
3182 {
3183  if (yyError.isEmpty())
3184  yyError = QLatin1String(msg);
3185 }
3186 
// Resets the tokenizer to scan the `len` characters starting at `rx`:
// positions and repetition bounds are zeroed, the error string is cleared and
// yyCh is primed with the first character (EOS if len is 0).
// NOTE(review): listing line 3194 is absent between the yyCh and yyMinRep
// assignments.
3187 void QRegExpEngine::startTokenizer(const QChar *rx, int len)
3188 {
3189  yyIn = rx;
3190  yyPos0 = 0;
3191  yyPos = 0;
3192  yyLen = len;
3193  yyCh = getChar();
3195  yyMinRep = 0;
3196  yyMaxRep = 0;
3197  yyError = QString();
3198 }
3199 
// NOTE(review): the signature line is absent from this listing; the parser
// calls this as getToken() - confirm against the original.
// Scans one token beginning at the current character yyCh. It stores the
// token's start in yyPos0, clears yyCharClass and the repetition bounds, then
// dispatches on the character. Returns a Tok_* code; an ordinary character is
// returned as Tok_Char | code. For Tok_Quantifier the bounds are left in
// yyMinRep / yyMaxRep; for Tok_CharClass the set is left in yyCharClass.
// NOTE(review): the listing numbers skip at 3238, 3241, 3309, 3329, 3339,
// 3342, 3344, 3348 and 3354, so statements are missing at those points.
3201 {
3202 #ifndef QT_NO_REGEXP_CCLASS
3203  ushort pendingCh = 0;
3204  bool charPending;
3205  bool rangePending;
3206  int tok;
3207 #endif
3208  int prevCh = yyCh;
3209 
// yyPos is already one past the character held in yyCh.
3210  yyPos0 = yyPos - 1;
3211 #ifndef QT_NO_REGEXP_CCLASS
3212  yyCharClass->clear();
3213 #endif
3214  yyMinRep = 0;
3215  yyMaxRep = 0;
3216  yyCh = getChar();
3217 
3218  switch (prevCh) {
3219  case EOS:
3220  yyPos0 = yyPos;
3221  return Tok_Eos;
3222  case '$':
3223  return Tok_Dollar;
3224  case '(':
// "(?" introduces a special group; the character after '?' selects its kind.
3225  if (yyCh == '?') {
3226  prevCh = getChar();
3227  yyCh = getChar();
3228  switch (prevCh) {
3229 #ifndef QT_NO_REGEXP_LOOKAHEAD
3230  case '!':
3231  return Tok_NegLookahead;
3232  case '=':
3233  return Tok_PosLookahead;
3234 #endif
3235  case ':':
3236  return Tok_MagicLeftParen;
// NOTE(review): listing lines 3238 and 3241 are absent before the two returns
// below.
3237  case '<':
3239  return Tok_MagicLeftParen;
3240  default:
3242  return Tok_MagicLeftParen;
3243  }
3244  } else {
3245  return Tok_LeftParen;
3246  }
3247  case ')':
3248  return Tok_RightParen;
3249  case '*':
3250  yyMinRep = 0;
3251  yyMaxRep = InftyRep;
3252  return Tok_Quantifier;
3253  case '+':
3254  yyMinRep = 1;
3255  yyMaxRep = InftyRep;
3256  return Tok_Quantifier;
3257  case '.':
// yyCharClass was cleared above, so negating it is how '.' is expressed.
3258 #ifndef QT_NO_REGEXP_CCLASS
3259  yyCharClass->setNegative(true);
3260 #endif
3261  return Tok_CharClass;
3262  case '?':
3263  yyMinRep = 0;
3264  yyMaxRep = 1;
3265  return Tok_Quantifier;
3266  case '[':
3267 #ifndef QT_NO_REGEXP_CCLASS
3268  if (yyCh == '^') {
3269  yyCharClass->setNegative(true);
3270  yyCh = getChar();
3271  }
// Bracket expression. charPending means pendingCh has been read but not yet
// added (it may turn out to be the start of a range); rangePending means a
// '-' has been seen after it.
3272  charPending = false;
3273  rangePending = false;
3274  do {
3275  if (yyCh == '-' && charPending && !rangePending) {
3276  rangePending = true;
3277  yyCh = getChar();
3278  } else {
3279  if (charPending && !rangePending) {
3280  yyCharClass->addSingleton(pendingCh);
3281  charPending = false;
3282  }
3283  if (yyCh == '\\') {
3284  yyCh = getChar();
3285  tok = getEscape();
// NOTE(review): '\b' is assigned without OR-ing in Tok_Char; whether the
// Tok_Char test below accepts it depends on Tok_Char's value - confirm.
3286  if (tok == Tok_Word)
3287  tok = '\b';
3288  } else {
3289  tok = Tok_Char | yyCh;
3290  yyCh = getChar();
3291  }
3292  if (tok == Tok_CharClass) {
// A class escape cannot be the end of a range, so a pending "x-" is added as
// the two literals '-' and x.
3293  if (rangePending) {
3294  yyCharClass->addSingleton('-');
3295  yyCharClass->addSingleton(pendingCh);
3296  charPending = false;
3297  rangePending = false;
3298  }
3299  } else if ((tok & Tok_Char) != 0) {
3300  if (rangePending) {
3301  yyCharClass->addRange(pendingCh, tok ^ Tok_Char);
3302  charPending = false;
3303  rangePending = false;
3304  } else {
3305  pendingCh = tok ^ Tok_Char;
3306  charPending = true;
3307  }
// NOTE(review): listing line 3309 is absent inside this else branch.
3308  } else {
3310  }
3311  }
3312  } while (yyCh != ']' && yyCh != EOS);
// Flush whatever is still pending: a trailing '-' is a literal.
3313  if (rangePending)
3314  yyCharClass->addSingleton('-');
3315  if (charPending)
3316  yyCharClass->addSingleton(pendingCh);
3317  if (yyCh == EOS)
3318  error(RXERR_END);
3319  else
3320  yyCh = getChar();
3321  return Tok_CharClass;
3322 #else
3323  error(RXERR_END);
3324  return Tok_Char | '[';
3325 #endif
3326  case '\\':
3327  return getEscape();
// NOTE(review): listing line 3329 is absent before the return below.
3328  case ']':
3330  return Tok_Char | ']';
3331  case '^':
3332  return Tok_Caret;
3333  case '{':
3334 #ifndef QT_NO_REGEXP_INTERVAL
// Interval quantifier: the minimum is parsed first and the maximum defaults
// to it. NOTE(review): listing lines 3339, 3342 and 3344 are absent below, so
// the statements controlled by the following ifs are not visible here.
3335  yyMinRep = getRep(0);
3336  yyMaxRep = yyMinRep;
3337  if (yyCh == ',') {
3338  yyCh = getChar();
3340  }
3341  if (yyMaxRep < yyMinRep)
3343  if (yyCh != '}')
3345  yyCh = getChar();
3346  return Tok_Quantifier;
3347 #else
3349  return Tok_Char | '{';
3350 #endif
3351  case '|':
3352  return Tok_Bar;
// NOTE(review): listing line 3354 is absent before the return below.
3353  case '}':
3355  return Tok_Char | '}';
3356  default:
3357  return Tok_Char | prevCh;
3358  }
3359 }
3360 
// Parses the `len` characters at `pattern` and builds the automaton as
// box . middleBox . rightBox, where the outer two boxes are set from an empty
// QRegExpCharClass (the initial and final states per the inline comments).
// Returns -1 if any error was recorded in yyError, otherwise yyPos0 as left
// by the tokenizer when parsing stopped.
// NOTE(review): the listing numbers skip at 3396, 3398, 3400, 3404, 3419,
// 3433 and 3453, so statements and case labels are missing at those points.
3361 int QRegExpEngine::parse(const QChar *pattern, int len)
3362 {
3363  valid = true;
3364  startTokenizer(pattern, len);
3365  yyTok = getToken();
3366 #ifndef QT_NO_REGEXP_CAPTURE
3367  yyMayCapture = true;
3368 #else
3369  yyMayCapture = false;
3370 #endif
3371 
3372 #ifndef QT_NO_REGEXP_CAPTURE
3373  int atom = startAtom(false);
3374 #endif
3375  QRegExpCharClass anything;
3376  Box box(this); // create InitialState
3377  box.set(anything);
3378  Box rightBox(this); // create FinalState
3379  rightBox.set(anything);
3380 
3381  Box middleBox(this);
3382  parseExpression(&middleBox);
3383 #ifndef QT_NO_REGEXP_CAPTURE
3384  finishAtom(atom, false);
3385 #endif
3386 #ifndef QT_NO_REGEXP_OPTIM
3387  middleBox.setupHeuristics();
3388 #endif
3389  box.cat(middleBox);
3390  box.cat(rightBox);
// The tokenizer's scratch character class is released once parsing is done.
3391  yyCharClass.reset(0);
3392 
3393 #ifndef QT_NO_REGEXP_CAPTURE
// Assign capture slot numbers to the atoms in f[].
// NOTE(review): the case labels of this switch are absent from the listing.
3394  for (int i = 0; i < nf; ++i) {
3395  switch (f[i].capture) {
3397  break;
3399  f[i].capture = ncap;
3401  ++ncap;
3402  ++officialncap;
3403  break;
3405  f[i].capture = greedyQuantifiers ? ncap++ : QRegExpAtom::NoCapture;
3406  }
3407  }
3408 
3409 #ifndef QT_NO_REGEXP_BACKREF
3410 #ifndef QT_NO_REGEXP_OPTIM
// With no official captures and no back-references the atom table is dropped.
3411  if (officialncap == 0 && nbrefs == 0) {
3412  ncap = nf = 0;
3413  f.clear();
3414  }
3415 #endif
3416  // handle the case where there's a \5 with no corresponding capture
3417  // (captureForOfficialCapture.size() != officialncap)
3418  for (int i = 0; i < nbrefs - officialncap; ++i) {
3420  ++ncap;
3421  }
3422 #endif
3423 #endif
3424 
3425  if (!yyError.isEmpty())
3426  return -1;
3427 
3428 #ifndef QT_NO_REGEXP_OPTIM
// caretAnchored stays true only if the initial state has anchors and every
// one of them includes Anchor_Caret (and, when alternation anchors are
// compiled in, none has the Anchor_Alternation bit).
// NOTE(review): the declaration of iterator `a` (listing line 3433) is absent.
3429  const QRegExpAutomatonState &sinit = s.at(InitialState);
3430  caretAnchored = !sinit.anchors.isEmpty();
3431  if (caretAnchored) {
3432  const QMap<int, int> &anchors = sinit.anchors;
3434  for (a = anchors.constBegin(); a != anchors.constEnd(); ++a) {
3435  if (
3436 #ifndef QT_NO_REGEXP_ANCHOR_ALT
3437  (*a & Anchor_Alternation) != 0 ||
3438 #endif
3439  (*a & Anchor_Caret) == 0)
3440  {
3441  caretAnchored = false;
3442  break;
3443  }
3444  }
3445  }
3446 #endif
3447 
3448  // cleanup anchors
// Entries whose anchor value is 0 are erased from every state.
// NOTE(review): the declaration of iterator `a` (listing line 3453) is absent.
3449  int numStates = s.count();
3450  for (int i = 0; i < numStates; ++i) {
3451  QRegExpAutomatonState &state = s[i];
3452  if (!state.anchors.isEmpty()) {
3454  while (a != state.anchors.end()) {
3455  if (a.value() == 0)
3456  a = state.anchors.erase(a);
3457  else
3458  ++a;
3459  }
3460  }
3461  }
3462 
3463  return yyPos0;
3464 }
3465 
// NOTE(review): the signature line is absent from this listing; other code
// calls this as parseAtom(box) with a Box pointer - confirm.
// Translates the current token yyTok into `box` (a literal, an anchor, a
// lookahead, a parenthesised sub-expression, a character class or a
// back-reference) and then fetches the next token into yyTok.
// NOTE(review): the listing numbers skip at 3496, 3500, 3522 and 3530, so
// statements are missing at those points.
3467 {
3468 #ifndef QT_NO_REGEXP_LOOKAHEAD
3469  QRegExpEngine *eng = 0;
3470  bool neg;
3471  int len;
3472 #endif
3473 
3474  if ((yyTok & Tok_Char) != 0) {
3475  box->set(QChar(yyTok ^ Tok_Char));
3476  } else {
3477 #ifndef QT_NO_REGEXP_OPTIM
3478  trivial = false;
3479 #endif
3480  switch (yyTok) {
3481  case Tok_Dollar:
3482  box->catAnchor(Anchor_Dollar);
3483  break;
3484  case Tok_Caret:
3485  box->catAnchor(Anchor_Caret);
3486  break;
3487 #ifndef QT_NO_REGEXP_LOOKAHEAD
3488  case Tok_PosLookahead:
3489  case Tok_NegLookahead:
// The lookahead body is parsed by a separate engine, starting at the
// character currently held in yyCh (index yyPos - 1). A non-negative result
// is the number of characters to skip in this tokenizer.
// NOTE(review): the statement of the else branch (listing line 3496) and the
// statement controlled by the Tok_RightParen test (3500) are absent.
3490  neg = (yyTok == Tok_NegLookahead);
3491  eng = new QRegExpEngine(cs, greedyQuantifiers);
3492  len = eng->parse(yyIn + yyPos - 1, yyLen - yyPos + 1);
3493  if (len >= 0)
3494  skipChars(len);
3495  else
3497  box->catAnchor(addLookahead(eng, neg));
3498  yyTok = getToken();
3499  if (yyTok != Tok_RightParen)
3501  break;
3502 #endif
3503 #ifndef QT_NO_REGEXP_ESCAPE
3504  case Tok_Word:
3505  box->catAnchor(Anchor_Word);
3506  break;
3507  case Tok_NonWord:
3508  box->catAnchor(Anchor_NonWord);
3509  break;
3510 #endif
3511  case Tok_LeftParen:
3512  case Tok_MagicLeftParen:
// Parenthesised group: parse the inner expression, then require ')'.
3513  yyTok = getToken();
3514  parseExpression(box);
3515  if (yyTok != Tok_RightParen)
3516  error(RXERR_END);
3517  break;
3518  case Tok_CharClass:
3519  box->set(*yyCharClass);
3520  break;
// NOTE(review): listing line 3522 is absent before the break below.
3521  case Tok_Quantifier:
3523  break;
3524  default:
3525 #ifndef QT_NO_REGEXP_BACKREF
3526  if ((yyTok & Tok_BackRef) != 0)
3527  box->set(yyTok ^ Tok_BackRef);
3528  else
3529 #endif
3531  }
3532  }
3533  yyTok = getToken();
3534 }
3535 
// NOTE(review): the signature line is absent from this listing; other code
// calls this as parseFactor(box) with a Box pointer - confirm.
// Parses one atom into `box` and applies a following quantifier, if any.
// With interval support compiled in, bounded repetition is built by
// re-parsing the same atom from a saved tokenizer snapshot as many times as
// needed.
3537 {
3538 #ifndef QT_NO_REGEXP_CAPTURE
3539  int outerAtom = greedyQuantifiers ? startAtom(false) : -1;
3540  int innerAtom = startAtom(yyMayCapture && yyTok == Tok_LeftParen);
3541  bool magicLeftParen = (yyTok == Tok_MagicLeftParen);
3542 #else
3543  const int innerAtom = -1;
3544 #endif
3545 
3546 #ifndef QT_NO_REGEXP_INTERVAL
// YYREDO restores the tokenizer to the snapshot taken just below, i.e. to the
// start of the atom, with the repetition bounds zeroed.
3547 #define YYREDO() \
3548  yyIn = in, yyPos0 = pos0, yyPos = pos, yyLen = len, yyCh = ch, \
3549  *yyCharClass = charClass, yyMinRep = 0, yyMaxRep = 0, yyTok = tok
3550 
3551  const QChar *in = yyIn;
3552  int pos0 = yyPos0;
3553  int pos = yyPos;
3554  int len = yyLen;
3555  int ch = yyCh;
3556  QRegExpCharClass charClass;
3557  if (yyTok == Tok_CharClass)
3558  charClass = *yyCharClass;
3559  int tok = yyTok;
3560  bool mayCapture = yyMayCapture;
3561 #endif
3562 
3563  parseAtom(box);
3564 #ifndef QT_NO_REGEXP_CAPTURE
3565  finishAtom(innerAtom, magicLeftParen);
3566 #endif
3567 
3568  bool hasQuantifier = (yyTok == Tok_Quantifier);
3569  if (hasQuantifier) {
3570 #ifndef QT_NO_REGEXP_OPTIM
3571  trivial = false;
3572 #endif
// An unbounded maximum turns the box into "one or more"; a maximum of 0
// empties it; a minimum of 0 makes it optional.
3573  if (yyMaxRep == InftyRep) {
3574  box->plus(innerAtom);
3575 #ifndef QT_NO_REGEXP_INTERVAL
3576  } else if (yyMaxRep == 0) {
3577  box->clear();
3578 #endif
3579  }
3580  if (yyMinRep == 0)
3581  box->opt();
3582 
3583 #ifndef QT_NO_REGEXP_INTERVAL
// The copies parsed below are not allowed to capture. alpha is the number of
// additional mandatory copies (min - 1, or 0 when min is 0); beta is the
// number of additional optional copies (0 when the maximum is unbounded).
3584  yyMayCapture = false;
3585  int alpha = (yyMinRep == 0) ? 0 : yyMinRep - 1;
3586  int beta = (yyMaxRep == InftyRep) ? 0 : yyMaxRep - (alpha + 1);
3587 
3588  Box rightBox(this);
3589  int i;
3590 
// Optional copies nest: each new copy is followed by the chain built so far
// and the whole is made optional.
3591  for (i = 0; i < beta; i++) {
3592  YYREDO();
3593  Box leftBox(this);
3594  parseAtom(&leftBox);
3595  leftBox.cat(rightBox);
3596  leftBox.opt();
3597  rightBox = leftBox;
3598  }
// Mandatory copies are prepended without opt().
3599  for (i = 0; i < alpha; i++) {
3600  YYREDO();
3601  Box leftBox(this);
3602  parseAtom(&leftBox);
3603  leftBox.cat(rightBox);
3604  rightBox = leftBox;
3605  }
// The originally parsed box goes last in the chain.
3606  rightBox.cat(*box);
3607  *box = rightBox;
3608 #endif
// Consume the quantifier token itself.
3609  yyTok = getToken();
3610 #ifndef QT_NO_REGEXP_INTERVAL
3611  yyMayCapture = mayCapture;
3612 #endif
3613  }
3614 #undef YYREDO
3615 #ifndef QT_NO_REGEXP_CAPTURE
3616  if (greedyQuantifiers)
3617  finishAtom(outerAtom, hasQuantifier);
3618 #endif
3619 }
3620 
// NOTE(review): the signature line is absent from this listing; other code
// calls this as parseTerm(box) with a Box pointer - confirm.
// Parses a sequence of factors and concatenates them into `box`. The sequence
// ends at end of input, at ')' or at '|'.
3622 {
3623 #ifndef QT_NO_REGEXP_OPTIM
// With optimisations compiled in, the first factor is parsed straight into
// `box` instead of being concatenated from a temporary box.
3624  if (yyTok != Tok_Eos && yyTok != Tok_RightParen && yyTok != Tok_Bar)
3625  parseFactor(box);
3626 #endif
3627  while (yyTok != Tok_Eos && yyTok != Tok_RightParen && yyTok != Tok_Bar) {
3628  Box rightBox(this);
3629  parseFactor(&rightBox);
3630  box->cat(rightBox);
3631  }
3632 }
3633 
// NOTE(review): the signature line is absent from this listing; other code
// calls this as parseExpression(box) with a Box pointer - confirm.
// Parses one or more terms separated by '|' and combines them into `box`
// with Box::orx().
3635 {
3636  parseTerm(box);
3637  while (yyTok == Tok_Bar) {
3638 #ifndef QT_NO_REGEXP_OPTIM
3639  trivial = false;
3640 #endif
3641  Box rightBox(this);
// Skip the '|' token before parsing the next alternative.
3642  yyTok = getToken();
3643  parseTerm(&rightBox);
3644  box->orx(rightBox);
3645  }
3646 }
3647 
3648 /*
3649  The struct QRegExpPrivate contains the private data of a regular
3650  expression other than the automaton. It makes it possible for many
3651  QRegExp objects to use the same QRegExpEngine object with different
3652  QRegExpPrivate objects.
3653 */
// NOTE(review): the struct header (listing line 3654), the declarations at
// 3656-3657 and 3663, and the constructor signatures at 3665 and 3667 are
// absent from this listing. Other code in this file uses the members eng,
// engineKey and matchState through QRegExpPrivate pointers.
3655 {
3658  bool minimal;
3659 #ifndef QT_NO_REGEXP_CAPTURE
3660  QString t; // last string passed to QRegExp::indexIn() or lastIndexIn()
3661  QStringList capturedCache; // what QRegExp::capturedTexts() returned last
3662 #endif
3664 
// First initializer list: no engine, an empty case-sensitive RegExp key,
// non-minimal matching.
3666  : eng(0), engineKey(QString(), QRegExp::RegExp, Qt::CaseSensitive), minimal(false) { }
// Second initializer list: no engine, the supplied key, non-minimal matching.
3668  : eng(0), engineKey(key), minimal(false) {}
3669 };
3670 
3671 #if !defined(QT_NO_REGEXP_OPTIM)
// NOTE(review): the signature line is absent from this listing; presumably
// this is the qHash() overload for the engine key - confirm.
// Hashes a key by its pattern string only; no other field of `key` is used.
3673 {
3674  return qHash(key.pattern);
3675 }
3676 
3678 Q_GLOBAL_STATIC(EngineCache, globalEngineCache)
3679 Q_GLOBAL_STATIC(QMutex, mutex)
3680 #endif // QT_NO_REGEXP_OPTIM
3681 
// NOTE(review): the signature line is absent from this listing; other code
// calls this as derefEngine(eng, key) - confirm.
// Drops one reference to `eng`. When that was the last reference, the engine
// is handed to the global engine cache under `key` (if the cache exists and
// optimisations are compiled in) or deleted otherwise.
3683 {
3684  if (!eng->ref.deref()) {
3685 #if !defined(QT_NO_REGEXP_OPTIM)
3686  if (globalEngineCache()) {
// The cache is accessed under the global mutex. The insertion cost grows with
// the pattern length: 4 + length / 4.
3687  QMutexLocker locker(mutex());
3688  QT_TRY {
3689  globalEngineCache()->insert(key, eng, 4 + key.pattern.length() / 4);
3690  } QT_CATCH(const std::bad_alloc &) {
3691  // in case of an exception (e.g. oom), just delete the engine
3692  delete eng;
3693  }
3694  } else {
3695  delete eng;
3696  }
3697 #else
3698  Q_UNUSED(key);
3699  delete eng;
3700 #endif
3701  }
3702 }
3703 
// NOTE(review): the signature line is absent from this listing;
// prepareEngine() calls this as prepareEngine_helper(priv) - confirm.
// Ensures priv->eng is set: first by taking a cached engine for
// priv->engineKey out of the global cache (and adding a reference to it),
// otherwise by constructing a new engine from the key. If priv had no engine
// on entry, its match state is then prepared for the engine.
3705 {
3706  bool initMatchState = !priv->eng;
3707 #if !defined(QT_NO_REGEXP_OPTIM)
3708  if (!priv->eng && globalEngineCache()) {
3709  QMutexLocker locker(mutex());
3710  priv->eng = globalEngineCache()->take(priv->engineKey);
3711  if (priv->eng != 0)
3712  priv->eng->ref.ref();
3713  }
3714 #endif // QT_NO_REGEXP_OPTIM
3715 
3716  if (!priv->eng)
3717  priv->eng = new QRegExpEngine(priv->engineKey);
3718 
3719  if (initMatchState)
3720  priv->matchState.prepareForMatch(priv->eng);
3721 }
3722 
// Makes sure `priv` has an engine. Returns immediately when priv->eng is
// already set; otherwise delegates to prepareEngine_helper().
3723 inline static void prepareEngine(QRegExpPrivate *priv)
3724 {
3725  if (priv->eng)
3726  return;
3727  prepareEngine_helper(priv);
3728 }
3729 
// NOTE(review): the signature line is absent from this listing; it takes
// `priv` and `str` - confirm the exact name and types against the original.
// Ensures an engine exists, prepares the match state for it and, when capture
// support is compiled in, remembers `str` in priv->t and clears the cached
// list of captured texts.
3731 {
3732  prepareEngine(priv);
3733  priv->matchState.prepareForMatch(priv->eng);
3734 #ifndef QT_NO_REGEXP_CAPTURE
3735  priv->t = str;
3736  priv->capturedCache.clear();
3737 #else
3738  Q_UNUSED(str);
3739 #endif
3740 }
3741 
// NOTE(review): the signature line is absent from this listing; it takes
// `priv` - confirm the exact name against the original.
// Releases priv's engine, if it has one: the reference is dropped through
// derefEngine(), priv->eng is reset to 0 and the match state is drained.
3743 {
3744  if (priv->eng != 0) {
3745  derefEngine(priv->eng, priv->engineKey);
3746  priv->eng = 0;
3747  priv->matchState.drain();
3748  }
3749 }
3750 
// NOTE(review): the signature line and listing line 3810 are absent; this
// appears to be the default constructor - confirm.
// Allocates a default-constructed QRegExpPrivate.
3808 {
3809  priv = new QRegExpPrivate;
3811 }
3812 
// NOTE(review): the signature line and listing line 3826 are absent; this
// appears to be the constructor taking pattern, cs and syntax - confirm.
// Allocates the private data with an engine key built from the pattern,
// pattern syntax and case sensitivity.
3824 {
3825  priv = new QRegExpPrivate(QRegExpEngineKey(pattern, syntax, cs));
3827 }
3828 
// NOTE(review): the signature line is absent; this appears to be the copy
// constructor taking `rx` - confirm.
// Allocates fresh private data and then copies `rx` through operator=().
3835 {
3836  priv = new QRegExpPrivate;
3837  operator=(rx);
3838 }
3839 
// NOTE(review): the signature line and listing line 3845 are absent; the
// trailing index describes ~QRegExp() near this location - confirm.
// Deletes the private data.
3844 {
3846  delete priv;
3847 }
3848 
// NOTE(review): the signature line is absent; the copy constructor invokes
// this as operator=(rx) - confirm.
// Copies `rx` into this object, sharing rx's engine (one extra reference)
// rather than building a new one. Key, minimal flag, last subject string,
// captured-text cache and captured positions are copied as well.
// NOTE(review): listing line 3860 is absent between taking the reference and
// assigning priv->eng.
3855 {
3856  prepareEngine(rx.priv); // to allow sharing
3857  QRegExpEngine *otherEng = rx.priv->eng;
3858  if (otherEng)
3859  otherEng->ref.ref();
3861  priv->eng = otherEng;
3862  priv->engineKey = rx.priv->engineKey;
3863  priv->minimal = rx.priv->minimal;
3864 #ifndef QT_NO_REGEXP_CAPTURE
3865  priv->t = rx.priv->t;
3866  priv->capturedCache = rx.priv->capturedCache;
3867 #endif
3868  if (priv->eng)
3869  priv->matchState.prepareForMatch(priv->eng);
3870  priv->matchState.captured = rx.priv->matchState.captured;
3871  return *this;
3872 }
3873 
// Two regular expressions compare equal when their engine keys are equal and
// their minimal flags match. Match results are not part of the comparison.
3893 bool QRegExp::operator==(const QRegExp &rx) const
3894 {
3895  return priv->engineKey == rx.priv->engineKey && priv->minimal == rx.priv->minimal;
3896 }
3897 
// Returns true when the pattern string stored in the engine key is empty.
3925 bool QRegExp::isEmpty() const
3926 {
3927  return priv->engineKey.pattern.isEmpty();
3928 }
3929 
// An empty pattern is always reported as valid; otherwise the answer comes
// from the engine.
// NOTE(review): listing line 3948 is absent before priv->eng is dereferenced;
// presumably it makes sure the engine exists - confirm.
3943 bool QRegExp::isValid() const
3944 {
3945  if (priv->engineKey.pattern.isEmpty()) {
3946  return true;
3947  } else {
3949  return priv->eng->isValid();
3950  }
3951 }
3952 
// NOTE(review): the signature line is absent; the stream operator in this
// file calls regExp.pattern() - confirm.
// Returns the pattern string stored in the engine key.
3961 {
3962  return priv->engineKey.pattern;
3963 }
3964 
// Stores a new pattern in the engine key. Nothing happens when `pattern`
// equals the current one.
// NOTE(review): listing line 3974 is absent before the assignment.
3971 void QRegExp::setPattern(const QString &pattern)
3972 {
3973  if (priv->engineKey.pattern != pattern) {
3975  priv->engineKey.pattern = pattern;
3976  }
3977 }
3978 
// NOTE(review): the signature line is absent; the stream operator in this
// file calls regExp.caseSensitivity() - confirm.
// Returns the case sensitivity stored in the engine key.
3986 {
3987  return priv->engineKey.cs;
3988 }
3989 
// NOTE(review): the signature line and listing line 4001 are absent; this
// appears to be the case-sensitivity setter taking `cs` - confirm.
// Stores `cs` in the engine key when it differs from the current value; the
// two values are compared after conversion to bool.
3999 {
4000  if ((bool)cs != (bool)priv->engineKey.cs) {
4002  priv->engineKey.cs = cs;
4003  }
4004 }
4005 
// NOTE(review): the signature line is absent; the stream operator in this
// file calls regExp.patternSyntax() - confirm.
// Returns the pattern syntax stored in the engine key.
4013 {
4014  return priv->engineKey.patternSyntax;
4015 }
4016 
// NOTE(review): the signature line and listing line 4035 are absent; the
// trailing index lists setPatternSyntax(PatternSyntax syntax) near this
// location - confirm.
// Stores `syntax` in the engine key when it differs from the current value.
4033 {
4034  if (syntax != priv->engineKey.patternSyntax) {
4036  priv->engineKey.patternSyntax = syntax;
4037  }
4038 }
4039 
// NOTE(review): the signature line is absent; the stream operator in this
// file calls regExp.isMinimal() - confirm.
// Returns the minimal-matching flag.
4047 {
4048  return priv->minimal;
4049 }
4050 
// Sets the minimal-matching flag. The flag is kept in the private data, not
// in the engine key; it is passed to the match calls elsewhere in this file.
4068 void QRegExp::setMinimal(bool minimal)
4069 {
4070  priv->minimal = minimal;
4071 }
4072 
4073 // ### Qt 5: make non-const
// Runs one match attempt at position 0 and returns true when captured[1]
// equals the length of `str`. On failure the captured data is overwritten
// with start 0 and the match state's oneTestMatchedLen before returning
// false.
// NOTE(review): listing line 4096 is absent before the match call.
4094 bool QRegExp::exactMatch(const QString &str) const
4095 {
4097  priv->matchState.match(str.unicode(), str.length(), 0, priv->minimal, true, 0);
4098  if (priv->matchState.captured[1] == str.length()) {
4099  return true;
4100  } else {
4101  priv->matchState.captured[0] = 0;
4102  priv->matchState.captured[1] = priv->matchState.oneTestMatchedLen;
4103  return false;
4104  }
4105 }
4106 
4107 // ### Qt 5: make non-const
// Searches `str` from `offset` and returns captured[0] as left by the match
// state. A negative offset has the string length added to it before the
// search.
// NOTE(review): listing line 4138 is absent before the offset adjustment.
4136 int QRegExp::indexIn(const QString &str, int offset, CaretMode caretMode) const
4137 {
4139  if (offset < 0)
4140  offset += str.length();
4141  priv->matchState.match(str.unicode(), str.length(), offset,
4142  priv->minimal, false, caretIndex(offset, caretMode));
4143  return priv->matchState.captured[0];
4144 }
4145 
4146 // ### Qt 5: make non-const
// Searches backwards: starting at `offset`, a single match attempt is made at
// each position, stepping down to 0, and the first position where the match
// begins exactly there is returned. Returns -1 when no position matches. A
// negative offset has the string length added to it first.
// NOTE(review): listing line 4169 is absent before the offset adjustment.
4167 int QRegExp::lastIndexIn(const QString &str, int offset, CaretMode caretMode) const
4168 {
4170  if (offset < 0)
4171  offset += str.length();
// Offset out of range: fill the captured array with 0xff bytes, so that each
// int reads as -1, and report failure.
4172  if (offset < 0 || offset > str.length()) {
4173  memset(priv->matchState.captured, -1, priv->matchState.capturedSize*sizeof(int));
4174  return -1;
4175  }
4176 
4177  while (offset >= 0) {
4178  priv->matchState.match(str.unicode(), str.length(), offset,
4179  priv->minimal, true, caretIndex(offset, caretMode));
4180  if (priv->matchState.captured[0] == offset)
4181  return offset;
4182  --offset;
4183  }
4184  return -1;
4185 }
4186 
// NOTE(review): the signature line is absent; presumably this is the
// matched-length accessor - confirm.
// Returns captured[1] of the match state.
4194 {
4195  return priv->matchState.captured[1];
4196 }
4197 
4198 #ifndef QT_NO_REGEXP_CAPTURE
4199 
4200 #ifndef QT_NO_DEPRECATED
4201 
// NOTE(review): the signature line is absent; this sits under
// QT_NO_DEPRECATED, so it is presumably a deprecated alias - confirm.
// Forwards to captureCount().
4211 {
4212  return captureCount();
4213 }
4214 #endif
4215 
// NOTE(review): the signature line and listing line 4225 are absent; the
// wrapper above calls this as captureCount() - confirm.
// Returns the engine's capture count.
4224 {
4226  return priv->eng->captureCount();
4227 }
4228 
// NOTE(review): the signature line and listing line 4270 are absent; the
// non-const overload calls this as capturedTexts() - confirm.
// Returns the list of captured texts, building it on first use from the
// match state and caching it in priv->capturedCache.
4268 {
4269  if (priv->capturedCache.isEmpty()) {
4271  const int *captured = priv->matchState.captured;
4272  int n = priv->matchState.capturedSize;
4273 
// `captured` is read in pairs: entry i is passed to mid() as the position and
// entry i + 1 as the length. A zero length gives an empty string; a negative
// position with a non-zero length leaves `m` default-constructed.
4274  for (int i = 0; i < n; i += 2) {
4275  QString m;
4276  if (captured[i + 1] == 0)
4277  m = QLatin1String(""); // ### Qt 5: don't distinguish between null and empty
4278  else if (captured[i] >= 0)
4279  m = priv->t.mid(captured[i], captured[i + 1]);
4280  priv->capturedCache.append(m);
4281  }
// The subject string is cleared once the cache has been built.
4282  priv->t.clear();
4283  }
4284  return priv->capturedCache;
4285 }
4286 
// NOTE(review): the signature line is absent; this appears to be the
// non-const overload of capturedTexts() - confirm.
// Forwards to the const overload.
4291 {
4292  return const_cast<const QRegExp *>(this)->capturedTexts();
4293 }
4294 
// Returns entry `nth` of capturedTexts(). The lookup goes through value(),
// which presumably yields a default-constructed QString for an out-of-range
// index - confirm against the QStringList/QList documentation.
4310 QString QRegExp::cap(int nth) const
4311 {
4312  return capturedTexts().value(nth);
4313 }
4314 
// NOTE(review): the signature line is absent; this appears to be the
// non-const overload of cap(int nth) - confirm.
// Forwards to the const overload.
4319 {
4320  return const_cast<const QRegExp *>(this)->cap(nth);
4321 }
4322 
// Returns the stored start position for capture `nth`, i.e. entry 2 * nth of
// the captured array. Returns -1 when `nth` is negative or not below
// capturedSize / 2.
4337 int QRegExp::pos(int nth) const
4338 {
4339  if (nth < 0 || nth >= priv->matchState.capturedSize / 2)
4340  return -1;
4341  else
4342  return priv->matchState.captured[2 * nth];
4343 }
4344 
// Non-const overload; forwards to the const pos(int).
4348 int QRegExp::pos(int nth)
4349 {
4350  return const_cast<const QRegExp *>(this)->pos(nth);
4351 }
4352 
// NOTE(review): the signature line is absent; the non-const overload calls
// this as errorString() - confirm.
// Returns the RXERR_OK text when the pattern is valid, otherwise the engine's
// error string.
4360 {
4361  if (isValid()) {
4362  return QString::fromLatin1(RXERR_OK);
4363  } else {
4364  return priv->eng->errorString();
4365  }
4366 }
4367 
// NOTE(review): the signature line is absent; this appears to be the
// non-const overload of errorString() - confirm.
// Forwards to the const overload.
4372 {
4373  return const_cast<const QRegExp *>(this)->errorString();
4374 }
4375 #endif
4376 
// NOTE(review): the signature line is absent; it takes `str` and returns a
// QString - confirm the exact name against the original.
// Returns a copy of `str` in which each of the characters
// $ ( ) * + . ? [ \ ] ^ { | } is preceded by a backslash.
4393 {
4394  QString quoted;
4395  const int count = str.count();
// Reserve for the worst case, in which every character gets a backslash.
4396  quoted.reserve(count * 2);
4397  const QLatin1Char backslash('\\');
4398  for (int i = 0; i < count; i++) {
// The switch has no break: a listed character appends the backslash and then
// falls out to the common append of the character itself.
4399  switch (str.at(i).toLatin1()) {
4400  case '$':
4401  case '(':
4402  case ')':
4403  case '*':
4404  case '+':
4405  case '.':
4406  case '?':
4407  case '[':
4408  case '\\':
4409  case ']':
4410  case '^':
4411  case '{':
4412  case '|':
4413  case '}':
4414  quoted.append(backslash);
4415  }
4416  quoted.append(str.at(i));
4417  }
4418  return quoted;
4419 }
4420 
4510 #ifndef QT_NO_DATASTREAM
4511 
// NOTE(review): the signature line is absent; this appears to be the
// stream-out operator taking `out` and `regExp` - confirm.
// Writes the pattern, then the case sensitivity, the pattern syntax and the
// minimal flag, each of the last three as a quint8.
4522 {
4523  return out << regExp.pattern() << (quint8)regExp.caseSensitivity()
4524  << (quint8)regExp.patternSyntax()
4525  << (quint8)!!regExp.isMinimal();
4526 }
4527 
// NOTE(review): the signature line is absent; this appears to be the
// stream-in operator taking `in` and `regExp` - confirm.
// Reads the pattern followed by three quint8 values (case sensitivity,
// pattern syntax, minimal flag), builds a new QRegExp from them and assigns
// it to `regExp`.
4539 {
4540  QString pattern;
4541  quint8 cs;
4542  quint8 patternSyntax;
4543  quint8 isMinimal;
4544 
4545  in >> pattern >> cs >> patternSyntax >> isMinimal;
4546 
4547  QRegExp newRegExp(pattern, Qt::CaseSensitivity(cs),
4548  QRegExp::PatternSyntax(patternSyntax));
4549 
4550  newRegExp.setMinimal(isMinimal);
4551  regExp = newRegExp;
4552  return in;
4553 }
4554 #endif // QT_NO_DATASTREAM
4555 
T * q_check_ptr(T *p)
Definition: qglobal.h:1857
Box(QRegExpEngine *engine)
Definition: qregexp.cpp:2429
QString goodStr
Definition: qregexp.cpp:1188
QMap< int, int > anchors
Definition: qregexp.cpp:958
Q_DECLARE_TYPEINFO(QRegExpAutomatonState, Q_MOVABLE_TYPE)
~QRegExp()
Destroys the regular expression and cleans up its internal data.
Definition: qregexp.cpp:3843
int parse(const QChar *rx, int len)
Definition: qregexp.cpp:3361
#define RXERR_OCTAL
Definition: qregexp.cpp:71
unsigned char c[8]
Definition: qnumeric_p.h:62
const int InftyRep
Definition: qregexp.cpp:704
void setBit(int i)
Sets the bit at index position i to 1.
Definition: qbitarray.h:128
QString cap(int nth=0) const
Returns the text captured by the nth subexpression.
Definition: qregexp.cpp:4310
void setPatternSyntax(PatternSyntax syntax)
Sets the syntax mode for the regular expression.
Definition: qregexp.cpp:4032
QRegExpCharClass & operator=(const QRegExpCharClass &cc)
Definition: qregexp.cpp:2331
#define QT_END_NAMESPACE
This macro expands to.
Definition: qglobal.h:90
The QMutex class provides access serialization between threads.
Definition: qmutex.h:60
QStringList capturedCache
Definition: qregexp.cpp:3661
int lastIndexIn(const QString &str, int offset=-1, CaretMode caretMode=CaretAtZero) const
Attempts to find a match backwards in str from position offset.
Definition: qregexp.cpp:4167
void catAnchor(int a)
Definition: qregexp.cpp:2632
void addRange(ushort from, ushort to)
Definition: qregexp.cpp:2365
QHash< QByteArray, QPair< int, int > > categoriesRangeMap
Definition: qregexp.cpp:1297
const QChar at(int i) const
Returns the character at the given index position in the string.
Definition: qstring.h:698
void dump() const
Definition: qregexp.cpp:2415
The QRegExp class provides pattern matching using regular expressions.
Definition: qregexp.h:61
bool yyMayCapture
Definition: qregexp.cpp:1296
ushort unicode() const
This is an overloaded member function, provided for convenience. It differs from the above function o...
Definition: qchar.h:251
QVector< T > & fill(const T &t, int size=-1)
Assigns value to all items in the vector.
Definition: qvector.h:665
The QAtomicInt class provides platform-independent atomic operations on integers. ...
Definition: qatomic.h:55
bool goodStringMatch(QRegExpMatchState &matchState) const
Definition: qregexp.cpp:1872
int count(const T &t) const
Returns the number of occurrences of value in the vector.
Definition: qvector.h:742
QByteArray & append(char c)
Appends the character ch to this byte array.
QVector< int > outs
Definition: qregexp.cpp:956
void addPlusTransitions(const QVector< int > &from, const QVector< int > &to, int atom)
Definition: qregexp.cpp:1535
QString errorString() const
Returns a text string that explains why a regexp pattern is invalid, if that is the case; otherwise returns...
Definition: qregexp.cpp:4359
uint qHash(const QRegExpEngineKey &key)
Definition: qregexp.cpp:3672
void addSingleton(ushort ch)
Definition: qregexp.cpp:1035
QCache< QRegExpEngineKey, QRegExpEngine > EngineCache
Definition: qregexp.cpp:3677
const int NumBadChars
Definition: qregexp.cpp:698
Qt::CaseSensitivity cs
Definition: qregexp.cpp:1176
int anchorAlternation(int a, int b)
Definition: qregexp.cpp:1557
#define error(msg)
bool caretAnchored
Definition: qregexp.cpp:1172
The QByteArray class provides an array of bytes.
Definition: qbytearray.h:135
Definition: qcache.h:54
Qt::CaseSensitivity cs
Definition: qregexp.cpp:876
int length() const
Returns the number of characters in this string.
Definition: qstring.h:696
QVector< int > captureForOfficialCapture
Definition: qregexp.cpp:1158
void error(const char *msg)
Definition: qregexp.cpp:3181
void setPattern(const QString &pattern)
Sets the pattern string to pattern.
Definition: qregexp.cpp:3971
bool badCharMatch(QRegExpMatchState &matchState) const
Definition: qregexp.cpp:1892
void heuristicallyChooseHeuristic()
Definition: qregexp.cpp:1616
T1 first
Definition: qpair.h:65
static void mergeInto(QVector< int > *a, const QVector< int > &b)
Definition: qregexp.cpp:716
QRegExpCharClass(const QRegExpCharClass &cc)
Definition: qregexp.cpp:1026
QRegExp & operator=(const QRegExp &rx)
Copies the regular expression rx and returns a reference to the copy.
Definition: qregexp.cpp:3854
Box & operator=(const Box &b)
Definition: qregexp.cpp:2441
const_iterator constEnd() const
Returns a const STL-style iterator pointing to the imaginary item after the last item in the vector...
Definition: qvector.h:252
T2 second
Definition: qpair.h:66
static bool match(const uchar *found, const char *target, uint len)
int pos(int nth=0) const
Returns the position of the nth captured text in the searched string.
Definition: qregexp.cpp:4337
int matchedLength() const
Returns the length of the last matched string, or -1 if there was no match.
Definition: qregexp.cpp:4193
QT_DEPRECATED int numCaptures() const
Returns the number of captures contained in the regular expression.
Definition: qregexp.cpp:4210
QRegExpLookahead(QRegExpEngine *eng0, bool neg0)
Definition: qregexp.cpp:1312
#define RXERR_LEFTDELIM
Definition: qregexp.cpp:72
QLatin1String(DBUS_INTERFACE_DBUS))) Q_GLOBAL_STATIC_WITH_ARGS(QString
Q_OUTOFLINE_TEMPLATE RandomAccessIterator qBinaryFind(RandomAccessIterator begin, RandomAccessIterator end, const T &value)
Definition: qalgorithms.h:295
QRegExpMatchState matchState
Definition: qregexp.cpp:3663
#define RXERR_CATEGORY
Definition: qregexp.cpp:76
long ASN1_INTEGER_get ASN1_INTEGER * a
QRegExpPrivate(const QRegExpEngineKey &key)
Definition: qregexp.cpp:3667
unsigned char quint8
Definition: qglobal.h:934
bool ref()
Atomically increments the value of this QAtomicInt.
void dump() const
Definition: qregexp.cpp:1649
void parseExpression(Box *box)
Definition: qregexp.cpp:3634
The QString class provides a Unicode character string.
Definition: qstring.h:83
QRegExp::PatternSyntax patternSyntax
Definition: qregexp.cpp:875
Q_STATIC_GLOBAL_OPERATOR bool operator==(const QRegExpEngineKey &key1, const QRegExpEngineKey &key2)
Definition: qregexp.cpp:889
The QHash class is a template class that provides a hash-table-based dictionary.
Definition: qdatastream.h:66
void parseAtom(Box *box)
Definition: qregexp.cpp:3466
#define RXERR_LOOKBEHIND
Definition: qregexp.cpp:69
bool testBit(int i) const
Returns true if the bit at index position i is 1; otherwise returns false.
Definition: qbitarray.h:124
bool startsWith(const QByteArray &a) const
Returns true if this byte array starts with byte array ba; otherwise returns false.
static int caretIndex(int offset, QRegExp::CaretMode caretMode)
Definition: qregexp.cpp:858
Qt::CaseSensitivity caseSensitivity() const
Returns Qt::CaseSensitive if the regexp is matched case sensitively; otherwise returns Qt::CaseInsens...
Definition: qregexp.cpp:3985
bool contains(const Key &key) const
Returns true if the hash contains an item with the key; otherwise returns false.
Definition: qhash.h:872
static void prepareEngine_helper(QRegExpPrivate *priv)
Definition: qregexp.cpp:3704
The QChar class provides a 16-bit Unicode character.
Definition: qchar.h:72
QString pattern
Definition: qregexp.cpp:874
QVector< int > occ1
Definition: qregexp.cpp:1191
const T value(const Key &key) const
Returns the value associated with the key.
Definition: qhash.h:606
The QStringMatcher class holds a sequence of characters that can be quickly matched in a Unicode stri...
Q_DECL_CONSTEXPR const T & qMax(const T &a, const T &b)
Definition: qglobal.h:1217
Category category() const
Returns the character's category.
Definition: qchar.cpp:853
static bool isWord(QChar ch)
Definition: qregexp.cpp:707
void resize(int size)
Sets the size of the vector to size.
Definition: qvector.h:342
iterator insert(const Key &key, const T &value)
Inserts a new item with the key and a value of value.
Definition: qhash.h:753
QMap< Key, T > & unite(const QMap< Key, T > &other)
Inserts all the items in the other map into this map.
Definition: qmap.h:625
iterator end()
Returns an STL-style iterator pointing to the imaginary item after the last item in the vector...
Definition: qvector.h:250
#define RXERR_REPETITION
Definition: qregexp.cpp:70
QVector< int > ls
Definition: qregexp.cpp:1233
int anchorConcatenation(int a, int b)
Definition: qregexp.cpp:1576
bool isValid() const
Returns true if the regular expression is valid; otherwise returns false.
Definition: qregexp.cpp:3943
QRegExpEngine * eng
Definition: qregexp.cpp:1309
#define RXERR_DISABLED
Definition: qregexp.cpp:66
Q_CORE_EXPORT void qDebug(const char *,...)
const QRegExpEngine * eng
Definition: qregexp.cpp:931
void reserve(int size)
Attempts to allocate memory for at least size characters.
Definition: qstring.h:881
const_iterator constBegin() const
Returns a const STL-style iterator pointing to the first item in the vector.
Definition: qvector.h:249
bool negative() const
Definition: qregexp.cpp:1031
void setMinimal(bool minimal)
Enables or disables minimal matching.
Definition: qregexp.cpp:4068
static QString wc2rx(const QString &wc_str, const bool enableEscaping)
Definition: qregexp.cpp:761
int addLookahead(QRegExpEngine *eng, bool negative)
Definition: qregexp.cpp:1769
#define QT_BEGIN_NAMESPACE
This macro expands to.
Definition: qglobal.h:89
bool in(QChar ch) const
Definition: qregexp.cpp:2393
void setNegative(bool negative)
Definition: qregexp.cpp:2349
void dump() const
Definition: qregexp.cpp:2676
Box(const Box &b)
Definition: qregexp.cpp:1205
int setupState(int match)
Definition: qregexp.cpp:1731
int indexIn(const QString &str, int offset=0, CaretMode caretMode=CaretAtZero) const
Attempts to find a match in str from position offset (0 by default).
Definition: qregexp.cpp:4136
int captureCount() const
Definition: qregexp.cpp:1085
void clear()
Removes all the elements from the vector and releases the memory used by the vector.
Definition: qvector.h:347
QVector< int > occ1
Definition: qregexp.cpp:1051
const QChar * unicode() const
Returns a '\0'-terminated Unicode representation of the string.
Definition: qstring.h:706
QStringList capturedTexts() const
Returns a list of the captured text strings.
Definition: qregexp.cpp:4267
int qFindString(const QChar *haystack, int haystackLen, int from, const QChar *needle, int needleLen, Qt::CaseSensitivity cs)
Definition: qstring.cpp:2753
bool isEmpty() const
Returns true if the string has no characters; otherwise returns false.
Definition: qstring.h:704
const int EmptyCapture
Definition: qregexp.cpp:702
T value(int i) const
Returns the value at index position i in the vector.
Definition: qvector.h:559
void prepareForMatch(QRegExpEngine *eng)
Definition: qregexp.cpp:1367
int goodEarlyStart
Definition: qregexp.cpp:1186
#define Q_GLOBAL_STATIC(TYPE, NAME)
Declares a global static variable with the given type and name.
Definition: qglobal.h:1968
const T value(const Key &key) const
Returns the value associated with the key key.
Definition: qmap.h:499
static unsigned int getChar(const QChar *str, int &i, const int len)
static bool isBetterCapture(int ncap, const int *begin1, const int *end1, const int *begin2, const int *end2)
Definition: qregexp.cpp:1785
Q_CORE_EXPORT QString qt_regexp_toCanonical(const QString &pattern, QRegExp::PatternSyntax patternSyntax)
Definition: qregexp.cpp:1323
#define Q_STATIC_GLOBAL_OPERATOR
Definition: qfunctions_p.h:71
bool deref()
Atomically decrements the value of this QAtomicInt.
The QStringList class provides a list of strings.
Definition: qstringlist.h:66
bool isEmpty() const
Returns true if the hash contains no items; otherwise returns false.
Definition: qhash.h:297
void append(const T &t)
Inserts value at the end of the vector.
Definition: qvector.h:573
#define RXERR_INTERVAL
Definition: qregexp.cpp:75
#define RXERR_CHARCLASS
Definition: qregexp.cpp:67
#define RXERR_LOOKAHEAD
Definition: qregexp.cpp:68
#define RXERR_OK
Definition: qregexp.cpp:65
unsigned int uint
Definition: qglobal.h:996
#define YYREDO()
int getToken()
Definition: qregexp.cpp:3200
static int getEscape(const QChar *uc, int *pos, int len, int maxNumber=999)
Definition: qstring.cpp:7408
bool greedyQuantifiers
Definition: qregexp.cpp:1177
int getEscape()
Definition: qregexp.cpp:2860
void parseFactor(Box *box)
Definition: qregexp.cpp:3536
void clear()
Removes all items from the list.
Definition: qlist.h:764
QRegExp()
Constructs an empty regexp.
Definition: qregexp.cpp:3807
const_iterator constBegin() const
Returns a const STL-style iterator pointing to the first item in the map.
Definition: qmap.h:374
#define QT_CATCH(A)
Definition: qglobal.h:1537
int count() const
Definition: qstring.h:103
void reset(T *other=0)
Deletes the existing object it is pointing to if any, and sets its pointer to other.
void qSwap(T &value1, T &value2)
Definition: qglobal.h:2181
void addAnchors(int from, int to, int a)
Definition: qregexp.cpp:1593
const T & at(int i) const
Returns the item at index position i in the vector.
Definition: qvector.h:350
QVector< QRegExpCharClassRange > r
Definition: qregexp.cpp:1048
#define BadChar(ch)
Definition: qregexp.cpp:699
void orx(const Box &b)
Definition: qregexp.cpp:2574
iterator begin()
Returns an STL-style iterator pointing to the first item in the map.
Definition: qmap.h:372
The QBitArray class provides an array of bits.
Definition: qbitarray.h:54
void setupCategoriesRangeMap()
Definition: qregexp.cpp:2709
QString yyError
Definition: qregexp.cpp:1284
QVector< QRegExpAnchorAlternation > aa
Definition: qregexp.cpp:1169
CaseSensitivity
Definition: qnamespace.h:1451
void addCatTransitions(const QVector< int > &from, const QVector< int > &to)
Definition: qregexp.cpp:1528
bool useGoodStringHeuristic
Definition: qregexp.cpp:1184
const int EOS
Definition: qregexp.cpp:705
bool isEmpty() const
Returns true if the pattern string is empty; otherwise returns false.
Definition: qregexp.cpp:3925
QString pattern() const
Returns the pattern string of the regular expression.
Definition: qregexp.cpp:3960
const_iterator constEnd() const
Returns a const STL-style iterator pointing to the imaginary item after the last item in the map...
Definition: qmap.h:380
The QMutexLocker class is a convenience class that simplifies locking and unlocking mutexes...
Definition: qmutex.h:101
static void prepareEngine(QRegExpPrivate *priv)
Definition: qregexp.cpp:3723
QVector< QRegExpCharClass > cl
Definition: qregexp.cpp:1163
PatternSyntax patternSyntax() const
Returns the syntax used by the regular expression.
Definition: qregexp.cpp:4012
QString & append(QChar c)
Definition: qstring.cpp:1777
PatternSyntax
The syntax used to interpret the meaning of the pattern.
Definition: qregexp.h:64
QAtomicInt ref
Definition: qregexp.cpp:1115
#define Q_CORE_EXPORT
Definition: qglobal.h:1449
QRegExpPrivate * priv
Definition: qregexp.h:153
static void derefEngine(QRegExpEngine *eng, const QRegExpEngineKey &key)
Definition: qregexp.cpp:3682
QRegExpEngine(Qt::CaseSensitivity cs, bool greedyQuantifiers)
Definition: qregexp.cpp:1077
QVector< QRegExpLookahead * > ahead
Definition: qregexp.cpp:1166
void plus(int atom)
Definition: qregexp.cpp:2605
void clear()
Clears the contents of the string and makes it empty.
Definition: qstring.h:723
QList< QVector< int > > sleeping
Definition: qregexp.cpp:926
int captureCount() const
Returns the number of captures contained in the regular expression.
Definition: qregexp.cpp:4223
unsigned short ushort
Definition: qglobal.h:995
iterator end()
Returns an STL-style iterator pointing to the imaginary item after the last item in the map...
Definition: qmap.h:375
void parseTerm(Box *box)
Definition: qregexp.cpp:3621
static QString fromLatin1(const char *, int size=-1)
Returns a QString initialized with the first size characters of the Latin-1 string str...
Definition: qstring.cpp:4188
static void prepareEngineForMatch(QRegExpPrivate *priv, const QString &str)
Definition: qregexp.cpp:3730
void finishAtom(int atom, bool needCapture)
Definition: qregexp.cpp:1757
int startAtom(bool officialCapture)
Definition: qregexp.cpp:1747
iterator insert(const Key &key, const T &value)
Inserts a new item with the key key and a value of value.
Definition: qmap.h:559
QRegExpEngineKey(const QString &pattern, QRegExp::PatternSyntax patternSyntax, Qt::CaseSensitivity cs)
Definition: qregexp.cpp:878
Q_OUTOFLINE_TEMPLATE QPair< T1, T2 > qMakePair(const T1 &x, const T2 &y)
Definition: qpair.h:102
#define st(var, type, card)
int key
QString toLower() const Q_REQUIRED_RESULT
Returns a lowercase copy of the string.
Definition: qstring.cpp:5389
QScopedPointer< QRegExpCharClass > yyCharClass
Definition: qregexp.cpp:1281
bool isEmpty() const
Returns true if the map contains no items; otherwise returns false.
Definition: qmap.h:203
void cat(const Box &b)
Definition: qregexp.cpp:2504
static const QMetaObjectPrivate * priv(const uint *data)
if(void) toggleToolbarShown
Definition: qnamespace.h:54
void addCategories(int cats)
Definition: qregexp.cpp:2357
bool contains(const Key &key) const
Returns true if the map contains an item with key key; otherwise returns false.
Definition: qmap.h:553
static QString dump(const QByteArray &)
bool contains(const T &t) const
Returns true if the vector contains an occurrence of value; otherwise returns false.
Definition: qvector.h:731
char toLatin1() const
Returns the Latin-1 character equivalent to the QChar, or 0.
Definition: qchar.h:376
void setupHeuristics()
Definition: qregexp.cpp:2645
QDataStream & operator<<(QDataStream &out, const QRegExp &regExp)
Writes the regular expression regExp to stream out.
Definition: qregexp.cpp:4521
QVector< QRegExpAtom > f
Definition: qregexp.cpp:1155
#define RXERR_END
Definition: qregexp.cpp:73
bool exactMatch(const QString &str) const
Returns true if str is matched exactly by this regular expression; otherwise returns false...
Definition: qregexp.cpp:4094
const QChar * in
Definition: qregexp.cpp:904
QMap< int, int > lanchors
Definition: qregexp.cpp:1235
bool operator==(const QRegExp &rx) const
Returns true if this regular expression is equal to rx; otherwise returns false.
Definition: qregexp.cpp:3893
iterator erase(iterator it)
Removes the (key, value) pair pointed to by the iterator pos from the map, and returns an iterator to...
Definition: qmap.h:717
bool isMark() const
Returns true if the character is a mark (Mark_* categories); otherwise returns false.
Definition: qchar.cpp:625
QVector< int > occ1
Definition: qregexp.cpp:1250
CaretMode
The CaretMode enum defines the different meanings of the caret (^) in a regular expression.
Definition: qregexp.h:71
T * data()
Returns a pointer to the data stored in the vector.
Definition: qvector.h:152
int getRep(int def)
Definition: qregexp.cpp:3152
void setCaseSensitivity(Qt::CaseSensitivity cs)
Sets case sensitive matching to cs.
Definition: qregexp.cpp:3998
QRegExpEngineKey engineKey
Definition: qregexp.cpp:3657
The QDataStream class provides serialization of binary data to a QIODevice.
Definition: qdatastream.h:71
QRegExpAutomatonState(int a, int m)
Definition: qregexp.cpp:962
const QString & errorString() const
Definition: qregexp.cpp:1084
const T * constData() const
Returns a const pointer to the data stored in the vector.
Definition: qvector.h:154
int createState(QChar ch)
Definition: qregexp.cpp:1489
QRegExpEngine * eng
Definition: qregexp.cpp:3656
bool testAnchor(int i, int a, const int *capBegin)
Definition: qregexp.cpp:1804
const int NoOccurrence
Definition: qregexp.cpp:701
bool xmlSchemaExtensions
Definition: qregexp.cpp:1178
QVector< QRegExpAutomatonState > s
Definition: qregexp.cpp:1153
void setup()
Definition: qregexp.cpp:1706
#define Q_UNUSED(x)
Indicates to the compiler that the parameter with the specified name is not used in the body of a fun...
Definition: qglobal.h:1729
Q_OUTOFLINE_TEMPLATE void qDeleteAll(ForwardIterator begin, ForwardIterator end)
Definition: qalgorithms.h:319
QDataStream & operator>>(QDataStream &in, QRegExp &regExp)
Reads a regular expression from stream in into regExp.
Definition: qregexp.cpp:4538
QChar toLower() const
Returns the lowercase equivalent if the character is uppercase or titlecase; otherwise returns the ch...
Definition: qchar.cpp:1239
int size() const
Returns the number of items in the vector.
Definition: qvector.h:137
const QVector< int > & firstOccurrence() const
Definition: qregexp.cpp:1039
#define QT_TRY
Definition: qglobal.h:1536
void startTokenizer(const QChar *rx, int len)
Definition: qregexp.cpp:3187
The QLatin1Char class provides an 8-bit ASCII/Latin-1 character.
Definition: qchar.h:55
void set(QChar ch)
Definition: qregexp.cpp:2462
#define RXERR_LIMIT
Definition: qregexp.cpp:74
#define INT_MAX
bool isValid() const
Definition: qregexp.cpp:1083
void addAnchorsToEngine(const Box &to) const
Definition: qregexp.cpp:2698
QRegExpEngine * eng
Definition: qregexp.cpp:1232
const QChar * yyIn
Definition: qregexp.cpp:1276
QVector< int > rs
Definition: qregexp.cpp:1234
static void invalidateEngine(QRegExpPrivate *priv)
Definition: qregexp.cpp:3742
QMap< int, int > reenter
Definition: qregexp.cpp:957
void match(const QChar *str, int len, int pos, bool minimal, bool oneTest, int caretIndex)
Definition: qregexp.cpp:1414
static void setup()
Definition: qtextcodec.cpp:718
QMap< int, int > ranchors
Definition: qregexp.cpp:1236
bool isLetterOrNumber() const
Returns true if the character is a letter or number (Letter_* or Number_* categories); otherwise retu...
Definition: qchar.cpp:681
The QList class is a template class that provides lists.
Definition: qdatastream.h:62
void skipChars(int n)
Definition: qregexp.cpp:3172
static QString escape(const QString &str)
Returns the string str with every regexp special character escaped with a backslash.
Definition: qregexp.cpp:4392
const int InftyLen
Definition: qregexp.cpp:703
bool isMinimal() const
Returns true if minimal (non-greedy) matching is enabled; otherwise returns false.
Definition: qregexp.cpp:4046