// Study data for the compiled pattern; Optimize() stores the result of
// pcre_study() here. Null until then.
38 pcre_extra *fPCREExtra;
// Both PCRE handles start out null.
40 PCREPriv_t() { fPCRE = 0; fPCREExtra = 0; }
// Class-wide flag consulted by Compile(): when set, a failed compilation
// is raised as std::runtime_error. Off (kFALSE) by default.
46 Bool_t TPRegexp::fgThrowAtCompileError = kFALSE;
// Allocate the private holder for the PCRE handles (both start out null).
53 fPriv =
new PCREPriv_t;
/// Constructor taking the regular-expression pattern as a TString.
60 TPRegexp::TPRegexp(
const TString &pat)
// Allocate the private holder for the PCRE handles (both start out null).
63 fPriv =
new PCREPriv_t;
/// Copy constructor.
/// Copies the pattern text and the option bits of `p`. The PCRE handles
/// are not shared with `p`: a fresh PCREPriv_t is allocated, whose
/// handles start out null.
70 TPRegexp::TPRegexp(
const TPRegexp &p)
72 fPattern = p.fPattern;
73 fPriv =
new PCREPriv_t;
74 fPCREOpts = p.fPCREOpts;
// Release the compiled pattern.
83 pcre_free(fPriv->fPCRE);
// Study data is optional, so it is only released when present.
84 if (fPriv->fPCREExtra)
85 pcre_free(fPriv->fPCREExtra);
/// Assignment operator.
/// Copies the pattern text and the option bits of `p`. The compiled
/// pattern and any study data held by this object are released with
/// pcre_free(), and the study pointer is reset to null.
92 TPRegexp &TPRegexp::operator=(
const TPRegexp &p)
95 fPattern = p.fPattern;
97 pcre_free(fPriv->fPCRE);
// Study data is optional, so it is only released when present.
99 if (fPriv->fPCREExtra)
100 pcre_free(fPriv->fPCREExtra);
101 fPriv->fPCREExtra = 0;
102 fPCREOpts = p.fPCREOpts;
/// Convert a modifier string into an option bit mask.
/// Flags are OR-ed into the mask one at a time. A character that is not
/// a known modifier is reported through Error().
135 UInt_t TPRegexp::ParseMods(
const TString &modStr)
const
// Guard for an empty modifier string.
139 if (modStr.Length() <= 0)
// Raw character pointer into modStr; *m is the modifier being examined.
143 const char *m = modStr;
147 opts |= kPCRE_GLOBAL;
150 opts |= PCRE_CASELESS;
153 opts |= PCRE_MULTILINE;
156 opts |= kPCRE_OPTIMIZE;
162 opts |= PCRE_EXTENDED;
165 opts |= kPCRE_DEBUG_MSGS;
// Unknown modifier character: report it.
168 Error(
"ParseMods",
"illegal pattern modifier: %c", *m);
/// Build the modifier string that corresponds to the option bits
/// currently set in fPCREOpts.
/// One letter is appended per set flag, in the fixed order
/// g, i, m, s, x, o, d.
180 TString TPRegexp::GetModifiers()
const
184 if (fPCREOpts & kPCRE_GLOBAL) ret +=
'g';
185 if (fPCREOpts & PCRE_CASELESS) ret +=
'i';
186 if (fPCREOpts & PCRE_MULTILINE) ret +=
'm';
187 if (fPCREOpts & PCRE_DOTALL) ret +=
's';
188 if (fPCREOpts & PCRE_EXTENDED) ret +=
'x';
189 if (fPCREOpts & kPCRE_OPTIMIZE) ret +=
'o';
190 if (fPCREOpts & kPCRE_DEBUG_MSGS) ret +=
'd';
/// Compile fPattern with pcre_compile() and store the result in
/// fPriv->fPCRE.
/// A previously compiled pattern is freed first. Only the option bits
/// selected by kPCRE_INTMASK are passed on to PCRE.
/// A failure is described by the pattern, the offset reported by PCRE and
/// PCRE's message. It is thrown as std::runtime_error when
/// fgThrowAtCompileError is set.
/// NOTE(review): the Error() call is presumably the non-throwing
/// alternative -- confirm against the full function.
198 void TPRegexp::Compile()
201 pcre_free(fPriv->fPCRE);
// Optional trace output, enabled by the debug-messages flag.
203 if (fPCREOpts & kPCRE_DEBUG_MSGS)
204 Info(
"Compile",
"PREGEX compiling %s", fPattern.Data());
// errstr and patIndex receive PCRE's error message and error offset.
208 fPriv->fPCRE = pcre_compile(fPattern.Data(), fPCREOpts & kPCRE_INTMASK,
209 &errstr, &patIndex, 0);
212 if (fgThrowAtCompileError) {
213 throw std::runtime_error
214 (TString::Format(
"TPRegexp::Compile() compilation of TPRegexp(%s) failed at: %d because %s",
215 fPattern.Data(), patIndex, errstr).Data());
217 Error(
"Compile",
"compilation of TPRegexp(%s) failed at: %d because %s",
218 fPattern.Data(), patIndex, errstr);
// Study data already exists, or optimisation was requested ('o' flag).
223 if (fPriv->fPCREExtra || (fPCREOpts & kPCRE_OPTIMIZE))
/// Study the compiled pattern with pcre_study() and keep the result in
/// fPriv->fPCREExtra.
/// Previously held study data is freed first. A null result counts as an
/// error only when PCRE also set an error string.
230 void TPRegexp::Optimize()
232 if (fPriv->fPCREExtra)
233 pcre_free(fPriv->fPCREExtra);
// Optional trace output, enabled by the debug-messages flag.
235 if (fPCREOpts & kPCRE_DEBUG_MSGS)
236 Info(
"Optimize",
"PREGEX studying %s", fPattern.Data());
240 fPriv->fPCREExtra = pcre_study(fPriv->fPCRE, 0, &errstr);
// No study data AND an error message: the study itself failed.
242 if (!fPriv->fPCREExtra && errstr) {
243 Error(
"Optimize",
"Optimization of TPRegexp(%s) failed: %s",
244 fPattern.Data(), errstr);
/// Expand `replacePattern`, resolving numbered references to substrings
/// of `s` captured by a previous match.
/// offVec holds one (start, end) offset pair per substring; nrMatch is
/// the number of pairs that may be referenced.
/// NOTE(review): the expanded text is presumably appended to `final`
/// -- confirm against the full function.
251 Int_t TPRegexp::ReplaceSubs(
const TString &s, TString &
final,
252 const TString &replacePattern,
253 Int_t *offVec, Int_t nrMatch)
const
// Raw character pointer used to scan the replacement pattern.
256 const char *p = replacePattern;
// Scanner loop; runs until state becomes -1.
260 while (state != -1) {
274 }
else if (!isdigit(p[1])) {
// The character after the current one is not a digit: malformed pattern.
275 Error(
"ReplaceSubs",
"badly formed replacement pattern: %s",
276 replacePattern.Data());
// Optional trace output, enabled by the debug-messages flag.
286 if (fPCREOpts & kPCRE_DEBUG_MSGS)
287 Info(
"ReplaceSubs",
"PREGEX appending substr #%d", subnum);
// Valid substring numbers are 0 .. nrMatch-1.
288 if (subnum < 0 || subnum > nrMatch-1) {
289 Error(
"ReplaceSubs",
"bad string number: %d",subnum);
// Substring #subnum: start offVec[2*subnum], length = end - start.
291 const TString subStr = s(offVec[2*subnum],offVec[2*subnum+1]-offVec[2*subnum]);
/// Run pcre_exec() on `s`, starting at offset `start`, using the compiled
/// pattern and its study data.
/// PCRE_ERROR_NOMATCH is handled separately from any other non-positive
/// result; the latter is reported through Error(). pos->Set() receives
/// the first 2*nrMatch entries of the offset vector.
/// NOTE(review): offVec is allocated with new[]; the matching delete[] is
/// not among the lines shown -- confirm every return path releases it.
307 Int_t TPRegexp::MatchInternal(
const TString &s, Int_t start,
308 Int_t nMaxMatch, TArrayI *pos)
const
// Offset vector handed to pcre_exec(), sized 3*nMaxMatch.
310 Int_t *offVec =
new Int_t[3*nMaxMatch];
312 Int_t nrMatch = pcre_exec(fPriv->fPCRE, fPriv->fPCREExtra, s.Data(),
313 s.Length(), start, 0,
314 offVec, 3*nMaxMatch);
316 if (nrMatch == PCRE_ERROR_NOMATCH)
// Any other non-positive return value is treated as an error.
318 else if (nrMatch <= 0) {
319 Error(
"Match",
"pcre_exec error = %d", nrMatch);
// Hand the (start, end) pairs of the match to the caller.
325 pos->Set(2*nrMatch, offVec);
/// Match `s` against the pattern, with the modifiers given in `mods`.
/// The modifiers are parsed with ParseMods(); the actual matching is
/// delegated to MatchInternal(), whose result is returned.
339 Int_t TPRegexp::Match(
const TString &s,
const TString &mods, Int_t start,
340 Int_t nMaxMatch, TArrayI *pos)
342 UInt_t opts = ParseMods(mods);
// Nothing compiled yet, or the requested modifiers differ from the
// options currently stored.
344 if (!fPriv->fPCRE || opts != fPCREOpts) {
349 return MatchInternal(s, start, nMaxMatch, pos);
/// Match `s` via Match() and collect the matched substrings as
/// TObjString objects in a newly allocated TObjArray.
/// One entry is added per offset pair reported by Match().
/// NOTE(review): the empty TObjString is presumably added for a pair
/// with a negative offset (else branch not shown) -- confirm.
370 TObjArray *TPRegexp::MatchS(
const TString &s,
const TString &mods,
371 Int_t start, Int_t nMaxMatch)
374 Int_t nrMatch = Match(s, mods, start, nMaxMatch, &pos);
376 TObjArray *subStrL =
new TObjArray();
379 for (Int_t i = 0; i < nrMatch; i++) {
380 Int_t startp = pos[2*i];
381 Int_t stopp = pos[2*i+1];
// Both offsets non-negative: the substring is available.
382 if (startp >= 0 && stopp >= 0) {
383 const TString subStr = s(pos[2*i], pos[2*i+1]-pos[2*i]);
384 subStrL->Add(
new TObjString(subStr));
386 subStrL->Add(
new TObjString());
/// Replace matches of the compiled pattern in `s`, searching from offset
/// `start`. The result is assembled piecewise in the local string `final`.
/// Two replacement forms are visible: expansion through ReplaceSubs(),
/// and appending replacePattern verbatim.
/// NOTE(review): the choice between them is presumably driven by
/// doDollarSubst -- confirm against the full function.
/// NOTE(review): offVec is allocated with new[]; the matching delete[] is
/// not among the lines shown -- confirm every return path releases it.
396 Int_t TPRegexp::SubstituteInternal(TString &s,
const TString &replacePattern,
397 Int_t start, Int_t nMaxMatch,
398 Bool_t doDollarSubst)
const
// Offset vector handed to pcre_exec(), sized 3*nMaxMatch.
400 Int_t *offVec =
new Int_t[3*nMaxMatch];
// Search position for the next pcre_exec() call.
404 Int_t offset = start;
411 Int_t nrMatch = pcre_exec(fPriv->fPCRE, fPriv->fPCREExtra, s.Data(),
412 s.Length(), offset, 0,
413 offVec, 3*nMaxMatch);
415 if (nrMatch == PCRE_ERROR_NOMATCH) {
418 }
else if (nrMatch <= 0) {
// Any other non-positive return value is treated as an error.
419 Error(
"Substitute",
"pcre_exec error = %d", nrMatch);
// Copy the unmatched text between the previous position and this match.
424 if (last <= offVec[0]) {
425 final += s(last,offVec[0]-last);
431 ReplaceSubs(s,
final, replacePattern, offVec, nrMatch);
433 final += replacePattern;
// Test for the absence of the global ('g') flag.
438 if (!(fPCREOpts & kPCRE_GLOBAL))
// Distinguish a non-empty match from a zero-length one.
441 if (offVec[0] != offVec[1])
// Match ended exactly at the end of the string.
445 if (offVec[1] == s.Length())
// Step one position past the match end.
447 offset = offVec[1]+1;
// Append whatever remains of s after the last handled position.
453 final += s(last,s.Length()-last);
/// Substitute matches of the pattern in `s` by `replacePattern`, with the
/// modifiers given in `mods`.
/// The work is delegated to SubstituteInternal() with doDollarSubst set
/// to kTRUE; its result is returned.
472 Int_t TPRegexp::Substitute(TString &s,
const TString &replacePattern,
473 const TString &mods, Int_t start, Int_t nMaxMatch)
475 UInt_t opts = ParseMods(mods);
// Nothing compiled yet, or the requested modifiers differ from the
// options currently stored.
477 if (!fPriv->fPCRE || opts != fPCREOpts) {
482 return SubstituteInternal(s, replacePattern, start, nMaxMatch, kTRUE);
/// Returns true when a compiled pattern is present (fPriv->fPCRE is
/// non-null).
490 Bool_t TPRegexp::IsValid()
const
492 return fPriv->fPCRE != 0;
/// Returns the current value of the class-wide fgThrowAtCompileError flag.
499 Bool_t TPRegexp::GetThrowAtCompileError()
501 return fgThrowAtCompileError;
/// Sets the class-wide fgThrowAtCompileError flag to `throwp`.
/// When set, Compile() throws std::runtime_error on a failed compilation.
508 void TPRegexp::SetThrowAtCompileError(Bool_t throwp)
510 fgThrowAtCompileError = throwp;
/// Search this string with the regular expression `r`, starting at
/// offset `start`.
/// Match() is called with an empty modifier string and a match limit
/// of 10; the offsets are collected in `pos`.
524 Ssiz_t TString::Index(TPRegexp& r, Ssiz_t start)
const
527 Int_t nrMatch = r.Match(*
this,
"",start,10,&pos);
/// Search this string with the regular expression `r`, starting at
/// offset `start`, and report the length of the match through `extent`.
/// Match() is called with an empty modifier string and a match limit
/// of 10.
539 Ssiz_t TString::Index(TPRegexp& r, Ssiz_t* extent, Ssiz_t start)
const
542 const Int_t nrMatch = r.Match(*
this,
"",start,10,&pos);
// Length spanned by the first offset pair.
544 *extent = pos[1]-pos[0];
/// Returns the TSubString of this string located by Index(r, &len, start):
/// it begins at the returned position and has the reported length.
555 TSubString TString::operator()(TPRegexp& r, Ssiz_t start)
const
558 Ssiz_t begin = Index(r, &len, start);
559 return TSubString(*
this, begin, len);
/// Convenience overload: same as operator()(r, 0), i.e. the search starts
/// at the beginning of the string.
565 TSubString TString::operator()(TPRegexp& r)
const
567 return (*
this)(r, 0);
583 ClassImp(TPMERegexp);
/// Default constructor.
/// No string has been matched yet: the remembered string address is null
/// and the global-match position is 0.
588 TPMERegexp::TPMERegexp() :
592 fAddressOfLastString(0),
593 fLastGlobalPosition(0)
/// Constructor taking the pattern `s`, a modifier string `opts` and the
/// maximum number of matches `nMatchMax`.
/// The modifier string is converted to option bits with ParseMods().
605 TPMERegexp::TPMERegexp(
const TString& s,
const TString& opts, Int_t nMatchMax) :
607 fNMaxMatches(nMatchMax),
609 fAddressOfLastString(0),
610 fLastGlobalPosition(0)
612 fPCREOpts = ParseMods(opts);
/// Constructor taking the pattern `s`, option bits `opts` and the maximum
/// number of matches `nMatchMax`.
623 TPMERegexp::TPMERegexp(
const TString& s, UInt_t opts, Int_t nMatchMax) :
625 fNMaxMatches(nMatchMax),
627 fAddressOfLastString(0),
628 fLastGlobalPosition(0)
/// Copy constructor.
/// The match limit is taken from `r`. The global-match state is NOT
/// copied: the remembered string address and position start at 0.
639 TPMERegexp::TPMERegexp(
const TPMERegexp& r) :
641 fNMaxMatches(r.fNMaxMatches),
643 fAddressOfLastString(0),
644 fLastGlobalPosition(0)
/// Reset with a new pattern `s` and a modifier string `opts`.
/// The modifiers are parsed with ParseMods() and the call is forwarded to
/// the Reset() overload that takes option bits.
653 void TPMERegexp::Reset(
const TString& s,
const TString& opts, Int_t nMatchMax)
655 Reset(s, ParseMods(opts), nMatchMax);
/// Reset with a new pattern `s` and option bits `opts`; the global-match
/// position is set back to 0.
662 void TPMERegexp::Reset(
const TString& s, UInt_t opts, Int_t nMatchMax)
// NOTE(review): nMatchMax is stored in fNMatches here, whereas the
// constructors store the same argument in fNMaxMatches -- confirm whether
// fNMaxMatches was intended.
669 fNMatches = nMatchMax;
671 fLastGlobalPosition = 0;
/// Copy the global-match state of `re` into this object: the last string
/// matched and the position reached in it.
684 void TPMERegexp::AssignGlobalState(
const TPMERegexp& re)
686 fLastStringMatched = re.fLastStringMatched;
687 fLastGlobalPosition = re.fLastGlobalPosition;
/// Set the global-match position back to 0.
696 void TPMERegexp::ResetGlobalState()
698 fLastGlobalPosition = 0;
/// Match `s` against the pattern, starting at offset `start`.
/// The result of MatchInternal() is stored in fNMatches and the offsets
/// in fMarkers. With the global flag set, the end of each match is kept
/// in fLastGlobalPosition and added to `start` on the next call.
/// Whether the same string is being matched again is decided by the
/// ADDRESS of `s`, not by its contents.
708 Int_t TPMERegexp::Match(
const TString& s, UInt_t start)
// A different string object than last time: start over.
711 if (fAddressOfLastString != (
void*) &s) {
712 fLastGlobalPosition = 0;
// Global matching continues from where the previous match ended.
715 if (fPCREOpts & kPCRE_GLOBAL) {
716 start += fLastGlobalPosition;
720 fNMatches = MatchInternal(s, start, fNMaxMatches, &fMarkers);
// Remember the string both by value and by address.
724 fLastStringMatched = s;
725 fAddressOfLastString = (
void*) &s;
727 if (fPCREOpts & kPCRE_GLOBAL) {
// No further match: the next call starts from the beginning again.
728 if (fNMatches == PCRE_ERROR_NOMATCH) {
730 fLastGlobalPosition = 0;
731 }
else if (fNMatches > 0) {
// Continue after the end of this match next time.
733 fLastGlobalPosition = fMarkers[1];
736 fLastGlobalPosition = 0;
/// Split `s` into fields, using the pattern as the separator.
/// Field boundaries are gathered as (begin, end) offset pairs in oMarks
/// and finally copied into fMarkers; fNMatches becomes the field count.
/// `maxfields` controls the splitting:
///  - a value >= 1 limits the number of separator matches processed, and
///    the last field is then extended to the end of the string;
///  - 0 holds empty fields back until a later non-empty field follows;
///  - a negative value also adds a final field when the remainder of the
///    string is empty.
765 Int_t TPMERegexp::Split(
const TString& s, Int_t maxfields)
// (begin, end) offsets of one field.
767 typedef std::pair<int, int> MarkerLoc_t;
768 typedef std::vector<MarkerLoc_t> MarkerLocVec_t;
// Fields accepted so far.
771 MarkerLocVec_t oMarks;
// Empty fields held back while maxfields == 0.
778 MarkerLocVec_t oCurrentTrailingEmpties;
781 Int_t nMatchesFound = 0;
// Keep going while the separator matches from nOffset on and the field
// limit (if any) has not been reached.
787 while ((matchRes = Match(s, nOffset)) &&
788 ((maxfields < 1) || nMatchesFound < maxfields)) {
// Zero-length separator match: emit a one-character field.
791 if (fMarkers[1] - fMarkers[0] == 0) {
792 oMarks.push_back(MarkerLoc_t(nOffset, nOffset + 1));
794 if (nOffset >= s.Length())
// The separator does not start right at nOffset: there is a non-empty
// field in front of it.
801 if (nOffset != fMarkers[0]) {
// Empty fields held back so far are followed by real content, so they
// are kept.
802 if (!oCurrentTrailingEmpties.empty()) {
803 oMarks.insert(oMarks.end(),
804 oCurrentTrailingEmpties.begin(),
805 oCurrentTrailingEmpties.end());
806 oCurrentTrailingEmpties.clear();
808 oMarks.push_back(MarkerLoc_t(nOffset, fMarkers[0]));
// Empty field: held back when maxfields == 0.
// NOTE(review): the direct push_back below is presumably the
// maxfields != 0 branch -- confirm.
811 if (maxfields == 0) {
813 oCurrentTrailingEmpties.push_back(MarkerLoc_t(nOffset, nOffset));
815 oMarks.push_back(MarkerLoc_t(nOffset, nOffset));
// Continue after the separator.
819 nOffset = fMarkers[1];
// Offset pairs 1 .. matchRes-1 of the separator match become fields too.
822 for (Int_t i = 1; i < matchRes; ++i)
823 oMarks.push_back(MarkerLoc_t(fMarkers[2*i], fMarkers[2*i + 1]));
// The separator never matched: the whole string is the only field.
829 if (nMatchesFound == 0) {
830 oMarks.push_back(MarkerLoc_t(0, s.Length()));
// Field limit reached: the last field absorbs the rest of the string.
834 else if (maxfields > 0 && nMatchesFound >= maxfields) {
835 oMarks[oMarks.size() - 1].second = s.Length();
// Remainder after the last separator; an empty remainder is only kept
// for negative maxfields.
839 Bool_t last_empty = (nOffset == s.Length());
840 if (!last_empty || maxfields < 0) {
841 if (!oCurrentTrailingEmpties.empty()) {
842 oMarks.insert(oMarks.end(),
843 oCurrentTrailingEmpties.begin(),
844 oCurrentTrailingEmpties.end());
846 oMarks.push_back(MarkerLoc_t(nOffset, s.Length()));
// Publish the fields through fNMatches / fMarkers.
850 fNMatches = oMarks.size();
851 fMarkers.Set(2*fNMatches);
852 for (Int_t i = 0; i < fNMatches; ++i) {
853 fMarkers[2*i] = oMarks[i].first;
854 fMarkers[2*i + 1] = oMarks[i].second;
/// Substitute matches of the pattern in `s` by `r`, then scan the
/// characters of `s` and build `ret` with case-changing escapes applied.
/// The substitution is done by SubstituteInternal(), from offset 0, with
/// fNMaxMatches as the match limit.
/// Escape letters and the state each one selects:
///   l -> 1: lower-case the next character only
///   u -> 2: upper-case the next character only
///   L -> 3: lower-case every following character
///   U -> 4: upper-case every following character
///   E -> 0: copy characters unchanged
/// NOTE(review): the first switch presumably handles the character after
/// a backslash -- confirm against the full function.
874 Int_t TPMERegexp::Substitute(TString& s,
const TString& r, Bool_t doDollarSubst)
876 Int_t cnt = SubstituteInternal(s, r, 0, fNMaxMatches, doDollarSubst);
880 Ssiz_t pos = 0, len = s.Length();
881 const Char_t *data = s.Data();
883 Char_t c = data[pos];
// NUL character: only a backslash is emitted.
887 case 0 : ret +=
'\\';
break;
888 case 'l': state = 1;
break;
889 case 'u': state = 2;
break;
890 case 'L': state = 3;
break;
891 case 'U': state = 4;
break;
892 case 'E': state = 0;
break;
// Not a case escape: emit the backslash and the character unchanged.
893 default : ret +=
'\\'; ret += c;
break;
898 case 0: ret += c;
break;
// States 1 and 2 apply to a single character and then fall back to 0.
899 case 1: ret += (Char_t) tolower(c); state = 0;
break;
900 case 2: ret += (Char_t) toupper(c); state = 0;
break;
// States 3 and 4 stay active.
901 case 3: ret += (Char_t) tolower(c);
break;
902 case 4: ret += (Char_t) toupper(c);
break;
903 default: Error(
"TPMERegexp::Substitute",
"invalid state.");
/// Returns the substring of the last string matched that belongs to
/// offset pair number `index` in fMarkers.
/// NOTE(review): only the upper bound of `index` is checked on the lines
/// shown; a negative index would reach fMarkers -- confirm.
918 TString TPMERegexp::operator[](
int index)
// Index beyond the available matches.
920 if (index >= fNMatches)
923 Int_t begin = fMarkers[2*index];
924 Int_t end = fMarkers[2*index + 1];
925 return fLastStringMatched(begin, end-begin);
/// Print the pattern and its modifiers.
/// When `option` contains "all", the last string matched, the number of
/// matches and every matched substring are printed as well.
933 void TPMERegexp::Print(Option_t* option)
935 TString opt = option;
938 Printf(
"Regexp='%s', Opts='%s'", fPattern.Data(), GetModifiers().Data());
939 if (opt.Contains(
"all")) {
940 Printf(
" last string='%s'", fLastStringMatched.Data());
941 Printf(
" number of matches = %d", fNMatches);
// One line per match, obtained through operator[].
942 for (Int_t i=0; i<fNMatches; ++i)
943 Printf(
" %d - %s", i,
operator[](i).Data());
960 ClassImp(TStringToken);
/// Constructor taking the string to tokenize (`fullStr`), the separator
/// regular expression (`splitRe`) and `retVoid`, which is stored in
/// fReturnVoid and lets NextToken() accept empty tokens.
965 TStringToken::TStringToken(
const TString& fullStr,
const TString& splitRe, Bool_t retVoid) :
968 fReturnVoid (retVoid),
/// Advance to the next token of fFullStr; the token text is assigned to
/// this object through TString::operator=.
/// Empty tokens are only accepted when fReturnVoid is set.
977 Bool_t TStringToken::NextToken()
980 while (fPos < fFullStr.Length()) {
// Look for the next separator from fPos on; its offsets end up in x.
981 if (fSplitRe.Match(fFullStr,
"", fPos, 2, &x)) {
// Token = text between fPos and the start of the separator.
982 TString::operator=(fFullStr(fPos, x[0] - fPos));
// No separator left: the token is the rest of the string, and fPos is
// moved past the end.
985 TString::operator=(fFullStr(fPos, fFullStr.Length() - fPos));
986 fPos = fFullStr.Length() + 1;
// Accept a non-empty token, or any token when empty ones are allowed.
988 if (Length() || fReturnVoid)
// Positioned exactly at the end of the string with empty tokens allowed:
// emit one final empty token and move fPos past the end.
994 if (fPos == fFullStr.Length() && fReturnVoid) {
995 TString::operator=(
"");
996 fPos = fFullStr.Length() + 1;