/** * Copyright (c) 2001, Sergey A. Samokhodkin * All rights reserved. * * Redistribution and use in source and binary forms, with or without modification, * are permitted provided that the following conditions are met: * * - Redistributions of source code must retain the above copyright notice, * this list of conditions and the following disclaimer. * - Redistributions in binary form * must reproduce the above copyright notice, this list of conditions and the following * disclaimer in the documentation and/or other materials provided with the distribution. * - Neither the name of jregex nor the names of its contributors may be used * to endorse or promote products derived from this software without specific prior * written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY * WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * * @version 1.2_01 */ package totalcross.util.regex; import totalcross.io.*; import totalcross.sys.*; import totalcross.util.*; /** * Matcher instance is an automaton that actually performs matching. 
It provides the following methods:
 * <li> searching for matching substrings : matcher.find() or matcher.findAll();
 * <li> testing whether a text matches a whole pattern : matcher.matches();
 * <li> testing whether the text matches the beginning of a pattern : matcher.matchesPrefix();
 * <li> searching with custom options : matcher.find(int options)
 * <p>
 * <b>Obtaining results</b><br>
 * After a search has succeeded, i.e. if one of the above methods returned <code>true</code>,
 * one may obtain information on the match:
 * <li> may check whether some group is captured : matcher.isCaptured(int);
 * <li> may obtain start and end positions of the match and its length : matcher.start(int),matcher.end(int),matcher.length(int);
 * <li> may obtain match contents as String : matcher.group(int).<br>
 * The match prefix and suffix information can be obtained the same way.
 * The appropriate methods are grouped in the MatchResult interface, which the Matcher class implements.<br>
 * Matcher objects are not thread-safe, so only one thread may use a matcher instance at a time.
 * Note that Pattern objects are thread-safe (the same instance may be shared between
 * multiple threads), and the typical tactic in multithreaded applications is to have one Pattern instance per expression (a singleton),
 * and one Matcher object per thread.
 */
public class Matcher implements MatchResult{
   /* Matching options */

   /**
    * The same effect as "^" without REFlags.MULTILINE.
    * @see Matcher#find(int)
    */
   public static final int ANCHOR_START=1;

   /**
    * The same effect as "\\G".
    * @see Matcher#find(int)
    */
   public static final int ANCHOR_LASTMATCH=2;

   /**
    * The same effect as "$" without REFlags.MULTILINE.
    * @see Matcher#find(int)
    */
   public static final int ANCHOR_END=4;

   /**
    * Experimental option; if the text ends before the end of the pattern, report a match.
    * @see Matcher#find(int)
    */
   public static final int ACCEPT_INCOMPLETE=8;

   //see search(ANCHOR_START|...)
private static Term startAnchor=new Term(Term.START); //see search(ANCHOR_LASTMATCH|...) private static Term lastMatchAnchor=new Term(Term.LAST_MATCH_END); private Pattern re; private int[] counters; private MemReg[] memregs; private LAEntry[] lookaheads; private char[] data; private int offset,end,wOffset,wEnd; private boolean shared; private SearchEntry top; //stack entry private SearchEntry first; //object pool entry private SearchEntry defaultEntry; //called when moving the window private boolean called; private int minQueueLength; private String cache; //cache may be longer than the actual data //and contrariwise; so cacheOffset may have both signs. //cacheOffset is actually -(data offset). private int cacheOffset,cacheLength; private MemReg prefixBounds,suffixBounds,targetBounds; Matcher(Pattern regex){ this.re=regex; //int memregCount=(memregs=new MemReg[regex.memregs]).length; //for(int i=0;i<memregCount;i++){ // this.memregs[i]=new MemReg(-1); //unlikely to SearchEntry, in this case we know memreg indicies by definition //} //counters=new int[regex.counters]; //int lookaheadCount=(lookaheads=new LAEntry[regex.lookaheads]).length; //for(int i=0;i<lookaheadCount;i++){ // this.lookaheads[i]=new LAEntry(); //} int memregCount,counterCount,lookaheadCount; if((memregCount=regex.memregs)>0){ MemReg[] memregs=new MemReg[memregCount]; for(int i=0;i<memregCount;i++){ memregs[i]=new MemReg(-1); //unlikely to SearchEntry, in this case we know memreg indicies by definition } this.memregs=memregs; } if((counterCount=regex.counters)>0) counters=new int[counterCount]; if((lookaheadCount=regex.lookaheads)>0){ LAEntry[] lookaheads=new LAEntry[lookaheadCount]; for(int i=0;i<lookaheadCount;i++){ lookaheads[i]=new LAEntry(); } this.lookaheads=lookaheads; } first=new SearchEntry(); defaultEntry=new SearchEntry(); minQueueLength=regex.stringRepr.length()/2; // just evaluation!!! } /** * This method allows to efficiently pass data between matchers. 
* Note that a matcher may pass data to itself:<pre> * Matcher m=new Pattern("\\w+").matcher(myString); * if(m.find())m.setTarget(m,m.SUFFIX); //forget all that is not a suffix * </pre> * Resets current search position to zero. * @param m - a matcher that is a source of data * @param groupId - which group to take data from * @see Matcher#setTarget(java.lang.String) * @see Matcher#setTarget(java.lang.String,int,int) * @see Matcher#setTarget(char[],int,int) */ public final void setTarget(Matcher m, int groupId){ MemReg mr=m.bounds(groupId); //System.out.println("setTarget("+m+","+groupId+")"); //System.out.println(" in="+mr.in); //System.out.println(" out="+mr.out); if(mr==null) throw new IllegalArgumentException("group #"+groupId+" is not assigned"); data=m.data; offset=mr.in; end=mr.out; cache=m.cache; cacheLength=m.cacheLength; cacheOffset=m.cacheOffset; if(m!=this){ shared=true; m.shared=true; } init(); } /** * Supplies a text to search in/match with. * Resets current search position to zero. * @param text - a data * @see Matcher#setTarget(totalcross.util.regex.Matcher,int) * @see Matcher#setTarget(java.lang.String,int,int) * @see Matcher#setTarget(char[],int,int) */ public void setTarget(String text){ setTarget(text,0,text.length()); } /** * Supplies a text to search in/match with, as a part of String. * Resets current search position to zero. 
* @param text - a data source * @param start - where the target starts * @param len - how long is the target * @see Matcher#setTarget(totalcross.util.regex.Matcher,int) * @see Matcher#setTarget(java.lang.String) * @see Matcher#setTarget(char[],int,int) */ public void setTarget(String text,int start,int len){ char[] mychars=data; if(mychars==null || shared || mychars.length<len){ data=mychars=new char[(int)(1.7f*len)]; shared=false; } text.getChars(start,len,mychars,0); //(srcBegin,srcEnd,dst[],dstBegin) offset=0; end=len; cache=text; cacheOffset=-start; cacheLength=text.length(); init(); } /** * Supplies a text to search in/match with, as a part of char array. * Resets current search position to zero. * @param text - a data source * @param start - where the target starts * @param len - how long is the target * @see Matcher#setTarget(totalcross.util.regex.Matcher,int) * @see Matcher#setTarget(java.lang.String) * @see Matcher#setTarget(java.lang.String,int,int) */ public void setTarget(char[] text,int start,int len){ setTarget(text,start,len,true); } /** * To be used with much care. * Supplies a text to search in/match with, as a part of a char array, as above, but also allows to permit * to use the array as internal buffer for subsequent inputs. That is, if we call it with <code>shared=false</code>:<pre> * myMatcher.setTarget(myCharArray,x,y,<b>false</b>); //we declare that array contents is NEITHER shared NOR will be used later, so may modifications on it are permitted * </pre> * then we should expect the array contents to be changed on subsequent setTarget(..) operations. * Such method may yield some increase in perfomanse in the case of multiple setTarget() calls. * Resets current search position to zero. 
* @param text - a data source * @param start - where the target starts * @param len - how long is the target * @param shared - if <code>true<code>: data are shared or used later, <b>don't</b> modify it; if <code>false<code>: possible modifications of the text on subsequent <code>setTarget()</code> calls are perceived and allowed. * @see Matcher#setTarget(totalcross.util.regex.Matcher,int) * @see Matcher#setTarget(java.lang.String) * @see Matcher#setTarget(java.lang.String,int,int) * @see Matcher#setTarget(char[],int,int) */ public final void setTarget(char[] text,int start,int len,boolean shared){ cache=null; data=text; offset=start; end=start+len; this.shared=shared; init(); } /** * Supplies a text to search in/match with through a stream. * Resets current search position to zero. * @param in - a data stream; * @param len - how much characters should be read; if len is -1, read the entire stream. * @see Matcher#setTarget(totalcross.util.regex.Matcher,int) * @see Matcher#setTarget(java.lang.String) * @see Matcher#setTarget(java.lang.String,int,int) * @see Matcher#setTarget(char[],int,int) */ public void setTarget(CharStream in,int len)throws IOException{ if(len<0){ setAll(in); return; } char[] mychars=data; boolean shared=this.shared; if(mychars==null || shared || mychars.length<len){ mychars=new char[len]; shared=false; } int count=0; int c; while((c=in.read(mychars,count,len))>=0){ len-=c; count+=c; if(len==0) break; } setTarget(mychars,0,count,shared); } private void setAll(CharStream in)throws IOException{ char[] mychars=data; int free; boolean shared=this.shared; if(mychars==null || shared){ mychars=new char[free=1024]; shared=false; } else free=mychars.length; int count=0; int c; while((c=in.read(mychars,count,free))>=0){ free-=c; count+=c; if(free==0){ int newsize=count*3; char[] newchars=new char[newsize]; Vm.arrayCopy(mychars,0,newchars,0,count); mychars=newchars; free=newsize-count; shared=false; } } setTarget(mychars,0,count,shared); } private final 
String getString(int start,int end){ String src=cache; if(src!=null){ int co=cacheOffset; return src.substring(start-co,end-co); } int tOffset,tLen=this.end-(tOffset=this.offset); char[] data=this.data; if((end-start)>=(tLen/3)){ //it makes sence to make a cache cache=src=new String(data,tOffset,tLen); cacheOffset=tOffset; cacheLength=tLen; return src.substring(start-tOffset,end-tOffset); } return new String(data,start,end-start); } /* Matching */ /** * Tells whether the entire target matches the beginning of the pattern. * The whole pattern is also regarded as its beginning.<br> * This feature allows to find a mismatch by examining only a beginning part of * the target (as if the beginning of the target doesn't match the beginning of the pattern, then the entire target * also couldn't match).<br> * For example the following assertions yield <code>true<code>:<pre> * Pattern p=new Pattern("abcd"); * p.matcher("").matchesPrefix(); * p.matcher("a").matchesPrefix(); * p.matcher("ab").matchesPrefix(); * p.matcher("abc").matchesPrefix(); * p.matcher("abcd").matchesPrefix(); * </pre> * and the following yield <code>false<code>:<pre> * p.matcher("b").isPrefix(); * p.matcher("abcdef").isPrefix(); * p.matcher("x").isPrefix(); * </pre> * @return true if the entire target matches the beginning of the pattern */ public final boolean matchesPrefix(){ setPosition(0); return search(ANCHOR_START|ACCEPT_INCOMPLETE|ANCHOR_END); } /** * Just an old name for isPrefix().<br> * Retained for backwards compatibility. * @deprecated Replaced by isPrefix() */ public final boolean isStart(){ return matchesPrefix(); } /** * Tells whether a current target matches the whole pattern. 
* For example the following yields the <code>true<code>:<pre> * Pattern p=new Pattern("\\w+"); * p.matcher("a").matches(); * p.matcher("ab").matches(); * p.matcher("abc").matches(); * </pre> * and the following yields the <code>false<code>:<pre> * p.matcher("abc def").matches(); * p.matcher("bcd ").matches(); * p.matcher(" bcd").matches(); * p.matcher("#xyz#").matches(); * </pre> * @return whether a current target matches the whole pattern. */ public final boolean matches(){ if(called) setPosition(0); return search(ANCHOR_START|ANCHOR_END); } /** * Just a combination of setTarget(String) and matches(). * @param s the target string; * @return whether the specified string matches the whole pattern. */ public final boolean matches(String s){ setTarget(s); return search(ANCHOR_START|ANCHOR_END); } /** * Allows to set a position the subsequent find()/find(int) will start from. * @param pos the position to start from; * @see Matcher#find() * @see Matcher#find(int) */ public void setPosition(int pos){ wOffset=offset+pos; wEnd=-1; called=false; flush(); } /** * Searches through a target for a matching substring, starting from just after the end of last match. * If there wasn't any search performed, starts from zero. * @return <code>true</code> if a match found. */ public final boolean find(){ if(called) skip(); return search(0); } /** * Searches through a target for a matching substring, starting from just after the end of last match. * If there wasn't any search performed, starts from zero. * @param anchors a zero or a combination(bitwise OR) of ANCHOR_START,ANCHOR_END,ANCHOR_LASTMATCH,ACCEPT_INCOMPLETE * @return <code>true</code> if a match found. 
*/ public final boolean find(int anchors){ if(called) skip(); return search(anchors); } /** * The same as findAll(int), but with default behaviour; */ public MatchIterator findAll(){ return findAll(0); } /** * Returns an iterator over the matches found by subsequently calling find(options), the search starts from the zero position. */ public MatchIterator findAll(final int options){ //setPosition(0); return new MatchIterator(){ private boolean checked=false; private boolean hasMore=false; public boolean hasMore(){ if(!checked) check(); return hasMore; } public MatchResult nextMatch() throws ElementNotFoundException{ if(!checked) check(); if(!hasMore) throw new ElementNotFoundException(""); checked=false; return Matcher.this; } private final void check(){ hasMore=find(options); checked=true; } public int count(){ if(!checked) check(); if(!hasMore) return 0; int c=1; while(find(options))c++; checked=false; return c; } }; } /** * Continues to search from where the last search left off. * The same as proceed(0). * @see Matcher#proceed(int) */ public final boolean proceed(){ return proceed(0); } /** * Continues to search from where the last search left off using specified options:<pre> * Matcher m=new Pattern("\\w+").matcher("abc"); * while(m.proceed(0)){ * System.out.println(m.group(0)); * } * </pre> * Output:<pre> * abc * ab * a * bc * b * c * </pre> * For example, let's find all odd nubmers occuring in a text:<pre> * Matcher m=new Pattern("\\d+").matcher("123"); * while(m.proceed(0)){ * String match=m.group(0); * if(isOdd(Integer.parseInt(match))) System.out.println(match); * } * * static boolean isOdd(int i){ * return (i&1)>0; * } * </pre> * This outputs:<pre> * 123 * 1 * 23 * 3 * </pre> * Note that using <code>find()</code> method we would find '123' only. * @param options search options, some of ANCHOR_START|ANCHOR_END|ANCHOR_LASTMATCH|ACCEPT_INCOMPLETE; zero value(default) stands for usual search for substring. 
*/
   public final boolean proceed(int options){
      if(called){
         // nothing left on the backtracking stack: move the window one character forward
         if(top==null){
            wOffset++;
         }
      }
      // honour the caller's options (previously they were ignored and 0 was always used)
      return search(options);
   }

   /**
    * Sets the current search position just after the end of last match.
    */
   public final void skip(){
      int we=wEnd;
      if(wOffset==we){ //empty match: requires special handling
         //if there are no variants left, advance the pointer and clear the state
         if(top==null){
            wOffset++;
            flush();
         }
         //otherwise, if there exists a variant,
         //don't clear, i.e. allow it to match
         return;
      }
      // NOTE(review): with no match window (we<0) the position becomes 0 rather than 'offset';
      // confirm this is intended for targets that do not start at index 0 of the array.
      wOffset= we<0? 0: we;
      flush();
   }

   /**
    * Rewinds the match window to the start of the target and clears the search state.
    */
   private final void init(){
      wOffset=offset;
      wEnd=-1;
      called=false;
      flush();
   }

   /**
    * Resets the internal state: drops the backtracking stack, resets the entry pool
    * and marks every memory register except #0 as unassigned.
    */
   private final void flush(){
      top=null;
      defaultEntry.reset(0);
      first.reset(minQueueLength);
      // the constructor leaves memregs null when the pattern declares no registers;
      // register #0 is deliberately left untouched, as before
      MemReg[] regs=memregs;
      if(regs!=null){
         for(int i=regs.length-1;i>0;i--){
            MemReg mr=regs[i];
            mr.in=mr.out=-1;
         }
      }
      called=false;
   }

   /**
    * Returns the text of the current match window.
    */
   public String toString(){
      return getString(wOffset,wEnd);
   }

   /**
    * Returns the pattern this matcher was created for.
    */
   public Pattern pattern(){
      return re;
   }

   /**
    * Returns the whole target as a String.
    */
   public String target(){
      return getString(offset,end);
   }

   /**
    * Returns the internal character array itself (not a copy) and marks it as shared,
    * so that this matcher will not overwrite it on later setTarget() calls.
    * The target occupies the range [targetStart(),targetEnd()) of the array.
    */
   public char[] targetChars(){
      shared=true;
      return data;
   }

   /**
    * Returns the index in targetChars() at which the target starts.
    */
   public int targetStart(){
      return offset;
   }

   /**
    * Returns the index in targetChars() just past the end of the target.
    */
   public int targetEnd(){
      return end;
   }

   /**
    * Returns the character at position <code>i</code> counted from the start of the current match.
    * @throws IllegalStateException if there is no current match
    */
   public char charAt(int i){
      int in=this.wOffset;
      int out=this.wEnd;
      if(in<0 || out<in) throw new IllegalStateException("unassigned");
      return data[in+i];
   }

   /**
    * Returns the character at position <code>i</code> counted from the start of the given group.
    * @throws IllegalStateException if the group is not assigned
    * @throws StringIndexOutOfBoundsException if <code>i</code> is negative or not less than the group length
    */
   public char charAt(int i,int groupId){
      MemReg mr=bounds(groupId);
      if(mr==null) throw new IllegalStateException("group #"+groupId+" is not assigned");
      int in=mr.in;
      // valid indices are 0..length-1; the former 'i>length' test let i==length slip through
      if(i<0 || i>=(mr.out-in)) throw new StringIndexOutOfBoundsException(""+i);
      return data[in+i];
   }

   /**
    * Returns the length of the current match window.
    */
   public final int length(){
      return wEnd-wOffset;
   }

   /**
    * Returns the start of the current match, relative to the start of the target.
    */
   public final int start(){
      return wOffset-offset;
   }

   /**
    * Returns the end of the current match, relative to the start of the target.
    */
   public final int end(){
      return wEnd-offset;
   }

   /**
    * Returns the part of the target that precedes the current match.
    */
   public String prefix(){
      return getString(offset,wOffset);
   }

   /**
    * Returns the part of the target that follows the current match.
    */
   public String suffix(){
      return getString(wEnd,end);
   }

   /**
    * Returns the number of memory registers (groups) of this matcher.
    */
   public int groupCount(){
      return memregs.length;
   }

   /**
    * Returns the contents of group <code>n</code>, or null if that group is not assigned.
    */
   public String group(int n){
      MemReg mr=bounds(n);
      if(mr==null) return null;
      return getString(mr.in,mr.out);
   }

   /**
    * Returns the contents of the named group, or null if that group is not assigned.
    * @throws IllegalArgumentException if the pattern defines no group with that name
    */
   public String group(String name){
      Integer id=re.groupId(name);
      if(id==null) throw new IllegalArgumentException("<"+name+"> isn't defined");
      return group(id.intValue());
   }

   /**
    * Appends the contents of group <code>n</code> to the buffer.
    * @return false (and appends nothing) if the group is not assigned
    */
   public boolean getGroup(int n,TextBuffer tb){
      MemReg mr=bounds(n);
      if(mr==null) return false;
      int in=mr.in;
      tb.append(data,in,mr.out-in);
      return true;
   }

   /**
    * Appends the contents of the named group to the buffer.
    * @return false (and appends nothing) if the group is not assigned
    * @throws IllegalArgumentException if the pattern defines no group with that name
    */
   public boolean getGroup(String name,TextBuffer tb){
      Integer id=re.groupId(name);
      if(id==null) throw new IllegalArgumentException("unknown group: \""+name+"\"");
      return getGroup(id.intValue(),tb);
   }

   /**
    * Appends the contents of group <code>n</code> to the buffer.
    * @return false (and appends nothing) if the group is not assigned
    */
   public boolean getGroup(int n,StringBuffer sb){
      MemReg mr=bounds(n);
      if(mr==null) return false;
      int in=mr.in;
      sb.append(data,in,mr.out-in);
      return true;
   }

   /**
    * Appends the contents of the named group to the buffer.
    * @return false (and appends nothing) if the group is not assigned
    * @throws IllegalArgumentException if the pattern defines no group with that name
    */
   public boolean getGroup(String name,StringBuffer sb){
      Integer id=re.groupId(name);
      if(id==null) throw new IllegalArgumentException("unknown group: \""+name+"\"");
      return getGroup(id.intValue(),sb);
   }

   /**
    * Returns the contents of all groups; entries of unassigned groups are null.
    */
   public String[] groups(){
      MemReg[] memregs=this.memregs;
      String[] groups=new String[memregs.length];
      for(int i=0;i<memregs.length;i++){
         MemReg mr=memregs[i];
         int in=mr.in;
         int out=mr.out;
         if(in<0 || out<in) continue; // unassigned: leave null
         groups[i]=getString(in,out);
      }
      return groups;
   }

   /**
    * Returns the contents of all groups as a Vector; the string "empty" stands in for unassigned groups.
    */
   public Vector groupv(){
      MemReg[] memregs=this.memregs;
      Vector v=new Vector();
      MemReg mr;
      for(int i=0;i<memregs.length;i++){
         mr=bounds(i);
         if(mr==null){
            v.addElement("empty");
            continue;
         }
         String s=getString(mr.in,mr.out);
         v.addElement(s);
      }
      return v;
   }

   private final
MemReg bounds(int id){
      // Resolves a group id to the register holding its bounds, or returns null when the
      // group is not assigned (in<0 or out<in). Non-negative ids index memregs directly;
      // PREFIX, SUFFIX and TARGET are served by lazily created registers whose bounds are
      // recomputed from the target and window positions on every call.
      //System.out.println("Matcher.bounds("+id+"):");
      MemReg mr;
      if(id>=0){
         mr=memregs[id];
      }
      else switch(id){
         case PREFIX:
            // everything from the target start up to the match start
            mr=prefixBounds;
            if(mr==null) prefixBounds=mr=new MemReg(PREFIX);
            mr.in=offset;
            mr.out=wOffset;
            break;
         case SUFFIX:
            // everything from the match end up to the target end
            mr=suffixBounds;
            if(mr==null) suffixBounds=mr=new MemReg(SUFFIX);
            mr.in=wEnd;
            mr.out=end;
            break;
         case TARGET:
            // the whole target
            mr=targetBounds;
            if(mr==null) targetBounds=mr=new MemReg(TARGET);
            mr.in=offset;
            mr.out=end;
            break;
         default:
            throw new IllegalArgumentException("illegal group id: "+id+"; must either nonnegative int, or MatchResult.PREFIX, or MatchResult.SUFFIX");
      }
      //System.out.println(" mr=["+mr.in+","+mr.out+"]");
      int in;
      if((in=mr.in)<0 || mr.out<in) return null;
      return mr;
   }

   /**
    * Tells whether there is a current match, i.e. whether the match window is assigned.
    */
   public final boolean isCaptured(){
      return wOffset>=0 && wEnd>=wOffset;
   }

   /**
    * Tells whether the group with the given id is assigned.
    * @throws IllegalArgumentException if a negative id is none of the ids handled by bounds(int)
    */
   public final boolean isCaptured(int id){
      return bounds(id)!=null;
   }

   /**
    * Tells whether the named group is assigned.
    * @throws IllegalArgumentException if the pattern defines no group with that name
    */
   public final boolean isCaptured(String groupName){
      Integer id=re.groupId(groupName);
      if(id==null) throw new IllegalArgumentException("unknown group: \""+groupName+"\"");
      return isCaptured(id.intValue());
   }

   /**
    * Returns the length of the given group.
    * Note: bounds(id) returns null for an unassigned group, in which case this throws a NullPointerException.
    */
   public final int length(int id){
      MemReg mr=bounds(id);
      return mr.out-mr.in;
   }

   /**
    * Returns the start of the given group, relative to the start of the target.
    * Note: bounds(id) returns null for an unassigned group, in which case this throws a NullPointerException.
    */
   public final int start(int id){
      return bounds(id).in-offset;
   }

   /**
    * Returns the end of the given group, relative to the start of the target.
    * Note: bounds(id) returns null for an unassigned group, in which case this throws a NullPointerException.
    */
   public final int end(int id){
      return bounds(id).out-offset;
   }

   private final boolean search(int anchors){
      called=true;
      // work on local copies of the matcher's fields
      final int end=this.end;
      int offset=this.offset;
      char[] data=this.data;
      int wOffset=this.wOffset;
      int wEnd=this.wEnd;
      MemReg[] memregs=this.memregs;
      int[] counters=this.counters;
      LAEntry[] lookaheads=this.lookaheads;
      //int memregCount=memregs.length;
      //int cntCount=counters.length;
      SearchEntry defaultEntry=this.defaultEntry;
      SearchEntry first=this.first;
      SearchEntry top=this.top;
      SearchEntry actual=null;
      int cnt,regLen;
      int i;
      // decode the option bits that are consulted later in the method
      final boolean matchEnd=(anchors&ANCHOR_END)>0;
      final boolean allowIncomplete=(anchors&ACCEPT_INCOMPLETE)>0;
      Pattern re=this.re;
      Term root=re.root;
      Term term;
      if(top==null){
if((anchors&ANCHOR_START)>0){ term=re.root0; //raw root root=startAnchor; } else if((anchors&ANCHOR_LASTMATCH)>0){ term=re.root0; //raw root root=lastMatchAnchor; } else{ term=root; //optimized root } i=wOffset; actual=first; SearchEntry.popState(defaultEntry,memregs,counters); } else{ top=(actual=top).sub; term=actual.term; i=actual.index; SearchEntry.popState(actual,memregs,counters); } cnt=actual.cnt; regLen=actual.regLen; main: while(wOffset<=end){ matchHere: for(;;){ /* System.out.print("char: "+i+", term: "); System.out.print(term.toString()); System.out.print(" // mrs:{"); for(int dbi=0;dbi<memregs.length;dbi++){ System.out.print('['); System.out.print(memregs[dbi].in); System.out.print(','); System.out.print(memregs[dbi].out); System.out.print(']'); System.out.print(' '); } System.out.print("}, crs:{"); for(int dbi=0;dbi<counters.length;dbi++){ System.out.print(counters[dbi]); if(dbi<counters.length-1)System.out.print(','); } System.out.println("}"); */ int memreg,cntreg; char c; //System.out.print("("+term.instanceNum+"/"+i+")"); switch(term.type){ case Term.FIND:{ int jump=find(data,i+term.distance,end,term.target); //don't eat the last match if(jump<0) break main; //return false i+=jump; wOffset=i; //force window to move if(term.eat){ if(i==end) break; i++; } term=term.next; continue matchHere; } case Term.FINDREG:{ MemReg mr=memregs[term.target.memreg]; int sampleOff=mr.in; int sampleLen=mr.out-sampleOff; //if(sampleOff<0 || sampleLen<0) throw new Error("backreference used before definition: \\"+term.memreg); /*@since 1.2*/ if(sampleOff<0 || sampleLen<0){ break; } else if(sampleLen==0){ term=term.next; continue matchHere; } int jump=findReg(data,i+term.distance,sampleOff,sampleLen,term.target,end); //don't eat the last match if(jump<0) break main; //return false i+=jump; wOffset=i; //force window to move if(term.eat){ i+=sampleLen; if(i>end) break; } term=term.next; continue matchHere; } case Term.VOID: term=term.next; continue matchHere; case 
Term.CHAR: //can only be 1-char-wide // \/ if(i>=end || data[i]!=term.c) break; //System.out.println("CHAR: "+data[i]+", i="+i); i++; term=term.next; continue matchHere; case Term.ANY_CHAR: //can only be 1-char-wide // \/ if(i>=end) break; i++; term=term.next; continue matchHere; case Term.ANY_CHAR_NE: //can only be 1-char-wide // \/ if(i>=end || (c=data[i])=='\r' || c=='\n') break; i++; term=term.next; continue matchHere; case Term.END: if(i>=end){ //meets term=term.next; continue matchHere; } break; case Term.END_EOL: //perl's $ if(i>=end){ //meets term=term.next; continue matchHere; } else{ boolean matches= i>=end | ((i+1)==end && data[i]=='\n') | ((i+2)==end && data[i]=='\r' && data[i+1]=='\n'); if(matches){ term=term.next; continue matchHere; } else break; } case Term.LINE_END: if(i>=end){ //meets term=term.next; continue matchHere; } else{ /* if(((c=data[i])=='\r' || c=='\n') && (c=data[i-1])!='\r' && c!='\n'){ term=term.next; continue matchHere; } */ //5 aug 2001 if((c=data[i])=='\r' || c=='\n'){ term=term.next; continue matchHere; } } break; case Term.START: //Perl's "^" if(i==offset){ //meets term=term.next; continue matchHere; } //break; //changed on 27-04-2002 //due to a side effect: if ALLOW_INCOMPLETE is enabled, //the anchorStart moves up to the end and succeeds //(see comments at the last lines of matchHere, ~line 1830) //Solution: if there are some entries on the stack ("^a|b$"), //try them; otherwise it's a final 'no' //if(top!=null) break; //else break main; //changed on 25-05-2002 //rationale: if the term is startAnchor, //it's the root term by definition, //so if it doesn't match, the entire pattern //couldn't match too; //otherwise we could have the following problem: //"c|^a" against "abc" finds only "a" if(top!=null) break; if(term!=startAnchor) break; else break main; case Term.LAST_MATCH_END: if(i==wEnd){ //meets term=term.next; continue matchHere; } break main; //return false case Term.LINE_START: if(i==offset){ //meets term=term.next; 
continue matchHere; } else if(i<end){ /* if(((c=data[i-1])=='\r' || c=='\n') && (c=data[i])!='\r' && c!='\n'){ term=term.next; continue matchHere; } */ //5 aug 2001 //if((c=data[i-1])=='\r' || c=='\n'){ ?? if((c=data[i-1])=='\n' || ((c=='\r') && (data[i]!='\n'))){ term=term.next; continue matchHere; } } break; case Term.BITSET:{ //can only be 1-char-wide // \/ if(i>=end) break; c=data[i]; if(!(c<=255 && term.bitset[c])^term.inverse) break; i++; term=term.next; continue matchHere; } case Term.BITSET2:{ //can only be 1-char-wide // \/ if(i>=end) break; c=data[i]; boolean[] arr=term.bitset2[c>>8]; if(arr==null || !arr[c&255]^term.inverse) break; i++; term=term.next; continue matchHere; } case Term.BOUNDARY:{ boolean ch1Meets=false,ch2Meets=false; boolean[] bitset=term.bitset; test1:{ int j=i-1; //if(j<offset || j>=end) break test1; if(j<offset) break test1; c= data[j]; ch1Meets= (c<256 && bitset[c]); } test2:{ //if(i<offset || i>=end) break test2; if(i>=end) break test2; c= data[i]; ch2Meets= (c<256 && bitset[c]); } if(ch1Meets^ch2Meets^term.inverse){ //meets term=term.next; continue matchHere; } else break; } case Term.UBOUNDARY:{ boolean ch1Meets=false,ch2Meets=false; boolean[][] bitset2=term.bitset2; test1:{ int j=i-1; //if(j<offset || j>=end) break test1; if(j<offset) break test1; c= data[j]; boolean[] bits=bitset2[c>>8]; ch1Meets= bits!=null && bits[c&0xff]; } test2:{ //if(i<offset || i>=end) break test2; if(i>=end) break test2; c= data[i]; boolean[] bits=bitset2[c>>8]; ch2Meets= bits!=null && bits[c&0xff]; } if(ch1Meets^ch2Meets^term.inverse){ //is boundary ^ inv term=term.next; continue matchHere; } else break; } case Term.DIRECTION:{ boolean ch1Meets=false,ch2Meets=false; boolean[] bitset=term.bitset; boolean inv=term.inverse; //System.out.println("i="+i+", inv="+inv+", bitset="+CharacterClass.stringValue0(bitset)); int j=i-1; //if(j>=offset && j<end){ if(j>=offset){ c= data[j]; ch1Meets= c<256 && bitset[c]; //System.out.println(" ch1Meets="+ch1Meets); } 
if(ch1Meets^inv) break; //if(i>=offset && i<end){ if(i<end){ c= data[i]; ch2Meets= c<256 && bitset[c]; //System.out.println(" ch2Meets="+ch2Meets); } if(!ch2Meets^inv) break; //System.out.println(" Ok"); term=term.next; continue matchHere; } case Term.UDIRECTION:{ boolean ch1Meets=false,ch2Meets=false; boolean[][] bitset2=term.bitset2; boolean inv=term.inverse; int j=i-1; //if(j>=offset && j<end){ if(j>=offset){ c= data[j]; boolean[] bits=bitset2[c>>8]; ch1Meets= bits!=null && bits[c&0xff]; } if(ch1Meets^inv) break; //if(i>=offset && i<end){ if(i<end){ c= data[i]; boolean[] bits=bitset2[c>>8]; ch2Meets= bits!=null && bits[c&0xff]; } if(!ch2Meets^inv) break; term=term.next; continue matchHere; } case Term.REG:{ MemReg mr=memregs[term.memreg]; int sampleOffset=mr.in; int sampleOutside=mr.out; int rLen; if(sampleOffset<0 || (rLen=sampleOutside-sampleOffset)<0){ break; } else if(rLen==0){ term=term.next; continue matchHere; } // don't prevent us from reaching the 'end' if((i+rLen)>end) break; if(compareRegions(data,sampleOffset,i,rLen,end)){ i+=rLen; term=term.next; continue matchHere; } break; } case Term.REG_I:{ MemReg mr=memregs[term.memreg]; int sampleOffset=mr.in; int sampleOutside=mr.out; int rLen; if(sampleOffset<0 || (rLen=sampleOutside-sampleOffset)<0){ break; } else if(rLen==0){ term=term.next; continue matchHere; } // don't prevent us from reaching the 'end' if((i+rLen)>end) break; if(compareRegionsI(data,sampleOffset,i,rLen,end)){ i+=rLen; term=term.next; continue matchHere; } break; } case Term.REPEAT_0_INF:{ //System.out.println("REPEAT, i="+i+", term.minCount="+term.minCount+", term.maxCount="+term.maxCount); //i+=(cnt=repeat(data,i,end,term.target)); if((cnt=repeat(data,i,end,term.target))<=0){ term=term.next; continue; } i+=cnt; //branch out the backtracker (that is term.failNext, see Term.make*()) actual.cnt=cnt; actual.term=term.failNext; actual.index=i; actual=(top=actual).on; if(actual==null){ actual=new SearchEntry(); top.on=actual; 
actual.sub=top; } term=term.next; continue; } case Term.REPEAT_MIN_INF:{ //System.out.println("REPEAT, i="+i+", term.minCount="+term.minCount+", term.maxCount="+term.maxCount); cnt=repeat(data,i,end,term.target); if(cnt<term.minCount) break; i+=cnt; //branch out the backtracker (that is term.failNext, see Term.make*()) actual.cnt=cnt; actual.term=term.failNext; actual.index=i; actual=(top=actual).on; if(actual==null){ actual=new SearchEntry(); top.on=actual; actual.sub=top; } term=term.next; continue; } case Term.REPEAT_MIN_MAX:{ //System.out.println("REPEAT, i="+i+", term.minCount="+term.minCount+", term.maxCount="+term.maxCount); int out1=end; int out2=i+term.maxCount; cnt=repeat(data,i,out1<out2? out1: out2,term.target); if(cnt<term.minCount) break; i+=cnt; //branch out the backtracker (that is term.failNext, see Term.make*()) actual.cnt=cnt; actual.term=term.failNext; actual.index=i; actual=(top=actual).on; if(actual==null){ actual=new SearchEntry(); top.on=actual; actual.sub=top; } term=term.next; continue; } case Term.REPEAT_REG_MIN_INF:{ MemReg mr=memregs[term.memreg]; int sampleOffset=mr.in; int sampleOutside=mr.out; //if(sampleOffset<0) throw new Error("register is referred before definition: "+term.memreg); //if(sampleOutside<0 || sampleOutside<sampleOffset) throw new Error("register is referred within definition: "+term.memreg); /*@since 1.2*/ int bitset; if(sampleOffset<0 || (bitset=sampleOutside-sampleOffset)<0){ break; } else if(bitset==0){ term=term.next; continue matchHere; } cnt=0; while(compareRegions(data,i,sampleOffset,bitset,end)){ cnt++; i+=bitset; } if(cnt<term.minCount) break; actual.cnt=cnt; actual.term=term.failNext; actual.index=i; actual.regLen=bitset; actual=(top=actual).on; if(actual==null){ actual=new SearchEntry(); top.on=actual; actual.sub=top; } term=term.next; continue; } case Term.REPEAT_REG_MIN_MAX:{ MemReg mr=memregs[term.memreg]; int sampleOffset=mr.in; int sampleOutside=mr.out; //if(sampleOffset<0) throw new Error("register 
is referred before definition: "+term.memreg); //if(sampleOutside<0 || sampleOutside<sampleOffset) throw new Error("register is referred within definition: "+term.memreg); /*@since 1.2*/ int bitset; if(sampleOffset<0 || (bitset=sampleOutside-sampleOffset)<0){ break; } else if(bitset==0){ term=term.next; continue matchHere; } cnt=0; int countBack=term.maxCount; while(countBack>0 && compareRegions(data,i,sampleOffset,bitset,end)){ cnt++; i+=bitset; countBack--; } if(cnt<term.minCount) break; actual.cnt=cnt; actual.term=term.failNext; actual.index=i; actual.regLen=bitset; actual=(top=actual).on; if(actual==null){ actual=new SearchEntry(); top.on=actual; actual.sub=top; } term=term.next; continue; } case Term.BACKTRACK_0: //System.out.println("<<"); cnt=actual.cnt; if(cnt>0){ cnt--; i--; actual.cnt=cnt; actual.index=i; actual.term=term; actual=(top=actual).on; if(actual==null){ actual=new SearchEntry(); top.on=actual; actual.sub=top; } term=term.next; continue; } else break; case Term.BACKTRACK_MIN: //System.out.println("<<"); cnt=actual.cnt; if(cnt>term.minCount){ cnt--; i--; actual.cnt=cnt; actual.index=i; actual.term=term; actual=(top=actual).on; if(actual==null){ actual=new SearchEntry(); top.on=actual; actual.sub=top; } term=term.next; continue; } else break; case Term.BACKTRACK_FIND_MIN:{ //System.out.print("<<<[cnt="); cnt=actual.cnt; //System.out.print(cnt+", minCnt="); //System.out.print(term.minCount+", target="); //System.out.print(term.target+"]"); int minCnt; if(cnt>(minCnt=term.minCount)){ int start=i+term.distance; if(start>end){ int exceed=start-end; cnt-=exceed; if(cnt<=minCnt) break; i-=exceed; start=end; } int back=findBack(data,i+term.distance,cnt-minCnt,term.target); //System.out.print("[back="+back+"]"); if(back<0) break; //cnt-=back; //i-=back; if((cnt-=back)<=minCnt){ i-=back; if(term.eat)i++; term=term.next; continue; } i-=back; actual.cnt=cnt; actual.index=i; if(term.eat)i++; actual.term=term; actual=(top=actual).on; if(actual==null){ 
actual=new SearchEntry(); top.on=actual; actual.sub=top; } term=term.next; continue; } else break; } case Term.BACKTRACK_FINDREG_MIN:{ //System.out.print("<<<[cnt="); cnt=actual.cnt; //System.out.print(cnt+", minCnt="); //System.out.print(term.minCount+", target="); //System.out.print(term.target); //System.out.print("reg=<"+memregs[term.target.memreg].in+","+memregs[term.target.memreg].out+">]"); int minCnt; if(cnt>(minCnt=term.minCount)){ int start=i+term.distance; if(start>end){ int exceed=start-end; cnt-=exceed; if(cnt<=minCnt) break; i-=exceed; start=end; } MemReg mr=memregs[term.target.memreg]; int sampleOff=mr.in; int sampleLen=mr.out-sampleOff; //if(sampleOff<0 || sampleLen<0) throw new Error("backreference used before definition: \\"+term.memreg); //int back=findBackReg(data,i+term.distance,sampleOff,sampleLen,cnt-minCnt,term.target,end); //if(back<0) break; /*@since 1.2*/ int back; if(sampleOff<0 || sampleLen<0){ //the group is not def., as in the case of '(\w+)\1' //treat as usual BACKTRACK_MIN cnt--; i--; actual.cnt=cnt; actual.index=i; actual.term=term; actual=(top=actual).on; if(actual==null){ actual=new SearchEntry(); top.on=actual; actual.sub=top; } term=term.next; continue; } else if(sampleLen==0){ back=-1; } else{ back=findBackReg(data,i+term.distance,sampleOff,sampleLen,cnt-minCnt,term.target,end); //System.out.print("[back="+back+"]"); if(back<0) break; } cnt-=back; i-=back; actual.cnt=cnt; actual.index=i; if(term.eat)i+=sampleLen; actual.term=term; actual=(top=actual).on; if(actual==null){ actual=new SearchEntry(); top.on=actual; actual.sub=top; } term=term.next; continue; } else break; } case Term.BACKTRACK_REG_MIN: //System.out.println("<<"); cnt=actual.cnt; if(cnt>term.minCount){ regLen=actual.regLen; cnt--; i-=regLen; actual.cnt=cnt; actual.index=i; actual.term=term; //actual.regLen=regLen; actual=(top=actual).on; if(actual==null){ actual=new SearchEntry(); top.on=actual; actual.sub=top; } term=term.next; continue; } else break; case 
Term.GROUP_IN:{ memreg=term.memreg; //memreg=0 is a regex itself; we don't need to handle it //because regex bounds already are in wOffset and wEnd if(memreg>0){ //MemReg mr=memregs[memreg]; //saveMemregState((top!=null)? top: defaultEntry,memreg,mr); //mr.in=i; memregs[memreg].tmp=i; //assume } term=term.next; continue; } case Term.GROUP_OUT: memreg=term.memreg; //see above if(memreg>0){ //if(term.saveState)saveMemregState((top!=null)? top: defaultEntry,memreg,memregs); MemReg mr=memregs[memreg]; SearchEntry.saveMemregState((top!=null)? top: defaultEntry,memreg,mr); mr.in=mr.tmp; //commit mr.out=i; } term=term.next; continue; case Term.PLOOKBEHIND_IN:{ int tmp=i-term.distance; if(tmp<offset) break; //System.out.println("term="+term+", next="+term.next); LAEntry le=lookaheads[term.lookaheadId]; le.index=i; i=tmp; le.actual=actual; le.top=top; term=term.next; continue; } case Term.INDEPENDENT_IN: case Term.PLOOKAHEAD_IN:{ LAEntry le=lookaheads[term.lookaheadId]; le.index=i; le.actual=actual; le.top=top; term=term.next; continue; } case Term.LOOKBEHIND_CONDITION_OUT: case Term.LOOKAHEAD_CONDITION_OUT: case Term.PLOOKAHEAD_OUT: case Term.PLOOKBEHIND_OUT:{ LAEntry le=lookaheads[term.lookaheadId]; i=le.index; actual=le.actual; top=le.top; term=term.next; continue; } case Term.INDEPENDENT_OUT:{ LAEntry le=lookaheads[term.lookaheadId]; actual=le.actual; top=le.top; term=term.next; continue; } case Term.NLOOKBEHIND_IN:{ int tmp=i-term.distance; if(tmp<offset){ term=term.failNext; continue; } LAEntry le=lookaheads[term.lookaheadId]; le.actual=actual; le.top=top; actual.term=term.failNext; actual.index=i; i=tmp; actual=(top=actual).on; if(actual==null){ actual=new SearchEntry(); top.on=actual; actual.sub=top; } term=term.next; continue; } case Term.NLOOKAHEAD_IN:{ LAEntry le=lookaheads[term.lookaheadId]; le.actual=actual; le.top=top; actual.term=term.failNext; actual.index=i; actual=(top=actual).on; if(actual==null){ actual=new SearchEntry(); top.on=actual; actual.sub=top; } 
term=term.next; continue; } case Term.NLOOKBEHIND_OUT: case Term.NLOOKAHEAD_OUT:{ LAEntry le=lookaheads[term.lookaheadId]; actual=le.actual; top=le.top; break; } case Term.LOOKBEHIND_CONDITION_IN:{ int tmp=i-term.distance; if(tmp<offset){ term=term.failNext; continue; } LAEntry le=lookaheads[term.lookaheadId]; le.index=i; le.actual=actual; le.top=top; actual.term=term.failNext; actual.index=i; actual=(top=actual).on; if(actual==null){ actual=new SearchEntry(); top.on=actual; actual.sub=top; } i=tmp; term=term.next; continue; } case Term.LOOKAHEAD_CONDITION_IN:{ LAEntry le=lookaheads[term.lookaheadId]; le.index=i; le.actual=actual; le.top=top; actual.term=term.failNext; actual.index=i; actual=(top=actual).on; if(actual==null){ actual=new SearchEntry(); top.on=actual; actual.sub=top; } term=term.next; continue; } case Term.MEMREG_CONDITION:{ MemReg mr=memregs[term.memreg]; int sampleOffset=mr.in; int sampleOutside=mr.out; if(sampleOffset>=0 && sampleOutside>=0 && sampleOutside>=sampleOffset){ term=term.next; } else{ term=term.failNext; } continue; } case Term.BRANCH_STORE_CNT_AUX1: actual.regLen=regLen; case Term.BRANCH_STORE_CNT: actual.cnt=cnt; case Term.BRANCH: actual.term=term.failNext; actual.index=i; actual=(top=actual).on; if(actual==null){ actual=new SearchEntry(); top.on=actual; actual.sub=top; } term=term.next; continue; case Term.SUCCESS: //System.out.println("success, matchEnd="+matchEnd+", i="+i+", end="+end); if(!matchEnd || i==end){ this.wOffset=memregs[0].in=wOffset; this.wEnd=memregs[0].out=i; this.top=top; return true; } else break; case Term.CNT_SET_0: cnt=0; term=term.next; continue; case Term.CNT_INC: cnt++; term=term.next; continue; case Term.CNT_GT_EQ: if(cnt>=term.maxCount){ term=term.next; continue; } else break; case Term.READ_CNT_LT: cnt=actual.cnt; if(cnt<term.maxCount){ term=term.next; continue; } else break; case Term.CRSTORE_CRINC:{ int cntvalue=counters[cntreg=term.cntreg]; SearchEntry.saveCntState((top!=null)? 
top: defaultEntry,cntreg,cntvalue); counters[cntreg]=++cntvalue; term=term.next; continue; } case Term.CR_SET_0: counters[term.cntreg]=0; term=term.next; continue; case Term.CR_LT: if(counters[term.cntreg]<term.maxCount){ term=term.next; continue; } else break; case Term.CR_GT_EQ: if(counters[term.cntreg]>=term.maxCount){ term=term.next; continue; } else break; default: throw new Error("unknown term type: "+term.type); } //if(top==null) break matchHere; if(allowIncomplete && i==end){ //an attempt to implement matchesPrefix() //not sure it's a good way //27-04-2002: just as expencted, //the side effect was found (and POSSIBLY fixed); //see the case Term.START return true; } if(top==null){ break matchHere; } //pop the stack top=(actual=top).sub; term=actual.term; i=actual.index; //System.out.println("***POP*** : branch to #"+term.instanceNum+" at "+i); if(actual.isState){ SearchEntry.popState(actual,memregs,counters); } } if(defaultEntry.isState)SearchEntry.popState(defaultEntry,memregs,counters); term=root; //wOffset++; //i=wOffset; i=++wOffset; } this.wOffset=wOffset; this.top=top; return false; } private static final boolean compareRegions(char[] arr, int off1, int off2, int len,int out){ //System.out.print("out="+out+", off1="+off1+", off2="+off2+", len="+len+", reg1="+new String(arr,off1,len)+", reg2="+new String(arr,off2,len)); int p1=off1+len-1; int p2=off2+len-1; if(p1>=out || p2>=out){ //System.out.println(" : out"); return false; } for(int c=len;c>0;c--,p1--,p2--){ if(arr[p1]!=arr[p2]){ //System.out.println(" : no"); return false; } } //System.out.println(" : yes"); return true; } private static final boolean compareRegionsI(char[] arr, int off1, int off2, int len,int out){ int p1=off1+len-1; int p2=off2+len-1; if(p1>=out || p2>=out){ return false; } char c1,c2; for(int c=len;c>0;c--,p1--,p2--){ if((c1=arr[p1])!=Character.toLowerCase(c2=arr[p2]) && c1!=Character.toUpperCase(c2) && c1!=Character.toTitleCase(c2)) return false; } return true; } //repeat while 
// Scanning helpers of Matcher; each returns a distance relative to its starting offset.

   /**
    * Counts how many consecutive characters, starting at <code>off</code> and
    * stopping before <code>out</code>, are matched by <code>term</code>.
    *
    * @param data the text being searched
    * @param off  index of the first character to test
    * @param out  exclusive upper bound of the scan
    * @param term a single-character term (CHAR, ANY_CHAR, ANY_CHAR_NE, BITSET or BITSET2)
    * @return the length of the matching run
    * @throws Error if the term type is not one of the types listed above
    */
   private static final int repeat(char[] data,int off,int out,Term term){
      switch(term.type){
         case Term.CHAR:{
            char c=term.c;
            int i=off;
            while(i<out){
               if(data[i]!=c) break;
               i++;
            }
            return i-off;
         }
         case Term.ANY_CHAR:{
            // every character matches, so the run extends to the bound
            return out-off;
         }
         case Term.ANY_CHAR_NE:{
            // any character except the line terminators '\r' and '\n'
            int i=off;
            char c;
            while(i<out){
               if((c=data[i])=='\r' || c=='\n') break;
               i++;
            }
            return i-off;
         }
         case Term.BITSET:{
            // the table is consulted only for chars <= 255
            boolean[] arr=term.bitset;
            int i=off;
            char c;
            if(term.inverse) while(i<out){
               if((c=data[i])<=255 && arr[c]) break;
               else i++;
            }
            else while(i<out){
               if((c=data[i])<=255 && arr[c]) i++;
               else break;
            }
            return i-off;
         }
         case Term.BITSET2:{
            // two-level table: high byte selects a page (may be null), low byte the flag
            int i=off;
            boolean[][] bitset2=term.bitset2;
            char c;
            if(term.inverse) while(i<out){
               boolean[] arr=bitset2[(c=data[i])>>8];
               if(arr!=null && arr[c&0xff]) break;
               else i++;
            }
            else while(i<out){
               boolean[] arr=bitset2[(c=data[i])>>8];
               if(arr!=null && arr[c&0xff]) i++;
               else break;
            }
            return i-off;
         }
      }
      throw new Error("this kind of term can't be quantified:"+term.type);
   }

   /**
    * Scans forward while <code>term</code> does NOT match, i.e. looks for the
    * first character in <code>[off,out)</code> that is matched by <code>term</code>.
    *
    * @param data the text being searched
    * @param off  index of the first character to test
    * @param out  exclusive upper bound of the scan
    * @param term a CHAR, BITSET or BITSET2 term
    * @return -1 if <code>off&gt;=out</code>; otherwise the number of characters
    *         skipped before the first match, which equals <code>out-off</code>
    *         when nothing matched
    * @throws IllegalArgumentException if the term type is not supported
    */
   private static final int find(char[] data,int off,int out,Term term){
      if(off>=out) return -1;
      switch(term.type){
         case Term.CHAR:{
            char c=term.c;
            int i=off;
            while(i<out){
               if(data[i]==c) break;
               i++;
            }
            return i-off;
         }
         case Term.BITSET:{
            boolean[] arr=term.bitset;
            int i=off;
            char c;
            if(!term.inverse) while(i<out){
               if((c=data[i])<=255 && arr[c]) break;
               else i++;
            }
            else while(i<out){
               if((c=data[i])<=255 && arr[c]) i++;
               else break;
            }
            return i-off;
         }
         case Term.BITSET2:{
            int i=off;
            boolean[][] bitset2=term.bitset2;
            char c;
            if(!term.inverse) while(i<out){
               boolean[] arr=bitset2[(c=data[i])>>8];
               if(arr!=null && arr[c&0xff]) break;
               else i++;
            }
            else while(i<out){
               boolean[] arr=bitset2[(c=data[i])>>8];
               if(arr!=null && arr[c&0xff]) i++;
               else break;
            }
            return i-off;
         }
      }
      throw new IllegalArgumentException("can't seek this kind of term:"+term.type);
   }

   /**
    * Scans forward for the first position in <code>[off,out)</code> at which the
    * text equals the sample region <code>[regOff,regOff+regLen)</code>
    * (case-insensitively for REG_I terms).
    *
    * @param data   the text being searched; it also contains the sample region
    * @param off    index of the first candidate position
    * @param regOff start of the sample region
    * @param regLen length of the sample region
    * @param term   a REG or REG_I term
    * @param out    exclusive upper bound of the scan
    * @return -1 if <code>off&gt;=out</code>; otherwise the number of positions
    *         skipped before the first match, which equals <code>out-off</code>
    *         when nothing matched
    * @throws IllegalArgumentException if the term is neither REG nor REG_I
    */
   private static final int findReg(char[] data,int off,int regOff,int regLen,Term term,int out){
      if(off>=out) return -1;
      int i=off;
      if(term.type==Term.REG){
         while(i<out){
            if(compareRegions(data,i,regOff,regLen,out)) break;
            i++;
         }
      }
      else if(term.type==Term.REG_I){
         while(i<out){
            if(compareRegionsI(data,i,regOff,regLen,out)) break;
            i++;
         }
      }
      else throw new IllegalArgumentException("wrong findReg() target:"+term.type);
      // Forward distance, as in find(). The previous "off-i" was the negated
      // distance, so any hit beyond off looked like a failure to the caller.
      return i-off;
   }

   /**
    * Scans backwards from <code>off-1</code>, examining at most
    * <code>maxCount</code> characters, for a character matched by <code>term</code>.
    * The caller is expected to pass <code>maxCount&gt;0</code>.
    *
    * @param data     the text being searched
    * @param off      exclusive upper bound; the first character tested is <code>off-1</code>
    * @param maxCount maximum number of characters to examine
    * @param term     a CHAR, BITSET or BITSET2 term
    * @return the distance <code>off-i</code> (1..maxCount) to the matching
    *         character, or -1 if none was found
    * @throws IllegalArgumentException if the term type is not supported
    */
   private static final int findBack(char[] data,int off,int maxCount,Term term){
      switch(term.type){
         case Term.CHAR:{
            char c=term.c;
            int i=off;
            int iMin=off-maxCount;
            for(;;){
               if(data[--i]==c) break;
               if(i<=iMin) return -1;
            }
            return off-i;
         }
         case Term.BITSET:{
            boolean[] arr=term.bitset;
            int i=off;
            char c;
            int iMin=off-maxCount;
            if(!term.inverse) for(;;){
               if((c=data[--i])<=255 && arr[c]) break;
               if(i<=iMin) return -1;
            }
            else for(;;){
               if((c=data[--i])>255 || !arr[c]) break;
               if(i<=iMin) return -1;
            }
            return off-i;
         }
         case Term.BITSET2:{
            boolean[][] bitset2=term.bitset2;
            int i=off;
            char c;
            int iMin=off-maxCount;
            if(!term.inverse) for(;;){
               boolean[] arr=bitset2[(c=data[--i])>>8];
               if(arr!=null && arr[c&0xff]) break;
               if(i<=iMin) return -1;
            }
            else for(;;){
               boolean[] arr=bitset2[(c=data[--i])>>8];
               // An inverted set matches a char that is NOT flagged; this mirrors
               // the BITSET inverse branch above. The flag test used to be
               // un-negated, so flagged chars were accepted instead.
               if(arr==null || !arr[c&0xff]) break;
               if(i<=iMin) return -1;
            }
            return off-i;
         }
      }
      throw new IllegalArgumentException("can't find this kind of term:"+term.type);
   }

   /**
    * Scans backwards from <code>off-1</code>, examining at most
    * <code>maxCount</code> positions, for a position at which the text equals
    * the sample region <code>[regOff,regOff+regLen)</code> (case-insensitively
    * for REG_I terms). The first sample character is tested inline and the
    * remainder is delegated to compareRegions()/compareRegionsI().
    * <p>
    * The cases <code>regLen==0</code> and <code>maxCount==0</code> must be
    * handled by the caller.
    *
    * @param data     the text being searched; it also contains the sample region
    * @param off      exclusive upper bound; the first candidate is <code>off-1</code>
    * @param regOff   start of the sample region
    * @param regLen   length of the sample region
    * @param maxCount maximum number of positions to examine
    * @param term     a REG or REG_I term
    * @param out      bound passed through to the region comparison
    * @return the distance <code>off-i</code> to the matching position, or -1 if none
    * @throws IllegalArgumentException if the term is neither REG nor REG_I
    */
   private static final int findBackReg(char[] data,int off,int regOff,int regLen,int maxCount,Term term,int out){
      int i=off;
      int iMin=off-maxCount;
      if(term.type==Term.REG){
         /*@since 1.2*/
         char first=data[regOff];
         regOff++;
         regLen--;
         for(;;){
            i--;
            if(data[i]==first && compareRegions(data,i+1,regOff,regLen,out)) break;
            if(i<=iMin) return -1;
         }
      }
      else if(term.type==Term.REG_I){
         /*@since 1.2*/
         char c=data[regOff];
         char firstLower=Character.toLowerCase(c);
         char firstUpper=Character.toUpperCase(c);
         char firstTitle=Character.toTitleCase(c);
         regOff++;
         regLen--;
         for(;;){
            i--;
            if(((c=data[i])==firstLower || c==firstUpper || c==firstTitle) && compareRegionsI(data,i+1,regOff,regLen,out)) break;
            if(i<=iMin) return -1;
         }
      }
      else throw new IllegalArgumentException("wrong findBackReg() target type :"+term.type);
      return off-i;
   }

   /**
    * Builds a multi-line debugging dump of this matcher: the number of
    * counters, every memory register with its bounds and captured text, the
    * data length, the offset/end and wOffset/wEnd values, and the regex.
    *
    * @return the dump, with lines separated by "\r\n"
    */
   public String toString_d(){
      StringBuffer s=new StringBuffer();
      s.append("counters: ");
      s.append(counters==null? 0: counters.length);

      s.append("\r\nmemregs: ");
      s.append(memregs.length);
      for(int i=0;i<memregs.length;i++) s.append("\r\n #"+i+": ["+memregs[i].in+","+memregs[i].out+"](\""+getString(memregs[i].in,memregs[i].out)+"\")");

      s.append("\r\ndata: ");
      if(data!=null)s.append(data.length);
      else s.append("[none]");

      s.append("\r\noffset: ");
      s.append(offset);

      s.append("\r\nend: ");
      s.append(end);

      s.append("\r\nwOffset: ");
      s.append(wOffset);

      s.append("\r\nwEnd: ");
      s.append(wEnd);

      s.append("\r\nregex: ");
      s.append(re);
      return s.toString();
   }
}

/**
 * One entry of the matcher's backtracking stack. Entries form a doubly linked
 * chain: <code>on</code> points to the next (deeper) entry and <code>sub</code>
 * to the previous one. An entry can additionally carry saved memory-register
 * and counter values, which popState() writes back.
 */
class SearchEntry{
   Term term;        // term to resume at when this entry is popped
   int index;        // text position to resume at
   int cnt;          // saved repetition count
   int regLen;       // saved length of a repeated region
   
   boolean isState;  // true when saved register/counter values are pending
   
   SearchEntry sub,on;
   
   /** Saved bounds of one memory register; nodes are linked so they can be reused. */
   private static class MState{
      int index,in,out;
      MState next,prev;
   }
   
   /** Saved value of one counter; nodes are linked so they can be reused. */
   private static class CState{
      int index,value;
      CState next,prev;
   }
   
   private MState mHead,mCurrent;
   private CState cHead,cCurrent;
   
   /**
    * Records the current <code>in</code>/<code>out</code> bounds of register
    * <code>mr</code> in <code>entry</code> and marks the entry as carrying state.
    * Previously allocated list nodes are reused before new ones are created.
    *
    * @param entry  the entry that receives the saved values
    * @param memreg index of the register, stored for use by popState()
    * @param mr     the register whose bounds are saved
    */
   final static void saveMemregState(SearchEntry entry,int memreg, MemReg mr){
      entry.isState=true;
      MState current=entry.mCurrent;
      if(current==null){
         // nothing saved yet: start at the head, allocating it on first use
         MState head=entry.mHead;
         if(head==null) entry.mHead=entry.mCurrent=current=new MState();
         else current=head;
      }
      else{
         // advance to the next node, appending one if the list ends here
         MState next=current.next;
         if(next==null){
            current.next=next=new MState();
            next.prev=current;
         }
         current=next;
      }
      current.index=memreg;
      current.in=mr.in;
      current.out=mr.out;
      entry.mCurrent=current;
   }
   
   /**
    * Records a counter value in <code>entry</code> and marks the entry as
    * carrying state. Previously allocated list nodes are reused before new
    * ones are created.
    *
    * @param entry  the entry that receives the saved value
    * @param cntreg index of the counter, stored for use by popState()
    * @param value  the value to save
    */
   final static void saveCntState(SearchEntry entry,int cntreg,int value){
      entry.isState=true;
      CState current=entry.cCurrent;
      if(current==null){
         CState head=entry.cHead;
         if(head==null) entry.cHead=entry.cCurrent=current=new CState();
         else current=head;
      }
      else{
         CState next=current.next;
         if(next==null){
            current.next=next=new CState();
            next.prev=current;
         }
         current=next;
      }
      current.index=cntreg;
      current.value=value;
      entry.cCurrent=current;
   }
   
   /**
    * Writes the values saved in <code>entry</code> back into
    * <code>memregs</code> and <code>counters</code>, walking from the most
    * recently saved node to the oldest, then clears the entry's saved-state
    * cursors and its <code>isState</code> flag.
    *
    * @param entry    the entry whose saved values are restored
    * @param memregs  the registers to restore into
    * @param counters the counters to restore into
    */
   final static void popState(SearchEntry entry, MemReg[] memregs, int[] counters){
      MState ms=entry.mCurrent;
      while(ms!=null){
         MemReg mr=memregs[ms.index];
         mr.in=ms.in;
         mr.out=ms.out;
         ms=ms.prev;
      }
      CState cs=entry.cCurrent;
      while(cs!=null){
         counters[cs.index]=cs.value;
         cs=cs.prev;
      }
      entry.mCurrent=null;
      entry.cCurrent=null;
      entry.isState=false;
   }
   
   /**
    * Clears this entry's fields and saved-state cursors (the allocated state
    * nodes themselves are kept). If a following entry exists it is reset
    * recursively while <code>restQueue</code> is positive; once
    * <code>restQueue</code> reaches zero the rest of the chain is unlinked.
    *
    * @param restQueue how many following entries to keep (and reset)
    */
   final void reset(int restQueue){
      term=null;
      index=cnt=regLen=0;
      
      mCurrent=null;
      cCurrent=null;
      isState=false;
      
      SearchEntry on=this.on;
      if(on!=null){
         if(restQueue>0) on.reset(restQueue-1);
         else{
            this.on=null;
            on.sub=null;
         }
      }
   }
}

/**
 * Bounds of one memory register (captured group). A value of -1 means "not set".
 */
class MemReg{
   int index;
   
   int in=-1,out=-1;
   int tmp=-1;  //for assuming at GROUP_IN
   
   MemReg(int index){
      this.index=index;
   }
   
   /** Marks the register as not captured; <code>tmp</code> is left untouched. */
   void reset(){
      in=out=-1;
   }
}

/**
 * Holder for the text index and the stack entries (<code>top</code>,
 * <code>actual</code>) associated with one lookahead/lookbehind construct.
 */
class LAEntry{
   int index;
   SearchEntry top,actual;
}