/******************************************************************************* * Copyright 2012 University of Southern California * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. * * This code was developed by the Information Integration Group as part * of the Karma project at the Information Sciences Institute of the * University of Southern California. For more information, publications, * and related projects, please see: http://www.isi.edu/integration ******************************************************************************/ package edu.isi.karma.cleaning; import java.util.Collections; import java.util.Iterator; import java.util.ListIterator; import java.util.StringTokenizer; import java.util.Vector; import org.antlr.runtime.ANTLRStringStream; import org.antlr.runtime.CharStream; import org.antlr.runtime.Token; public class Ruler { String Org = ""; String Trgt = ""; StringTokenizer st = null; String[] seperator = {" ",","}; public Vector<TNode> vec; int curPos = 0; Vector<Object[]> operators = new Vector<Object[]>(); Vector<Integer> positions; Vector<TNode> whats; Vector<Integer> consPos; public Ruler() { positions = new Vector<Integer>(); consPos = new Vector<Integer>(); whats = new Vector<TNode>(); } public Ruler(String x) { positions = new Vector<Integer>(); consPos = new Vector<Integer>(); whats = new Vector<TNode>(); this.initConstantPosition(); vec = new Vector<TNode>(); Org = x; tokenize(); } public void initConstantPosition() { String target = ","; for(int i =0; i<vec.size();i++) { if(vec.get(i).text.compareTo(target)==0) { this.consPos.add(i); } } } public void setNewInput(String x) { this.Org = x; this.Trgt = ""; this.vec = new Vector<TNode>(); this.curPos = 0; this.tokenize(); this.initConstantPosition(); } public void setNewInput(Vector<TNode> x) { this.Org = x.toString(); this.Trgt = ""; this.vec = new Vector<TNode>(); this.curPos = 0; this.vec = x; this.initConstantPosition(); } //in current data,search the position of the tvec public static int Search(Vector<TNode> xvec,Vector<TNode> tvec,int bpos) { boolean isFind = false; int p1 = -1; for(int t = bpos;t<xvec.size()-tvec.size()+1;t++) { p1 = t; for(int x = 0; x<tvec.size();x++) { int p2 = x; if(xvec.get(p1).sameNode(tvec.get(p2))) { p1++; } else { isFind = false; break; } isFind = true; } if(isFind) { return t; } } return -1; } //evalPos() public int evalPos(String input,Vector<TNode> t, String option) { boolean incld = false; if(input.contains("first")) { if(!input.contains("incld")) { incld = false; } else { incld = true; } //int pos1 = this.Search(this.vec,t,0); if(option.compareTo("from_beginning")==0) { int pos = Ruler.Search(vec,t, 0); if(pos == -1) return -1; if(incld) { return pos; } else { if(pos<vec.size()) if(pos>0) { return pos-1; } else { return 0; } else return vec.size()-1; } } else { Vector<TNode> tmpvec = (Vector<TNode>)this.vec.clone(); Collections.reverse(tmpvec); int pos = Ruler.Search(tmpvec,t, 0); if(pos == -1) return -1; if(incld) { if(this.vec.size()- pos-1>=0 && this.vec.size()- pos-1 <= vec.size()) return this.vec.size()- pos-1; else return 0; } else { if(this.vec.size()- pos>=0 && this.vec.size()- pos <= vec.size()) return this.vec.size()- pos; else return 0; } } } /*LSA to do*/ else { if(option.compareTo("from_beginning")==0) { return Integer.parseInt(input)-1; } else { return this.vec.size()-Integer.parseInt(input); } } } public void addOperators(Object[][] opers) { for(int j = 0; j<opers.length;j++) { operators.add(opers[j]); } } //seperate by , and " " // public void tokenize() { CharStream cs = new ANTLRStringStream(Org); Tokenizer tk = new Tokenizer(cs); Token t; t = tk.nextToken(); while(t.getType()!=-1) { int mytype = -1; String txt = ""; if(t.getType()==Tokenizer.LWRD) { mytype = TNode.LWRDTYP; txt = t.getText(); } else if(t.getType()==Tokenizer.UWRD) { mytype = TNode.UWRDTYP; txt = t.getText(); } else if(t.getType() == Tokenizer.BLANK) { mytype = TNode.BNKTYP; txt = t.getText(); } else if(t.getType() == Tokenizer.NUMBER) { mytype = TNode.NUMTYP; txt = t.getText(); } else if(t.getType() == Tokenizer.SYBS) { //mytype = TNode.SYBSTYP; mytype = (int)t.getText().charAt(0); txt = t.getText(); } else if(t.getType() == Tokenizer.START) { mytype = TNode.STARTTYP; txt = ""; } else if(t.getType() == Tokenizer.END) { mytype = TNode.ENDTYP; txt = ""; } TNode tx = new TNode(mytype,txt); vec.add(tx); t = tk.nextToken(); } } public static void main(String[] args) { } //move a position complied with condition // move to n -1 // move up to tok -2 public void move(int n, TNode tok,int opt) { if(opt == -1) { this.curPos = n; } else if(opt == -2) { Iterator<TNode> iter = vec.iterator(); while(iter.hasNext()) { if(iter.next().sameText(tok)) { this.curPos ++ ; } } } } public String print() { String res = ""; for(int i =0;i<vec.size();i++) { String type = ""; if(vec.get(i).type==TNode.LWRDTYP) type = "LWRD"; else if(vec.get(i).type==TNode.UWRDTYP) type = "UWRD"; else if(vec.get(i).type==TNode.SYBSTYP) type = "SYB"; else if(vec.get(i).type==TNode.NUMTYP) type = "NUM"; else if(vec.get(i).type==TNode.BNKTYP) type = "BNK"; res += vec.get(i).text+"<"+type+">"; } return res; } public String toString() { String res = ""; for(int i=0;i<vec.size();i++) { res += vec.get(i).text; } return res; } public void doOperation(String oper,String num,Vector<TNode> x,int spos,int epos) { int quan = 0 ; if(num==null||num.compareTo("anynumber")==0) { quan = Integer.MAX_VALUE; } else { quan = Integer.parseInt(num); } if(oper.compareTo("del")==0) { if(spos < 0) { return;// not applicable } if(epos < 0) { return; // not applicable } this.det(quan,x, spos, epos); } if(oper.compareTo("mov")==0) { if(spos > epos) { this.vec = null; return; } this.mov(x, Integer.parseInt(num), spos, epos); } if(oper.compareTo("ins")==0) { this.ins(x, spos); } } public void collectPoss(int x) { // the consPos show be sorted from small to high for(int i = 0;i<this.consPos.size();i++) { if(x<consPos.get(i)) { this.positions.add(i); } } this.positions.add(consPos.size()); } //toks is the token sequence that needed to be inserted into original token sequence //dpos is the position start of the insertion public void ins(Vector<TNode> toks,int dpos) { if(dpos<vec.size()) { vec.addAll(dpos, toks); } else { vec.addAll(toks); } } //dpos is the destination position //toks specify the tokens need to be moved //spos is the start position of the segment //epos is the end position of the segment public void mov(Vector<TNode> toks, int dpos, int spos,int epos) { int pos = 0; int size = 0; if(toks!=null) { pos = Ruler.Search(this.vec,toks, spos); if(pos+toks.size()>epos+1 || pos == -1) { return; } size = toks.size(); } else { pos = spos; if(epos == vec.size()) { size = vec.size()-spos; } else{ size = epos - spos+1; } } //update the end position and do the del ListIterator<TNode> l = this.vec.listIterator(pos); //ListIterator<TNode> dl = this.vec.listIterator(dpos); int c = 0; Vector<TNode> x = new Vector<TNode>(); for(c = 0;c<size;c++) { //this.collectPoss(pos); TNode tn = l.next(); tn.setColor(TNode.MOVCOLOR); // set color for moving. x.add(tn); //this.whats.add(tn); l.remove(); } if(dpos <= spos) { if(dpos==vec.size()) { this.vec.addAll(x); return; } this.vec.addAll(dpos, x); } if(dpos>=epos) { dpos = dpos-size; if(dpos==vec.size()) { this.vec.addAll(x); return; } this.vec.addAll(dpos, x); } } public void det(int n,Vector<TNode> toks, int start, int end) { int cnt = 0; int pos = 0; int deleng = 0; while(cnt < n) { if(toks == null) // don't specify all particular token sequence { pos = start; n = 0; deleng = end-start+1; } else { pos = Ruler.Search(this.vec,toks,start); deleng = toks.size(); } if(pos+deleng>end+1 || pos == -1) { break; } //update the end position and do the del ListIterator<TNode> l = this.vec.listIterator(pos); int c = 0; for(c = 0;c<deleng;c++) { //this.collectPoss(pos); TNode tn = l.next(); this.whats.add(tn); l.remove(); pos ++; } end = end - deleng; cnt ++; } } }