package edu.isi.karma.cleaning;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.SortedMap;
import java.util.SortedSet;
import java.util.TreeMap;
import java.util.TreeSet;
import java.util.Vector;
public class Position implements GrammarTreeNode {
// Tokens to the left of this position. May be null: getScore() and
// createTotalOrderVector() both null-check it.
public Vector<TNode> leftContextNodes = new Vector<TNode>();
// Tokens to the right of this position. May be null, same as the left context.
public Vector<TNode> rightContextNodes = new Vector<TNode>();
// Absolute positions; createTotalOrderVector() appends each one to `rules`
// as a plain number after the context-based rules.
public Vector<Integer> absPosition = new Vector<Integer>();
// Filled with {-1, 1} by both constructors. Element 1 replaces the "counter"
// placeholder in a rule when the position is not in a loop.
public Vector<Integer> counters = new Vector<Integer>();
// Example inputs; VerifySpace() transforms orgStrings[i] and compares the
// result with tarStrings[i], so the two vectors are indexed in parallel.
public Vector<String> orgStrings = new Vector<String>();
// Expected outputs for the corresponding entries of orgStrings.
public Vector<String> tarStrings = new Vector<String>();
// When true, the "counter" placeholder is left in the rules returned by
// toProgram()/getRule() and VerifySpace() substitutes 1, 2, 3, ... itself.
public boolean isinloop = false;
// Index into `rules` of the next rule toProgram() will return.
public int curState = 0;
// Shared interpreter, created lazily by the six-argument constructor.
// It is not referenced anywhere else in this class.
public static Interpretor itInterpretor = null;
// Controls getString(): 1 = record only paths that span the whole context,
// 0 = record every partial path as well.
public static int fixedlength = 1;
/**
 * Creates a position from its absolute offsets, its context tokens and the
 * examples it was learned from, then generates the candidate rules.
 *
 * The absolute-position and context vectors are stored by reference (not
 * copied); the example strings are copied into this object's own vectors.
 *
 * @param absPos     absolute positions, stored as-is
 * @param lcxt       left context tokens, may be null
 * @param rcxt       right context tokens, may be null
 * @param orgStrings example inputs (copied)
 * @param tarStrings expected outputs for the inputs (copied)
 * @param loop       whether this position is used inside a loop
 */
public Position(Vector<Integer> absPos, Vector<TNode> lcxt,
        Vector<TNode> rcxt, Vector<String> orgStrings,
        Vector<String> tarStrings, boolean loop) {
    // Plain field assignments first; none of these can fail.
    this.isinloop = loop;
    this.absPosition = absPos;
    this.leftContextNodes = lcxt;
    this.rightContextNodes = rcxt;

    this.orgStrings.addAll(orgStrings);
    this.tarStrings.addAll(tarStrings);

    // The shared interpreter is created once, on first use.
    if (itInterpretor == null) {
        itInterpretor = new Interpretor();
    }

    // Occurrence of a regular pattern: {-1, 1}.
    this.counters.add(Integer.valueOf(-1));
    this.counters.add(Integer.valueOf(1));

    createTotalOrderVector();
}
/**
 * Creates a position that shares the absolute positions and both context
 * vectors of {@code p} (by reference) and regenerates the candidate rules.
 *
 * NOTE(review): orgStrings and tarStrings of {@code p} are not copied, so
 * the new object starts with empty example vectors. Confirm this is
 * intended.
 *
 * @param p    position whose offsets and contexts are reused
 * @param loop whether the new position is used inside a loop
 */
public Position(Position p, boolean loop) {
    this.isinloop = loop;
    this.absPosition = p.absPosition;
    this.leftContextNodes = p.leftContextNodes;
    this.rightContextNodes = p.rightContextNodes;

    // Occurrence of a regular pattern: {-1, 1}.
    this.counters.add(Integer.valueOf(-1));
    this.counters.add(Integer.valueOf(1));

    createTotalOrderVector();
}
/**
 * Recursively enumerates textual descriptions of a context and records each
 * one, with its accumulated weight, in {@code smap}.
 *
 * At every token two alternatives are explored, in this order: the token's
 * literal text (weight +2; skipped when the text is empty or "ANYTOK") and
 * the token's type name (weight +1). With {@code fixedlength == 1} only
 * paths that ran off the end of the context are stored; with
 * {@code fixedlength == 0} every intermediate path is stored too. An entry
 * already present in the map is never overwritten.
 *
 * @param x      context tokens; nothing is enumerated when null or empty
 * @param cur    index of the token to process next
 * @param path   description built so far
 * @param value  weight accumulated so far
 * @param smap   output map from escaped description to weight
 * @param isleft true to walk towards index 0 and prepend, false to walk
 *               towards the end and append
 */
public void getString(Vector<TNode> x, int cur, String path, Double value,
        HashMap<String, Double> smap, boolean isleft) {
    if (fixedlength == 0) {
        storePath(path, value, smap);
    }
    if (x == null || x.size() == 0) {
        return;
    }
    boolean pastEnd = cur < 0 || cur >= x.size();
    if (pastEnd) {
        if (fixedlength == 1) {
            storePath(path, value, smap);
        }
        return;
    }

    TNode t = x.get(cur);
    int next = isleft ? cur - 1 : cur + 1;

    // Alternative 1: the literal token text.
    if (t.text.compareTo("ANYTOK") != 0 && t.text.length() > 0) {
        String withText = isleft ? t.text + path : path + t.text;
        getString(x, next, withText, value + 2, smap, isleft);
    }

    // Alternative 2: the token type name.
    String label = typeLabel(t);
    String withType = isleft ? label + path : path + label;
    getString(x, next, withType, value + 1, smap, isleft);
}

/** Stores the escaped path unless the raw or the escaped form is already a key, or the escaped form is empty. */
private static void storePath(String path, Double value,
        HashMap<String, Double> smap) {
    if (smap.containsKey(path)) {
        return;
    }
    String res = UtilTools.escape(path);
    if (res.length() != 0 && !smap.containsKey(res)) {
        smap.put(res, value); // store the string of all sizes
    }
}

/** Maps a token's type to the name used inside generated rules. */
private static String typeLabel(TNode t) {
    if (t.type == TNode.NUMTYP) {
        return "NUM";
    }
    if (t.type == TNode.WORD) {
        return "WORD";
    }
    if (t.type == TNode.SYBSTYP) {
        return "SYB";
    }
    if (t.type == TNode.BNKTYP) {
        return "BNK";
    }
    if (t.type == TNode.UWRDTYP) {
        return "UWRD";
    }
    if (t.type == TNode.LWRDTYP) {
        return "LWRD";
    }
    if (t.type == TNode.STARTTYP) {
        return "START";
    }
    if (t.type == TNode.ENDTYP) {
        return "END";
    }
    if (t.type == TNode.ANYTYP) {
        return "ANYTYP";
    }
    return "" + t.getType();
}
/**
 * Generalises two contexts into one by merging them token by token.
 *
 * For {@code Segment.LEFTPOS} the contexts are aligned at their ends and
 * walked backwards; for {@code Segment.RIGHTPOS} they are aligned at their
 * starts and walked forwards. Merging stops at the first pair that is null
 * or for which {@code mergableType} returns -1. A merged token keeps the
 * common text when both texts are equal and becomes "ANYTOK" otherwise.
 *
 * @param a      first context, may be null
 * @param b      second context, may be null
 * @param option {@code Segment.LEFTPOS} or {@code Segment.RIGHTPOS}; any
 *               other value yields null
 * @return the merged context, or null when either input is null or no
 *         token could be merged
 */
public Vector<TNode> mergeCNXT(Vector<TNode> a, Vector<TNode> b,
        String option) {
    if (a == null || b == null) {
        return null;
    }
    boolean fromEnd = option.compareTo(Segment.LEFTPOS) == 0;
    boolean fromStart = !fromEnd && option.compareTo(Segment.RIGHTPOS) == 0;

    Vector<TNode> merged = new Vector<TNode>();
    if (fromEnd || fromStart) {
        int leng = Math.min(a.size(), b.size());
        for (int i = 0; i < leng; i++) {
            // Left contexts are compared from their last token backwards.
            TNode t = fromEnd ? a.get(a.size() - 1 - i) : a.get(i);
            TNode t1 = fromEnd ? b.get(b.size() - 1 - i) : b.get(i);
            TNode tx = mergeToken(t, t1);
            if (tx == null) {
                break;
            }
            if (fromEnd) {
                merged.add(0, tx); // keep original left-to-right order
            } else {
                merged.add(tx);
            }
        }
    }
    return merged.isEmpty() ? null : merged;
}

/** Merges one token pair; returns null when either token is null or their types cannot be merged. */
private TNode mergeToken(TNode t, TNode t1) {
    if (t == null || t1 == null) {
        return null;
    }
    // Evaluated once per pair (the result doubles as the merged type).
    int type = t.mergableType(t1);
    if (type == -1) {
        return null;
    }
    if (t.text.compareTo(t1.text) == 0) {
        return new TNode(type, t.text);
    }
    return new TNode(type, "ANYTOK");
}
/**
 * Merges this position with {@code b} into a new, more general position.
 *
 * The result keeps the absolute positions common to both and the merged
 * left/right contexts (see {@link #mergeCNXT}). When both positions carry
 * exactly one example and the example inputs are identical, the two target
 * strings are read as comma-separated integer lists and replaced by their
 * sorted, de-duplicated union; otherwise the examples of both positions are
 * concatenated.
 *
 * @param b the position to merge with
 * @return the merged position, or null when {@code b} is null or when the
 *         two positions share neither an absolute position nor any context
 * @throws NumberFormatException if the single-example case applies and a
 *         target string contains a token that is not an integer
 */
public Position mergewith(Position b) {
    if (b == null) {
        return null;
    }
    Vector<Integer> sharedPositions = new Vector<Integer>(this.absPosition);
    sharedPositions.retainAll(b.absPosition);

    Vector<TNode> g_lcxtNodes = mergeCNXT(this.leftContextNodes,
            b.leftContextNodes, Segment.LEFTPOS);
    Vector<TNode> g_rcxtNodes = mergeCNXT(this.rightContextNodes,
            b.rightContextNodes, Segment.RIGHTPOS);
    if (sharedPositions.size() == 0 && g_lcxtNodes == null
            && g_rcxtNodes == null) {
        return null;
    }

    boolean loop = this.isinloop || b.isinloop;
    Vector<String> aStrings = new Vector<String>();
    Vector<String> bStrings = new Vector<String>();

    // All four vectors are checked before element 0 of any of them is
    // read, so mismatched sizes fall through to plain concatenation
    // instead of throwing.
    boolean sameSingleExample = this.orgStrings.size() == 1
            && b.orgStrings.size() == 1
            && this.tarStrings.size() == 1
            && b.tarStrings.size() == 1
            && this.orgStrings.get(0).compareTo(b.orgStrings.get(0)) == 0;
    if (sameSingleExample) {
        aStrings.addAll(this.orgStrings);
        bStrings.add(unionOfPositionLists(this.tarStrings.get(0),
                b.tarStrings.get(0)));
    } else {
        aStrings.addAll(this.orgStrings);
        aStrings.addAll(b.orgStrings);
        bStrings.addAll(this.tarStrings);
        bStrings.addAll(b.tarStrings);
    }
    return new Position(sharedPositions, g_lcxtNodes, g_rcxtNodes, aStrings,
            bStrings, loop);
}

/** Returns the sorted, de-duplicated union of two comma-separated integer lists, joined by commas. */
private static String unionOfPositionLists(String first, String second) {
    SortedSet<Integer> poses = new TreeSet<Integer>();
    for (String token : first.split(",")) {
        poses.add(Integer.valueOf(token));
    }
    for (String token : second.split(",")) {
        poses.add(Integer.valueOf(token));
    }
    // Joining with a separator check (rather than trimming a trailing
    // comma) also works when both lists were empty.
    StringBuilder rep = new StringBuilder();
    for (Integer pos : poses) {
        if (rep.length() > 0) {
            rep.append(',');
        }
        rep.append(pos);
    }
    return rep.toString();
}
/**
 * Marks whether this position is used inside a loop.
 *
 * @param res the new value of {@code isinloop}
 */
public void setinLoop(boolean res) {
    isinloop = res;
}
// NOTE(review): this field is never read or written anywhere in this class;
// getScore() computes its result from the context vectors instead.
private double score = 0.0;
/**
 * Returns the fraction of context tokens that are concrete, i.e. whose text
 * is not "ANYTOK" and whose type is not {@code TNode.ANYTYP}.
 *
 * @return 1 when both contexts are null or empty; otherwise the number of
 *         concrete tokens divided by the total number of context tokens
 */
public double getScore() {
    int lsize = (this.leftContextNodes == null) ? 0 : leftContextNodes.size();
    int rsize = (this.rightContextNodes == null) ? 0 : rightContextNodes.size();
    int total = lsize + rsize;
    if (total == 0) {
        return 1;
    }
    double concrete = concreteTokenCount(leftContextNodes)
            + concreteTokenCount(rightContextNodes);
    return concrete / total;
}

/** Counts the tokens that are neither "ANYTOK" text nor of type ANYTYP; a null context counts as 0. */
private static int concreteTokenCount(Vector<TNode> context) {
    if (context == null) {
        return 0;
    }
    int count = 0;
    for (TNode node : context) {
        if (node.text.compareTo("ANYTOK") != 0
                && node.type != TNode.ANYTYP) {
            count++;
        }
    }
    return count;
}
/** Rewinds the rule cursor so that toProgram() starts again from the first rule. */
public void emptyState() {
    curState = 0;
}
// Candidate rules in the order produced by createTotalOrderVector():
// context-based indexOf(...) rules first, absolute positions last.
public Vector<String> rules = new Vector<String>();
/**
 * Returns the rule at the current cursor and advances the cursor.
 *
 * Outside a loop the "counter" placeholder is replaced by
 * {@code counters.get(1)}; inside a loop the placeholder is left in place.
 *
 * @return the next rule, or the string "null" when the cursor is past the
 *         last rule (the cursor is not advanced in that case)
 */
public String toProgram() {
    final int index = curState;
    if (index >= rules.size()) {
        return "null";
    }
    String rule = rules.get(index);
    if (!isinloop) {
        rule = rule.replace("counter", String.valueOf(counters.get(1)));
    }
    // Advance only after the rule has been produced successfully.
    curState = index + 1;
    return rule;
}
/**
 * Returns the rule stored at {@code index} without touching the cursor.
 *
 * Outside a loop the "counter" placeholder is replaced by
 * {@code counters.get(1)}; inside a loop the placeholder is left in place.
 *
 * @param index position in {@code rules}
 * @return the rule, or the string "null" when {@code index} is outside
 *         {@code [0, rules.size())}
 */
public String getRule(int index) {
    // A negative index is treated like any other out-of-range index
    // instead of letting Vector.get throw.
    if (index < 0 || index >= rules.size()) {
        return "null";
    }
    String rule = rules.get(index);
    if (!isinloop) {
        rule = rule.replace("counter", counters.get(1) + "");
    }
    return rule;
}
/**
 * @return the number of candidate rules held by this position
 */
public long size() {
    final int ruleCount = rules.size();
    return ruleCount;
}
/**
 * Searches the candidate rules, in order, for one that reproduces every
 * target string from its example input.
 *
 * Outside a loop a rule is valid when, for every example,
 * {@code transform(orgStrings[k])} equals {@code tarStrings[k]}. Inside a
 * loop the "counter" placeholder is replaced by 1, 2, 3, ... and the
 * results are joined with commas until a result contains "None"; the rule
 * is valid when the joined string equals the target for every example.
 *
 * NOTE(review): in loop mode the search is abandoned (returns "null")
 * as soon as a rule has no "counter" placeholder, or as soon as a rule
 * yields at most one character of output for some example, rather than
 * moving on to the next rule. That behaviour is preserved here; confirm
 * it is intended.
 *
 * @param itercnt number of valid rules to skip before returning one
 *                (0 returns the first valid rule)
 * @return the selected rule, or the string "null" when none qualifies
 */
public String VerifySpace(int itercnt) {
    for (int ruleNo = 0; ruleNo < this.rules.size(); ruleNo++) {
        String rule = getRule(ruleNo);
        boolean valid = true;
        if (isinloop) {
            if (rule.indexOf("counter") == -1) {
                return "null";
            }
            for (int j = 0; j < this.orgStrings.size(); j++) {
                StringBuilder joined = new StringBuilder();
                int cnt = 1;
                // Collect one result per counter value until the
                // transform reports "None".
                while (true) {
                    String tmpRule = rule.replace("counter",
                            String.valueOf(cnt));
                    ProgramRule programRule = new ProgramRule(tmpRule);
                    String val = programRule.transform(this.orgStrings
                            .get(j));
                    if (val.indexOf("None") != -1) {
                        break;
                    }
                    joined.append(val).append(',');
                    cnt++;
                }
                if (joined.length() <= 1) {
                    return "null";
                }
                // Drop the trailing comma before comparing.
                String produced = joined.substring(0, joined.length() - 1);
                if (this.tarStrings.get(j).compareTo(produced) != 0) {
                    valid = false;
                    break;
                }
            }
        } else {
            ProgramRule pr = new ProgramRule(rule);
            for (int k = 0; k < this.orgStrings.size(); k++) {
                String val = String.valueOf(pr.transform(this.orgStrings
                        .get(k)));
                if (this.tarStrings.get(k).compareTo(val) != 0) {
                    valid = false;
                    // One mismatch is enough; skip the remaining examples.
                    break;
                }
            }
        }
        if (valid) {
            if (itercnt == 0) {
                return rule;
            }
            itercnt--; // one fewer valid rule left to skip
        }
    }
    return "null";
}
/**
 * Generates the candidate rules for this position and appends them to
 * {@code rules}.
 *
 * Every description of the left context is paired with every description
 * of the right context (a null context is described as "ANY" with weight
 * 1.0; the pair ANY/ANY is skipped). Each pair yields a forward
 * ({@code counter}) and a backward ({@code -counter}) indexOf rule. Rules
 * are appended in ascending order of the summed weights, followed by the
 * absolute positions as plain numbers.
 */
public void createTotalOrderVector() {
    HashMap<String, Double> lMap = new HashMap<String, Double>();
    HashMap<String, Double> rMap = new HashMap<String, Double>();

    if (this.leftContextNodes == null) {
        lMap.put("ANY", 1.0);
    } else {
        getString(this.leftContextNodes, this.leftContextNodes.size() - 1,
                "", 1.0, lMap, true);
    }
    if (this.rightContextNodes == null) {
        rMap.put("ANY", 1.0);
    } else {
        getString(this.rightContextNodes, 0, "", 1.0, rMap, false);
    }

    // Group the generated rules by their combined weight.
    SortedMap<Double, Vector<String>> byWeight = new TreeMap<Double, Vector<String>>();
    for (String a : lMap.keySet()) {
        for (String b : rMap.keySet()) {
            if ("ANY".equals(a) && "ANY".equals(b)) {
                continue;
            }
            Double key = lMap.get(a) + rMap.get(b);
            Vector<String> bucket = byWeight.get(key);
            if (bucket == null) {
                bucket = new Vector<String>();
                byWeight.put(key, bucket);
            }
            bucket.add(String.format(
                    "indexOf(value,\'%s\',\'%s\',counter)", a, b));
            bucket.add(String.format(
                    "indexOf(value,\'%s\',\'%s\',-counter)", a, b));
        }
    }

    // A SortedMap iterates in ascending key order.
    for (Vector<String> bucket : byWeight.values()) {
        rules.addAll(bucket);
    }

    // append the absolute position to the end
    for (Integer pos : this.absPosition) {
        rules.add(String.format("%d", pos));
    }
}
/**
 * @return "(left,right)", where each side is the text produced by
 *         {@code UtilTools.print} for the corresponding context
 */
public String toString() {
    return String.format("(%s,%s)",
            UtilTools.print(this.leftContextNodes),
            UtilTools.print(this.rightContextNodes));
}
/**
 * Merges this position with another grammar node, which must be a
 * {@code Position}; delegates to {@link #mergewith(Position)}.
 *
 * @param a the node to merge with
 * @return the merged position, or null when the positions cannot be merged
 * @throws ClassCastException if {@code a} is not a {@code Position}
 */
public GrammarTreeNode mergewith(GrammarTreeNode a) {
    return this.mergewith((Position) a);
}
/**
 * @return the constant node-type name "position"
 */
public String getNodeType() {
    final String nodeType = "position";
    return nodeType;
}
/**
 * @return the same text as {@link #toString()}
 */
public String getrepString() {
    return toString();
}
}