package edu.isi.karma.cleaning;
import java.util.Comparator;
import java.util.HashMap;
import java.util.Random;
import java.util.Vector;
public class UtilTools {
public static int index = 0;
public static Vector<String> results = new Vector<String>();
public static Vector<Integer> getStringPos(int tokenpos,Vector<TNode> example)
{
Vector<Integer> poss = new Vector<Integer>();
if(tokenpos < 0)
return poss;
int pos = 0;
int strleng = 0;
for(int i=0;i<example.size();i++)
{
strleng += example.get(i).text.length();
}
for(int i = 0; i<tokenpos;i++)
{
pos += example.get(i).text.length();
}
poss.add(pos); // forward position
poss.add(pos-strleng); // backward position
return poss;
}
public static Vector<GrammarTreeNode> convertSegVector(Vector<Segment> x)
{
Vector<GrammarTreeNode> res = new Vector<GrammarTreeNode>();
for(Segment e:x)
{
res.add(e);
}
return res;
}
public static int multinominalSampler(double[] probs) {
Random r = new Random();
double x = r.nextDouble();
if (x <= probs[0]) {
return 0;
}
x -= probs[0];
for (int i = 1; i < probs.length; i++) {
if (x <= probs[i]) {
return i;
}
x -= probs[i];
}
return 0;
}
public static int randChoose(int n) {
Random r = new Random();
return r.nextInt(n);
}
public static String print(Vector<TNode> x) {
String str = "";
if(x == null)
return "null";
for (TNode t : x)
if(t.text.compareTo("ANYTOK")==0)
str += t.getType();
else
str += t.text;
return str;
}
public static boolean samesteplength(Vector<Integer> s) {
if (s.size() <= 1)
return false;
if (s.size() == 2) {
if (s.get(1) - s.get(0) >= 1) {
return true;
} else {
return false;
}
}
int span = s.get(1) - s.get(0);
for (int i = 2; i < s.size(); i++) {
if ((s.get(i) - s.get(i - 1)) != span)
return false;
}
return true;
}
public static void StringColorCode(String org,String res,HashMap<String, String> dict)
{
//System.out.println("res: "+res);
//System.out.println("org: "+org);
int segmentCnt = 0;
String pat = "((?<=\\{_L\\})|(?=\\{_L\\}))";
String pat1 = "((?<=\\{_S\\})|(?=\\{_S\\}))";
String orgdis = "";
String tardis = "";
String tar = "";
String[] st = res.split(pat);
int pre = 0;
boolean inloop = false;
for(String token:st)
{
if(token.compareTo("{_L}")==0 && !inloop)
{
inloop = true;
continue;
}
if(token.compareTo("{_L}")==0 && inloop)
{
inloop = false;
continue;
}
String[] st1 = token.split(pat1);
for(String str:st1)
{
if(str.compareTo("{_S}")==0||str.compareTo("{_S}")==0)
{
continue;
}
if(str.indexOf("{_C}")!=-1)
{
String[] pos = str.split("\\{_C\\}");
if(Integer.valueOf(pos[1])<Integer.valueOf(pos[0]))
{
return;
}
String tarseg = org.substring(Integer.valueOf(pos[0]),Integer.valueOf(pos[1]));
if(Integer.valueOf(pos[0]) >=pre && pre<org.length())
{
orgdis += org.substring(pre,Integer.valueOf(pos[0]));
orgdis += String.format("[%s]",tarseg);
pre = Integer.valueOf(pos[1]);
}
if(inloop)
{
tardis += String.format("[%s]",tarseg);
//orgdis += String.format("[%s]",tarseg);
tar += tarseg;
}
else
{
tardis += String.format("[%s]",tarseg);
//orgdis += String.format("[%s]",tarseg);
segmentCnt ++;
tar += tarseg;
}
}
else
{
tardis += String.format("{%s}",str);
tar += str;
if(!inloop)
segmentCnt ++;
}
}
}
if(pre<org.length())
orgdis += org.substring(pre);
dict.put("Org", org);
dict.put("Tar",tar );
dict.put("Orgdis", orgdis);
dict.put("Tardis", tardis);
}
public static String escape(String s) {
s = s.replaceAll("\\\\", "\\\\\\\\\\\\\\\\");
HashMap<String, String> dict = new HashMap<String, String>();
dict.put("\\(", "\\\\(");
dict.put("\\)", "\\\\)");
dict.put("\\+", "\\\\+");
dict.put("\\.", "\\\\.");
dict.put("\\?", "\\\\?");
dict.put("\\$", "\\\\\\$");
dict.put("\\*", "\\\\*");
dict.put("\\^", "\\\\^");
dict.put("\\]", "\\\\]");
dict.put("\\[", "\\\\[");
dict.put("\\/", "\\\\/");
dict.put("\\'", "\\\\'");
dict.put("\\\"", "\\\\\"");
for (String key : dict.keySet()) {
s = s.replaceAll(key, dict.get(key));
}
return s;
}
public static Vector<TNode> subtokenseqs(int a, int b, Vector<TNode> org) {
Vector<TNode> xNodes = new Vector<TNode>();
if (a < 0 || b >= org.size() || a > b) {
return null;
} else {
for (int i = a; i <= b; i++) {
xNodes.add(org.get(i));
}
return xNodes;
}
}
public static void clearTmpVars() {
results.clear();
index = 0;
}
/*public static String dic2Arff(String[] dic, String s) {
String dirpathString = ServletContextParameterMap
.getParameterValue(ContextParameter.USER_DIRECTORY_PATH);
if (dirpathString.compareTo("") == 0) {
dirpathString = "./src/main/webapp/";
}
UtilTools.clearTmpVars();
try {
CSVWriter cw = new CSVWriter(new FileWriter(new File(dirpathString
+ "grammar/tmp/tmp.csv")), ',');
// write header into the csv file
Vector<String> tmp = new Vector<String>();
Vector<String> tmp1 = new Vector<String>();
RegularityFeatureSet rfs = new RegularityFeatureSet();
Collection<Feature> cols = rfs.computeFeatures(tmp, tmp1);
String[] xyz = new String[rfs.fnames.size() + 1];
for (int i = 0; i < xyz.length - 1; i++) {
xyz[i] = "a_" + i;
}
xyz[xyz.length - 1] = "label";
cw.writeNext(xyz);
// write the data
Vector<String> examples = new Vector<String>();
if (s != null && s.length() > 0) {
String[] z = s.split("\n");
for (String elem : z) {
if (elem.trim().length() > 0) {
examples.add(elem.trim());
}
}
}
for (String o : dic) {
UtilTools.results.add(o);
Vector<String> row = new Vector<String>();
if (s != null && o.compareTo(s) == 0) {
RegularityFeatureSet rf = new RegularityFeatureSet();
Vector<String> oexamples = new Vector<String>();
String[] y = o.split("\n");
for (String elem : y) {
if (elem.trim().length() > 0) {
oexamples.add(elem.trim());
}
}
Collection<Feature> cf = rf.computeFeatures(oexamples,
examples);
Feature[] x = cf.toArray(new Feature[cf.size()]);
// row.add(f.getName());
for (int k = 0; k < cf.size(); k++) {
row.add(String.valueOf(x[k].getScore()));
}
row.add("3"); // change this according to the dataset.
} else {
RegularityFeatureSet rf = new RegularityFeatureSet();
Vector<String> oexamples = new Vector<String>();
String[] y = o.split("\n");
for (String elem : y) {
if (elem.trim().length() > 0) {
oexamples.add(elem.trim());
}
}
Collection<Feature> cf = rf.computeFeatures(oexamples,
examples);
Feature[] x = cf.toArray(new Feature[cf.size()]);
// row.add(f.getName());
for (int k = 0; k < cf.size(); k++) {
row.add(String.valueOf(x[k].getScore()));
}
row.add("0"); // change this according to the dataset.
}
cw.writeNext((String[]) row.toArray(new String[row.size()]));
}
cw.flush();
cw.close();
Data2Features.csv2arff(dirpathString + "grammar/tmp/tmp.csv",
"./src/main/webapp/grammar/tmp/tmp.arff");
return dirpathString + "grammar/tmp/tmp.arff";
} catch (Exception e) {
Logger.getLogger(UtilTools.class).info("" + e.toString());
return "";
}
}
public static Vector<Integer> topKindexs(Vector<Double> scores, int k) {
int cnt = 0;
Vector<Integer> res = new Vector<Integer>();
ScoreObj[] sas = new ScoreObj[scores.size()];
for (int i = 0; i < scores.size(); i++) {
sas[i] = new ScoreObj(i, scores.get(i));
}
Arrays.sort(sas, new DoubleCompare());
while (cnt < k && cnt < sas.length) {
res.add(sas[cnt].index);
cnt++;
}
return res;
}
// unsupervised learning
public static Vector<Double> getScores2(String[] res, String cpres) {
Vector<Double> vds = new Vector<Double>();
// convert the json format to \n seperated format
try {
String[] csvres = new String[res.length];
for (int i = 0; i < res.length; i++) {
JSONObject jso = new JSONObject(res[i]);
Iterator<String> iter = jso.keys();
String lines = "";
while (iter.hasNext()) {
lines += jso.getString(iter.next()) + "\n";
}
csvres[i] = lines;
}
Vector<String> examples = new Vector<String>();
String s = cpres;
String[] sy = cpres.split("\n");
for(String tp:sy)
{
if (tp.trim().length() > 0) {
examples.add(tp.trim());
}
}
for (String o : csvres) {
double soc = 0.0;
RegularityFeatureSet rf = new RegularityFeatureSet();
Vector<String> oexamples = new Vector<String>();
String[] y = o.split("\n");
for (String elem : y) {
if (elem.trim().length() > 0) {
oexamples.add(elem.trim());
}
}
Collection<Feature> cf = rf
.computeFeatures(oexamples, examples);
Feature[] x = cf.toArray(new Feature[cf.size()]);
// row.add(f.getName());
for (int k = 0; k < cf.size(); k++) {
soc += x[k].getScore();
}
vds.add(cf.size() - soc);
}
return vds;
} catch (Exception ex) {
System.out.println("Get Scores error: " + ex.toString());
return vds;
}
}
public static int rank(HashMap<String, Integer> dic, String s,
String trainPath) {
Set<String> keys = dic.keySet();
String[] ks = (String[]) keys.toArray(new String[keys.size()]);
String fpath = UtilTools.dic2Arff(ks, s);
RegularityClassifer rc = new RegularityClassifer(trainPath);
try {
int rank = rc.getRank(fpath);
if (rank < 0) {
return -1;
} else
return rank;
} catch (Exception ex) {
System.out.println("" + ex.toString());
return -1;
}
}
public static Vector<Double> getScores(String[] res, String trainPath) {
Vector<Double> vds = new Vector<Double>();
// convert the json format to \n seperated format
try {
String[] csvres = new String[res.length];
for (int i = 0; i < res.length; i++) {
JSONObject jso = new JSONObject(res[i]);
Iterator<String> iter = jso.keys();
String lines = "";
while (iter.hasNext()) {
lines += jso.getString(iter.next()) + "\n";
}
csvres[i] = lines;
}
String fpath = UtilTools.dic2Arff(csvres, null);
RegularityClassifer rc = new RegularityClassifer(trainPath);
try {
vds = rc.getScores(fpath);
return vds;
} catch (Exception ex) {
System.out.println("get Scores error: " + ex.toString());
return null;
}
} catch (Exception ex) {
System.out.println("Get Scores error: " + ex.toString());
}
return vds;
}*/
public static void main(String[] args) {
String s = "+";
}
}
// used to sort the score in decend order
class ScoreObj {
int index;
double score;
public ScoreObj(int index, double score) {
this.index = index;
this.score = score;
}
}
class DoubleCompare implements Comparator {
public int compare(Object x1, Object x2) {
ScoreObj a1 = (ScoreObj) x1;
ScoreObj a2 = (ScoreObj) x2;
if (a1.score > a2.score) {
return -1;
} else if (a1.score < a2.score) {
return 1;
} else
return 0;
}
}