/*******************************************************************************
* Copyright 2012 University of Southern California
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* This code was developed by the Information Integration Group as part
* of the Karma project at the Information Sciences Institute of the
* University of Southern California. For more information, publications,
* and related projects, please see: http://www.isi.edu/integration
******************************************************************************/
package edu.isi.karma.cleaning.QuestionableRecord;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Vector;
import org.python.antlr.PythonParser.dictmaker_return;
import org.python.antlr.PythonParser.print_stmt_return;
import edu.isi.karma.cleaning.RecFeature;
import edu.isi.karma.cleaning.Ruler;
import edu.isi.karma.cleaning.TNode;
import edu.isi.karma.cleaning.UtilTools;
/**
 * Assembles the list of {@link RecFeature} objects for one record.
 *
 * The produced vector always has the same layout: one Feature1 per entry of
 * {@link #symbol}, one Feature2 per entry of {@link #types}, a single
 * Feature3 built from the color string, and one Feature4 per dictionary word.
 */
public class FeatureVector {
    // Punctuation strings; each one yields a Feature1.
    String[] symbol = {
        "#", ";", ",", "!", "~", "@", "$", "%", "^", "&", "*", "(", ")",
        "_", "-", "{", "}", "[", "]", "\"", "'", ":", "?", "<", ">", "."
    };
    // TNode type codes; each one yields a Feature2.
    int[] types = {
        TNode.NUMTYP,
        TNode.SYBSTYP,
        TNode.LWRDTYP,
        TNode.UWRDTYP
    };
    // Not touched by any method of this class.
    Vector<RecFeature> x = new Vector<RecFeature>();
    // Words that each yield a Feature4; replaced by the constructor argument.
    HashSet<String> dictionary = new HashSet<String>();
    // Number of features returned by the most recent createVector call.
    public int size;

    /**
     * Adopts the given set as this instance's dictionary after emptying it.
     *
     * The set is kept by reference, not copied, so the caller's set is
     * cleared as well and any words added to it afterwards are seen here.
     * NOTE(review): clearing the caller's set looks deliberate but is
     * surprising; confirm against the call sites before changing it.
     *
     * @param dic set to empty and then use as the dictionary
     */
    public FeatureVector(HashSet<String> dic) {
        dic.clear();
        dictionary = dic;
    }

    /**
     * Returns how many features a vector built with the current dictionary
     * contains.
     *
     * @return symbol count + type count + 1 (the color feature) + number of
     *         dictionary words
     */
    public int size() {
        int fixedFeatures = symbol.length + types.length + 1;
        return fixedFeatures + dictionary.size();
    }

    /**
     * Builds the feature vector for one record and stores its length in
     * {@link #size}.
     *
     * @param raw   string that is tokenized with a Ruler and also passed to
     *              UtilTools.StringColorCode
     * @param color string passed to UtilTools.StringColorCode and to Feature3
     * @return a newly created vector of features
     */
    public Vector<RecFeature> createVector(String raw, String color) {
        HashMap<String, String> colorParts = new HashMap<String, String>();
        Vector<RecFeature> features = new Vector<RecFeature>();

        // Tokenize the raw string first; the same Ruler is reused below.
        Ruler tokenizer = new Ruler();
        tokenizer.setNewInput(raw);
        Vector<TNode> rawNodes = tokenizer.vec;

        System.out.println("raw: "+raw+" color: "+color);
        UtilTools.StringColorCode(raw, color, colorParts);

        // Presumably StringColorCode stores the target string under "Tar";
        // its implementation is not visible from this file.
        tokenizer.setNewInput(colorParts.get("Tar"));
        Vector<TNode> targetNodes = tokenizer.vec;

        constructVector(rawNodes, targetNodes, color, features, this.dictionary);
        this.size = features.size();
        return features;
    }

    /**
     * Appends every feature to {@code v} in a fixed order: symbol features,
     * type features, the color feature, then the dictionary features.
     *
     * @param t        nodes handed to each Feature4 together with tarNodes
     * @param tarNodes nodes handed to every Feature1, Feature2 and Feature4
     * @param color    string handed to Feature3
     * @param v        vector that receives the features
     * @param dic      words for which a Feature4 is created
     */
    public void constructVector(Vector<TNode> t, Vector<TNode> tarNodes, String color, Vector<RecFeature> v, HashSet<String> dic) {
        for (int i = 0; i < symbol.length; i++) {
            v.add(new Feature1(symbol[i], tarNodes));
        }
        for (int i = 0; i < types.length; i++) {
            v.add(new Feature2(types[i], tarNodes));
        }
        v.add(new Feature3(color));
        for (String word : dic) {
            v.add(new Feature4(word, t, tarNodes));
        }
    }
}