/*
* Sifarish: Recommendation Engine
* Author: Pranab Ghosh
*
* Licensed under the Apache License, Version 2.0 (the "License"); you
* may not use this file except in compliance with the License. You may
* obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
* implied. See the License for the specific language governing
* permissions and limitations under the License.
*/
package org.sifarish.feature;
import org.apache.hadoop.conf.Configuration;
/**
 * Base schema class. Holds the distance and text matching settings of a schema
 * and builds the matching strategy objects from them.
 *
 * <p>All settings are plain mutable properties; the array valued properties are
 * stored and returned by reference, without copying.
 *
 * @author pranab
 *
 */
public class TypeSchema {
    /** Name of the distance algorithm, see {@link #createDistanceStrategy(int)}. */
    private String distAlgorithm;
    /** Power handed to the Minkowski distance strategy. */
    private double minkowskiParam;
    private double numericDiffThreshold;
    private String missingValueHandler = "default";
    /** Name of the text matching algorithm, see {@link #createTextSimilarityStrategy()}. */
    private String textMatchingAlgorithm;
    /** First weight handed to the Jaccard similarity strategy. */
    private double srcNonMatchingTermWeight = 1.0;
    /** Second weight handed to the Jaccard similarity strategy. */
    private double trgNonMatchingTermWeight = 1.0;
    private double[] locationComponentWeights;
    private double[] eventComponentWeights;
    /** Optional configuration; only read for the "editDistance" text matching algorithm. */
    private Configuration conf;

    public String getDistAlgorithm() {
        return distAlgorithm;
    }

    public void setDistAlgorithm(String distAlgorithm) {
        this.distAlgorithm = distAlgorithm;
    }

    public double getMinkowskiParam() {
        return minkowskiParam;
    }

    public void setMinkowskiParam(double minkowskiParam) {
        this.minkowskiParam = minkowskiParam;
    }

    public double getNumericDiffThreshold() {
        return numericDiffThreshold;
    }

    public void setNumericDiffThreshold(double numericDiffThreshold) {
        this.numericDiffThreshold = numericDiffThreshold;
    }

    public String getMissingValueHandler() {
        return missingValueHandler;
    }

    public void setMissingValueHandler(String missingValueHandler) {
        this.missingValueHandler = missingValueHandler;
    }

    public String getTextMatchingAlgorithm() {
        return textMatchingAlgorithm;
    }

    public void setTextMatchingAlgorithm(String textMatchingAlgorithm) {
        this.textMatchingAlgorithm = textMatchingAlgorithm;
    }

    public double getSrcNonMatchingTermWeight() {
        return srcNonMatchingTermWeight;
    }

    public void setSrcNonMatchingTermWeight(double srcNonMatchingTermWeight) {
        this.srcNonMatchingTermWeight = srcNonMatchingTermWeight;
    }

    public double getTrgNonMatchingTermWeight() {
        return trgNonMatchingTermWeight;
    }

    public void setTrgNonMatchingTermWeight(double trgNonMatchingTermWeight) {
        this.trgNonMatchingTermWeight = trgNonMatchingTermWeight;
    }

    public double[] getLocationComponentWeights() {
        return locationComponentWeights;
    }

    public void setLocationComponentWeights(double[] locationComponentWeights) {
        this.locationComponentWeights = locationComponentWeights;
    }

    public double[] getEventComponentWeights() {
        return eventComponentWeights;
    }

    public void setEventComponentWeights(double[] eventComponentWeights) {
        this.eventComponentWeights = eventComponentWeights;
    }

    public void setConf(Configuration conf) {
        this.conf = conf;
    }

    /**
     * Entity distance strategy.
     *
     * <p>Recognized algorithm names are "euclidean", "manhattan" and "minkwoski"
     * (legacy spelling); the correctly spelled "minkowski" is accepted as well.
     * For the Minkowski strategy the power is set from {@code minkowskiParam}.
     *
     * @param scale scale passed to the constructor of the selected strategy
     * @return the strategy for the configured distance algorithm, or {@code null}
     *         when the algorithm is not set or not recognized
     */
    public DistanceStrategy createDistanceStrategy(int scale) {
        DistanceStrategy distStrategy = null;
        // constant-first equals: an unset algorithm falls through to null
        // instead of raising a NullPointerException
        if ("euclidean".equals(distAlgorithm)) {
            distStrategy = new EuclideanDistance(scale);
        } else if ("manhattan".equals(distAlgorithm)) {
            distStrategy = new ManhattanDistance(scale);
        } else if ("minkwoski".equals(distAlgorithm) || "minkowski".equals(distAlgorithm)) {
            // "minkwoski" is the historical key and must keep working
            distStrategy = new MinkwoskiDistance(scale);
            distStrategy.setPower(minkowskiParam);
        }
        return distStrategy;
    }

    /**
     * Text similarity strategy.
     *
     * <p>Recognized algorithm names are "jaccard", "cosine" and "editDistance".
     * For "editDistance" the boolean "edit.dist.token" is read from the
     * configuration, defaulting to {@code true}; the same default is used when
     * no configuration has been set.
     *
     * @return the strategy for the configured text matching algorithm, or
     *         {@code null} when the algorithm is not set or not recognized
     */
    public DynamicAttrSimilarityStrategy createTextSimilarityStrategy() {
        DynamicAttrSimilarityStrategy textSimStrategy = null;
        if ("jaccard".equals(textMatchingAlgorithm)) {
            textSimStrategy = new JaccardSimilarity(srcNonMatchingTermWeight, trgNonMatchingTermWeight);
        } else if ("cosine".equals(textMatchingAlgorithm)) {
            textSimStrategy = new CosineSimilarity();
        } else if ("editDistance".equals(textMatchingAlgorithm)) {
            // conf is optional; without it fall back to the configuration default
            boolean tokenWise = (conf == null) || conf.getBoolean("edit.dist.token", true);
            textSimStrategy = new EditDistanceSimilarity(tokenWise);
        }
        return textSimStrategy;
    }
}