/*
* chombo: Hadoop Map Reduce utility
* Author: Pranab Ghosh
*
* Licensed under the Apache License, Version 2.0 (the "License"); you
* may not use this file except in compliance with the License. You may
* obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
* implied. See the License for the specific language governing
* permissions and limitations under the License.
*/
package org.chombo.distance;
import java.io.IOException;
import java.util.Map;
/**
 * Base class for similarity strategies over entities whose attributes are
 * variable and dynamic. It holds the configuration flags and result fields
 * shared by the concrete strategies, and provides a factory,
 * {@link #createSimilarityStrategy(AttributeDistance)}, that selects a
 * concrete strategy by name.
 *
 * @author pranab
 */
public abstract class DynamicVectorSimilarity {
    /** Regular expression used as the field delimiter; defaults to one or more whitespace characters. */
    protected String fieldDelimRegex = "\\s+";

    // NOTE(review): the exact meaning of the three flags below is defined by the
    // subclasses that read them; this class only stores and exposes them.
    protected boolean isBooleanVec;
    protected boolean isSemanticVec;
    protected boolean isCountIncluded;

    // Result fields. Nothing in this class assigns them; presumably subclasses
    // populate them while computing a distance -- confirm against the subclasses.
    protected int intersectionLength;
    protected String[] matchingContexts;

    /**
     * Finds the distance between two entities given as text. This base
     * implementation ignores its arguments and always returns {@code 1.0};
     * subclasses are presumably expected to override it.
     *
     * @param src source entity text
     * @param target target entity text
     * @return always {@code 1.0} in this base implementation
     * @throws IOException never thrown here; declared so that overriding methods may throw it
     */
    public double findDistance(String src, String target) throws IOException {
        return 1.0;
    }

    /**
     * Finds the distance between two identified entities within a group. This
     * base implementation ignores its arguments and always returns
     * {@code 1.0}; subclasses are presumably expected to override it.
     *
     * @param srcEntityID ID of the source entity
     * @param src source entity text
     * @param targetEntityID ID of the target entity
     * @param target target entity text
     * @param groupingID ID of the group the pair belongs to
     * @return always {@code 1.0} in this base implementation
     * @throws IOException never thrown here; declared so that overriding methods may throw it
     */
    public double findDistance(String srcEntityID, String src, String targetEntityID, String target,
            String groupingID) throws IOException {
        return 1.0;
    }

    /**
     * @return the regular expression used as the field delimiter
     */
    public String getFieldDelimRegex() {
        return fieldDelimRegex;
    }

    /**
     * @param fieldDelimRegex the regular expression to use as the field delimiter
     */
    public void setFieldDelimRegex(String fieldDelimRegex) {
        this.fieldDelimRegex = fieldDelimRegex;
    }

    /**
     * @return the current value of the boolean vector flag
     */
    public boolean isBooleanVec() {
        return isBooleanVec;
    }

    /**
     * @param isBooleanVec new value of the boolean vector flag
     */
    public void setBooleanVec(boolean isBooleanVec) {
        this.isBooleanVec = isBooleanVec;
    }

    /**
     * @return the current value of the semantic vector flag
     */
    public boolean isSemanticVec() {
        return isSemanticVec;
    }

    /**
     * @param isSemanticVec new value of the semantic vector flag
     */
    public void setSemanticVec(boolean isSemanticVec) {
        this.isSemanticVec = isSemanticVec;
    }

    /**
     * @return the current value of the count included flag
     */
    public boolean isCountIncluded() {
        return isCountIncluded;
    }

    /**
     * @param isCountIncluded new value of the count included flag
     */
    public void setCountIncluded(boolean isCountIncluded) {
        this.isCountIncluded = isCountIncluded;
    }

    /**
     * @return the stored intersection length; {@code 0} unless a subclass has assigned it
     */
    public int getIntersectionLength() {
        return intersectionLength;
    }

    /**
     * Returns the stored matching contexts. The internal array itself is
     * returned, not a copy.
     *
     * @return the matching contexts array; {@code null} unless a subclass has assigned it
     */
    public String[] getMatchingContexts() {
        return matchingContexts;
    }

    /**
     * Creates the similarity strategy named by
     * {@link AttributeDistance#getTextSimilarityStrategy()}. Recognized names
     * are {@code "jaccard"}, {@code "dice"}, {@code "charPair"} and
     * {@code "cosine"}; matching is case sensitive.
     *
     * @param attrDist attribute distance configuration supplying the strategy
     *        name and, for Jaccard, the non matching term weights
     * @return a new strategy instance for the configured algorithm
     * @throws IOException declared for callers; not thrown by the code visible here
     * @throws IllegalArgumentException if the configured name is {@code null}
     *         or is not one of the recognized names
     */
    public static DynamicVectorSimilarity createSimilarityStrategy(AttributeDistance attrDist)
            throws IOException {
        String simAlgorithm = attrDist.getTextSimilarityStrategy();

        // Constant-first comparisons keep a missing (null) algorithm name on the
        // IllegalArgumentException path below instead of failing with a bare
        // NullPointerException.
        if ("jaccard".equals(simAlgorithm)) {
            double srcNonMatchingTermWeight = attrDist.getJaccardSrcNonMatchingTermWeight();
            double trgNonMatchingTermWeight = attrDist.getJaccardTrgNonMatchingTermWeight();
            return new JaccardSimilarity(srcNonMatchingTermWeight, trgNonMatchingTermWeight);
        }
        if ("dice".equals(simAlgorithm)) {
            return new DiceSimilarity();
        }
        if ("charPair".equals(simAlgorithm)) {
            return new CharacterPairSimilarity();
        }
        if ("cosine".equals(simAlgorithm)) {
            return new CosineSimilarity();
        }
        throw new IllegalArgumentException("invalid text similarity algorithms:" + simAlgorithm);
    }
}