/*
* Sifarish: Recommendation Engine
* Author: Pranab Ghosh
*
* Licensed under the Apache License, Version 2.0 (the "License"); you
* may not use this file except in compliance with the License. You may
* obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
* implied. See the License for the specific language governing
* permissions and limitations under the License.
*/
package org.sifarish.feature;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.Map;
import org.apache.hadoop.conf.Configuration;
import org.apache.log4j.Level;
import org.apache.log4j.Logger;
import org.sifarish.common.TaggedEntity;
/**
 * Similarity strategy that compares two delimited tag strings by matching every
 * tag of the first against every tag of the second through a pluggable
 * {@link TaggedEntity} implementation, and then averaging the best scoring
 * matches.
 *
 * <p>Instances keep mutable working state (the two entities and the list of
 * matched items) that is rewritten on every {@code findDistance} call.
 *
 * @author pranab
 */
public class SemanticSimilarity extends DynamicAttrSimilarityStrategy {
    private static final Logger LOG = Logger.getLogger(SemanticSimilarity.class);

    private TaggedEntity thisEntity;
    private TaggedEntity thatEntity;
    private int topMatchCount;
    private int scale;
    /** Scratch list, cleared and refilled on each call to findDistance. */
    private List<MatchedItem> matchedItems = new ArrayList<MatchedItem>();

    /**
     * Creates the two entity matchers reflectively and reads configuration.
     *
     * @param matcherClass fully qualified name of a {@link TaggedEntity} implementation
     *        with an accessible no-argument constructor
     * @param topMatchCount maximum number of best matches averaged by findDistance
     * @param params parameter map; {@code "semanticScale"} (Integer) is required,
     *        {@code "config"} (Hadoop Configuration) is consulted for {@code debug.on}
     * @throws IOException if the matcher class cannot be loaded or instantiated;
     *         the underlying reflection failure is attached as the cause
     */
    public SemanticSimilarity(String matcherClass, int topMatchCount, Map<String,Object> params) throws IOException {
        try {
            Class<?> iterCls = Class.forName(matcherClass);
            thisEntity = (TaggedEntity)iterCls.newInstance();
            thatEntity = (TaggedEntity)iterCls.newInstance();

            // NOTE(review): only thisEntity is initialized here; thatEntity never
            // receives params. Confirm this is intentional (e.g. shared state
            // inside the TaggedEntity implementation) before changing it.
            thisEntity.initialize(params);

            this.topMatchCount = topMatchCount;
            // Unboxing: a missing "semanticScale" entry fails with NullPointerException.
            scale = (Integer)params.get("semanticScale");

            // The configuration is only needed for the optional debug switch, so
            // its absence must not abort construction.
            Configuration conf = (Configuration)params.get("config");
            if (conf != null && conf.getBoolean("debug.on", false)) {
                LOG.setLevel(Level.DEBUG);
            }
        } catch (ClassNotFoundException e) {
            throw initFailure(e);
        } catch (InstantiationException e) {
            throw initFailure(e);
        } catch (IllegalAccessException e) {
            throw initFailure(e);
        }
    }

    /**
     * Builds the exception reported for any reflection failure in the
     * constructor, keeping the original failure as the cause.
     */
    private static IOException initFailure(Exception cause) {
        return new IOException("failed to intialize SemanticSimilarity", cause);
    }

    /**
     * Scores two tag strings. Both strings are split with {@code fieldDelimRegex};
     * every pair of tags is matched, each score is capped at {@code scale}, and
     * scores from matchers that do not report a correlation are flipped to
     * {@code scale - score}. The highest {@code topMatchCount} scores are then
     * averaged and normalized by {@code scale}, with the result capped at 1.0.
     * The contexts of the selected matches are stored in {@code matchingContexts}.
     *
     * @param src delimited tags of the first entity
     * @param target delimited tags of the second entity
     * @return average normalized score of the best matches, at most 1.0
     * @throws IOException declared for the strategy contract
     */
    public double findDistance(String src, String target) throws IOException {
        // Read once so the nested loop does not build log strings needlessly.
        final boolean debug = LOG.isDebugEnabled();

        matchedItems.clear();
        String[] thisTagItems = src.split(fieldDelimRegex);
        String[] thatTagItems = target.split(fieldDelimRegex);

        for (String thisTagItem : thisTagItems) {
            thisEntity.setTag(thisTagItem);
            for (String thatTagItem : thatTagItems) {
                if (debug) {
                    LOG.debug("thisTagItem:" + thisTagItem + " thatTagItem:" + thatTagItem);
                }
                thatEntity.setTag(thatTagItem);

                // Cap the raw score at scale (only the upper bound is enforced).
                int matchScore = Math.min(thisEntity.match(thatEntity), scale);
                if (!thatEntity.isResultCorrelation()) {
                    matchScore = scale - matchScore;
                    if (debug) {
                        LOG.debug("matchScore:" + matchScore);
                    }
                }

                String matchingContext = thisEntity.getMatchingContext();
                if (debug) {
                    LOG.debug("matchScore:" + matchScore + " matchingContext:" + matchingContext);
                }
                matchedItems.add(new MatchedItem(matchScore, matchingContext));
            }
        }
        if (debug) {
            LOG.debug("matched items size:" + matchedItems.size());
        }

        // MatchedItem orders itself by descending score, so the best come first.
        Collections.sort(matchedItems);
        int numMatches = Math.min(matchedItems.size(), topMatchCount);

        double avScore = 0;
        matchingContexts = new String[numMatches];
        for (int i = 0; i < numMatches; ++i) {
            MatchedItem item = matchedItems.get(i);
            matchingContexts[i] = item.getContext();
            avScore += item.getScore();
            if (debug) {
                LOG.debug("after sorting score:" + item.getScore());
            }
        }

        // NOTE(review): when numMatches is 0 (topMatchCount of 0, or a tag string
        // that splits into no items) this is 0.0/0 and the method returns NaN.
        // The right fallback depends on the caller's contract, so it is left as is.
        avScore /= numMatches;
        avScore /= scale;
        avScore = avScore > 1.0 ? 1.0 : avScore;
        if (debug) {
            LOG.debug("avScore:" + avScore);
        }
        return avScore;
    }

    /**
     * Sets entity and group identifiers on both matchers, then scores the tags.
     *
     * @param thisEntityID identifier assigned to the first entity
     * @param thisTag delimited tags of the first entity
     * @param thatEntityID identifier assigned to the second entity
     * @param thatTag delimited tags of the second entity
     * @param groupingID group identifier assigned to both entities
     * @return result of {@link #findDistance(String, String)} for the two tag strings
     * @throws IOException propagated from {@link #findDistance(String, String)}
     */
    @Override
    public double findDistance(String thisEntityID, String thisTag,
            String thatEntityID, String thatTag, String groupingID) throws IOException {
        thisEntity.setEntityID(thisEntityID);
        thisEntity.setGroupID(groupingID);
        // The second entity must carry its own ID, not the first entity's.
        thatEntity.setEntityID(thatEntityID);
        thatEntity.setGroupID(groupingID);
        return findDistance(thisTag, thatTag);
    }

    /**
     * Score and matching context of one tag pair. Natural ordering is by
     * descending score.
     */
    private static class MatchedItem implements Comparable<MatchedItem> {
        private final int score;
        private final String context;

        public MatchedItem(int score, String context) {
            this.score = score;
            this.context = context;
        }

        public int getScore() {
            return score;
        }

        public String getContext() {
            return context;
        }

        /**
         * Orders higher scores first. Uses explicit comparison rather than
         * subtraction, which can overflow for scores of opposite sign.
         */
        @Override
        public int compareTo(MatchedItem other) {
            if (other.score == score) {
                return 0;
            }
            return other.score < score ? -1 : 1;
        }
    }
}