/* * Sifarish: Recommendation Engine * Author: Pranab Ghosh * * Licensed under the Apache License, Version 2.0 (the "License"); you * may not use this file except in compliance with the License. You may * obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or * implied. See the License for the specific language governing * permissions and limitations under the License. */ package org.sifarish.common; import java.io.IOException; import java.util.ArrayList; import java.util.List; import java.util.Map; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FSDataInputStream; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.apache.log4j.Level; import org.apache.log4j.Logger; import com.hp.hpl.jena.rdf.model.Model; import com.hp.hpl.jena.rdf.model.ModelFactory; import com.hp.hpl.jena.rdf.model.Property; import com.hp.hpl.jena.rdf.model.RDFNode; import com.hp.hpl.jena.rdf.model.Resource; import com.hp.hpl.jena.rdf.model.ResourceFactory; import com.hp.hpl.jena.rdf.model.Statement; import com.hp.hpl.jena.rdf.model.StmtIterator; /** * RDF based semantic similarity using Apache Jena * @author pranab * */ public class ResourceDescribedEntity extends TaggedEntity { private Model model; private static final Logger LOG = Logger.getLogger(ResourceDescribedEntity.class); @Override public boolean isResultCorrelation() { return false; } @Override public int match(TaggedEntity other) throws IOException { String thisTag = getTag(); String thatTag = other.getTag(); int dist = 0; Resource matchingResource = null; if (!thisTag.equals(thatTag)) { //find intersections in RDF graph List<ResourceTraversed> intersections = match(thisTag, thatTag); //find the intersection with min distance dist = Integer.MAX_VALUE; for (ResourceTraversed resTrav : intersections) { if (resTrav.getDistance() < dist) { dist = resTrav.getDistance(); matchingResource = resTrav.getResource(); } } } else { matchingResource = ResourceFactory.createResource(thisTag); } matchingContext = matchingResource != null ? resourceToString(matchingResource) : ""; LOG.debug("dist:" + dist); return dist; } /** * @param resource * @return */ private String resourceToString(Resource resource) { StringBuilder stBld = new StringBuilder(); StmtIterator iter = model.listStatements(resource, (Property)null, (RDFNode)null); while (iter.hasNext()) { Statement stmt = iter.nextStatement(); Resource subject = stmt.getSubject(); Property predicate = stmt.getPredicate(); RDFNode object = stmt.getObject(); stBld.append("[").append(subject.toString()).append(" ").append(predicate.toString()).append(" "). append(object.toString()).append("]"); } return stBld.toString(); } /** * @throws IOException */ private void loadModel() throws IOException { if (null == model) { FileSystem dfs = FileSystem.get((Configuration)params.get("config")); Path src = new Path((String)params.get("semantic.rdf.modelFilePath")); FSDataInputStream fs = dfs.open(src); model = ModelFactory.createDefaultModel(); model.read(fs, ""); } } /** * @param modelFilePath * @param firstResource * @param secondResource */ public List<ResourceTraversed> match( String firstResource, String secondResource) { LOG.debug("firstResource:" + firstResource + " secondResource:" + secondResource ); List<ResourceTraversed> firstSearchRes = search( firstResource); List<ResourceTraversed> secondSearchRes = search( secondResource); //find intersections List<ResourceTraversed> intersections = new ArrayList<ResourceTraversed>(); for (ResourceTraversed firstRsrc : firstSearchRes) { for (ResourceTraversed secondRsrc : secondSearchRes) { if (firstRsrc.getResource().equals(secondRsrc.getResource())) { ResourceTraversed intersection = new ResourceTraversed(firstRsrc.getResource(), firstRsrc.getDistance() + secondRsrc.getDistance()); intersections.add(intersection); } } } LOG.debug("num of intersections:" + intersections.size()); return intersections; } /** * @param modelFilePath * @param uriResource */ public List<ResourceTraversed> search(String uriResource) { Resource resource = ResourceFactory.createResource(uriResource); List<ResourceTraversed> resourcesTraversed = new ArrayList<ResourceTraversed>(); searchOntology(resource, resourcesTraversed, 0); LOG.debug("next search - uriResource:" + uriResource + " num of paths:" + resourcesTraversed.size()); return resourcesTraversed; } /** * @param resource * @param resourcesTraversed */ private void searchOntology(Resource resource, List<ResourceTraversed> resourcesTraversed, int distance) { resourcesTraversed.add(new ResourceTraversed(resource, distance)); StmtIterator iter = model.listStatements(resource, (Property)null, (RDFNode)null); while (iter.hasNext()) { Statement stmt = iter.nextStatement(); Resource subject = stmt.getSubject(); Property predicate = stmt.getPredicate(); RDFNode object = stmt.getObject(); //LOG.debug("next resource:" + object.toString()); if (object instanceof Resource) { //if another RDF node, recurse searchOntology((Resource)object, resourcesTraversed, distance+1); } } } @Override public void initialize(Map<String, Object> params) throws IOException { this.params = params; loadModel() ; Configuration conf = (Configuration)params.get("config"); if (conf.getBoolean("debug.on", false)) { LOG.setLevel(Level.DEBUG); } } /** * @author pranab * */ private static class ResourceTraversed { private Resource resource; private int distance; public ResourceTraversed(Resource resource, int distance) { this.resource = resource; this.distance = distance; } public Resource getResource() { return resource; } public int getDistance() { return distance; } public String toString() { return "resource:" + resource.toString() + " distance: " + distance; } } }