package com.acuitra.question.resources;

import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Set;

import javax.ws.rs.GET;
import javax.ws.rs.Path;
import javax.ws.rs.Produces;
import javax.ws.rs.QueryParam;
import javax.ws.rs.core.MediaType;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import com.acuitra.ErrorCodes;
import com.acuitra.pipeline.ContextWithJerseyClient;
import com.acuitra.pipeline.ParallelPipelineRunner;
import com.acuitra.pipeline.RunnablePipeline;
import com.acuitra.question.core.Answer;
import com.acuitra.question.core.Question;
import com.acuitra.stages.StageException;
import com.acuitra.stages.integrated.IntegratedQuepyStage;
import com.acuitra.stages.integrated.NLPMapToDBpediaOntOrPropQueryStage;
import com.acuitra.stages.integrated.NLPQueryStage;
import com.google.common.base.Splitter;
import com.google.common.base.Strings;
import com.google.common.collect.ArrayListMultimap;
import com.google.common.collect.ListMultimap;
import com.sun.jersey.api.client.Client;
import com.yammer.metrics.annotation.Timed;

/**
 * JAX-RS resource that answers natural-language questions by fanning the
 * question out to three answer pipelines in parallel and merging their results.
 */
@Path("/ask")
@Produces(MediaType.APPLICATION_JSON)
public class QuestionResource {

    private final Logger logger = LoggerFactory.getLogger(this.getClass());

    private final Client jerseyClient;
    private final String namedEntityRecognitionURL;
    private final String sparqlEndpointURL;
    private final String quepyURL;
    private final ListMultimap<String, String> namePredicateMapping;

    public QuestionResource(Client jerseyClient, String namedEntityRecognitionURL,
            String sparqlEndpointURL, String quepyURL) {
        this.jerseyClient = jerseyClient;
        this.namedEntityRecognitionURL = namedEntityRecognitionURL;
        this.sparqlEndpointURL = sparqlEndpointURL;
        this.quepyURL = quepyURL;
        this.namePredicateMapping = readPropertyMapping();
    }

    private ListMultimap<String, String> readPropertyMapping() {
        // Reads a CSV in the format: word, predicate, preferred_term.
        // A row should contain either a predicate or a preferred term, never both.
        String filename = "word-predicate-mappings.csv";
        ListMultimap<String, String> namePredicateMapping = ArrayListMultimap.create();
        HashMap<String, String> namePreferredMapping = new HashMap<>();
        InputStream in = this.getClass().getClassLoader().getResourceAsStream(filename);
        try {
            if (in == null) {
                logger.error("Could not find " + filename + " on the classpath");
            } else {
                BufferedReader br = new BufferedReader(new InputStreamReader(in));
                boolean firstLine = true;
                try {
                    String read = br.readLine();
                    while (read != null) {
                        if (!firstLine) {
                            Iterable<String> columns = Splitter.on(',').trimResults().split(read);
                            String name = null;
                            String predicate = null;
                            String preferredTerm = null;
                            int count = 0;
                            for (String column : columns) {
                                // walk the comma-separated columns of this row
                                switch (count) {
                                case 0:
                                    name = column;
                                    break;
                                case 1:
                                    if (!Strings.isNullOrEmpty(column)) {
                                        // normalise the predicate into an angle-bracketed IRI
                                        predicate = column;
                                        if (!predicate.startsWith("<")) {
                                            predicate = "<" + predicate;
                                        }
                                        if (!predicate.endsWith(">")) {
                                            predicate = predicate + ">";
                                        }
                                    }
                                    break;
                                case 2:
                                    if (!Strings.isNullOrEmpty(column)) {
                                        preferredTerm = column;
                                    }
                                    break;
                                default:
                                    break;
                                }
                                count++;
                            }
                            if (!Strings.isNullOrEmpty(name)) {
                                // name should never be empty; a row carries either a
                                // predicate or a preferred term, not both
                                if (!Strings.isNullOrEmpty(predicate)) {
                                    namePredicateMapping.put(name, predicate);
                                } else if (!Strings.isNullOrEmpty(preferredTerm)) {
                                    namePreferredMapping.put(name, preferredTerm);
                                }
                            }
                        } else {
                            // swallow the header line
                            firstLine = false;
                        }
                        read = br.readLine();
                    }
                    // Both maps are now filled. Copy each preferred term's predicates
                    // onto its aliases, so the aliases resolve directly as well.
                    Set<String> keys = namePreferredMapping.keySet();
                    for (String name : keys) {
                        String preferredName = namePreferredMapping.get(name);
                        // copy out of the multimap's live view before putting the
                        // predicates back into the same multimap under the alias
                        List<String> predicates = new ArrayList<>(namePredicateMapping.get(preferredName));
                        namePredicateMapping.putAll(name, predicates);
                    }
                    logger.info(filename + " mappings successfully loaded");
                } catch (IOException e) {
                    logger.error("Error loading " + filename, e);
                }
            }
            return namePredicateMapping;
        } finally {
            if (in != null) {
                try {
                    in.close();
                } catch (IOException e) {
                    // ignore
                }
            }
        }
    }
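    // Illustrative mapping rows (hypothetical, not taken from the real
    // word-predicate-mappings.csv). A predicate row ties a word straight to an
    // IRI; a preferred-term row aliases a word to another word and inherits its
    // predicates through the copy step above:
    //
    //   word,predicate,preferred_term
    //   spouse,http://dbpedia.org/ontology/spouse,
    //   wife,,spouse
    //
    // After loading, namePredicateMapping would hold:
    //   "spouse" -> [<http://dbpedia.org/ontology/spouse>]
    //   "wife"   -> [<http://dbpedia.org/ontology/spouse>]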
    @GET
    @Timed
    public List<Answer> ask(@QueryParam("question") String param) {
        Question question = new Question(param);
        ContextWithJerseyClient<Question, List<Answer>> context = new ContextWithJerseyClient<>(jerseyClient);
        context.setInput(question);

        // three independent answering strategies, run against the shared context
        RunnablePipeline<Question, List<Answer>> nlpPipeline = new RunnablePipeline<>("NLP Pipeline", context);
        RunnablePipeline<Question, List<Answer>> nlpOntologyPipeline = new RunnablePipeline<>("NLP Ontology Pipeline", context);
        RunnablePipeline<Question, List<Answer>> quepyPipeline = new RunnablePipeline<>("Quepy Pipeline", context);

        nlpPipeline.addStage(new NLPQueryStage(namedEntityRecognitionURL, sparqlEndpointURL, namePredicateMapping));
        nlpOntologyPipeline.addStage(new NLPMapToDBpediaOntOrPropQueryStage(namedEntityRecognitionURL, sparqlEndpointURL, namePredicateMapping));
        quepyPipeline.addStage(new IntegratedQuepyStage(quepyURL, sparqlEndpointURL, jerseyClient));

        ParallelPipelineRunner<Question, List<Answer>> pipeRunner = new ParallelPipelineRunner<>(10000);
        pipeRunner.addPipeline(nlpPipeline);
        pipeRunner.addPipeline(nlpOntologyPipeline);
        pipeRunner.addPipeline(quepyPipeline);
        pipeRunner.run();

        if (quepyPipeline.isComplete() || nlpPipeline.isComplete()) {
            // at least one of the main pipelines finished in time
            Map<String, List<Answer>> answerMap = context.getPreviousOutputs();
            List<Answer> quepyAnswers = answerMap.get(IntegratedQuepyStage.class.getName());
            List<Answer> nlpAnswers = answerMap.get(NLPQueryStage.class.getName());
            List<Answer> nlpOntologyAnswers = answerMap.get(NLPMapToDBpediaOntOrPropQueryStage.class.getName());

            List<Answer> results = new ArrayList<>();
            if (isEmpty(nlpAnswers) && isEmpty(quepyAnswers) && isEmpty(nlpOntologyAnswers)) {
                // every pipeline came back empty: report a single error answer
                Answer answer = new Answer();
                answer.setErrorCode(ErrorCodes.NO_ANSWER_GENERATED);
                answer.setErrorMessage("Could not find answer");
                results.add(answer);
            } else {
                results = mergeList(results, nlpAnswers, quepyAnswers, nlpOntologyAnswers);
            }
            return results;
        } else {
            // neither main pipeline completed: surface the pipeline error if there
            // was one, otherwise treat it as a timeout
            if (context.isError()) {
                throw context.getException();
            } else {
                throw new StageException("Processing questions took too long", ErrorCodes.PROCESSING_QUESTION_TIMEOUT);
            }
        }
    }
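    // Worked example for the merge below, with hypothetical answers and assuming
    // Answer.addVote() increments a counter that starts at zero: if the NLP and
    // Quepy pipelines both return "Paris" and Quepy alone also returns "Lyon",
    // the merged list holds two distinct answers with 2 and 1 votes, and
    // defaultConfidence = 1.0 / 2 = 0.5, giving:
    //   "Paris" -> confidence 0.5 * 2 = 1.0
    //   "Lyon"  -> confidence 0.5 * 1 = 0.5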
    private List<Answer> mergeList(List<Answer> results, List<Answer>... listsToMerge) {
        // Deduplicate by answer text. Each list that contributes the same answer
        // adds a vote, so answers confirmed by several pipelines score higher.
        Map<String, Answer> index = new HashMap<>();
        for (List<Answer> list : listsToMerge) {
            if (list != null) {
                for (Answer answer : list) {
                    Answer existing = index.get(answer.getAnswer());
                    if (existing == null) {
                        answer.addVote();
                        index.put(answer.getAnswer(), answer);
                        results.add(answer);
                    } else {
                        // count the duplicate as an extra vote for the answer
                        // already collected
                        existing.addVote();
                    }
                }
            }
        }
        // spread confidence across the distinct answers, weighted by votes
        float defaultConfidence = (float) (1.0 / results.size());
        for (Answer answer : results) {
            answer.setConfidence(defaultConfidence * answer.getVotes());
        }
        return results;
    }

    private boolean isEmpty(List<Answer> list) {
        return list == null || list.isEmpty();
    }
}
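// Example call (hypothetical host and port, adjust to the actual deployment):
//
//   curl "http://localhost:8080/ask?question=Who%20wrote%20Hamlet"
//
// The response is a JSON array of Answer objects carrying answer text, votes
// and confidence, or a single Answer with NO_ANSWER_GENERATED when every
// pipeline came back empty.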