DefaultPipeline.java example

Explorer

uncc2014watsonsim-master
- src
  - main
    - java
      - edu
        uncc
        cs
        watsonsim
        Answer.java
        Configuration.java
        DBQuestionSource.java
        Database.java
        DefaultPipeline.java
        Environment.java
        KV.java
        KVTest.java
        Log.java
        Passage.java
        Phrase.java
        QClassDetection.java
        QType.java
        Question.java
        Score.java
        SentenceDetector.java
        StringUtils.java
        WatsonSim.java
        WebFrontend.java
        WebsocketFrontend.java
        datapreparation
        KingJamesBible.java
        index
        Bigrams.java
        Edges.java
        Indri.java
        Lucene.java
        Reindex.java
        Segment.java
        nlp
        ApproxStringIntMap.java
        ApproxStringIntMapTest.java
        ClueType.java
        DBPediaCandidateType.java
        DenseVectors.java
        DenseVectorsTest.java
        Redirects.java
        Relatedness.java
        RelatednessTest.java
        StringStack.java
        StringStackTest.java
        SupportCandidateType.java
        Trees.java
        Weighted.java
        researchers
        AnswerTrimming.java
        CombineScores.java
        HyphenTrimmer.java
        MergeAnswers.java
        MergeByCommonSupport.java
        MergeByText.java
        Normalize.java
        OpenNlpTests.java
        POSStructureScorer.java
        PassageRetrieval.java
        PersonRecognition.java
        RedirectSynonyms.java
        Researcher.java
        StatsDump.java
        StrictFilters.java
        TagLAT.java
        URLExpander.java
        WekaTee.java
        package-info.java
        scorers
        AnswerInPassage.java
        AnswerInQuestionScorer.java
        AnswerLength.java
        AnswerPOS.java
        AnswerScorer.java
        CommonConstituents.java
        Correct.java
        DateMatches.java
        ElliotMerschScorer.java
        Entropy.java
        EntropyTest.java
        GloveAnswerQuestionContext.java
        GloveAnswerQuestionContextTest.java
        JM_Scorer.java
        KensNLPScorer.java
        LATCheck.java
        LATMentions.java
        LuceneEcho.java
        Merge.java
        NERScorer.java
        NGram.java
        NamedEntityRecognizerScorer.java
        PassageCount.java
        PassageQuestionLengthRatio.java
        PassageScorer.java
        PassageScorerOpenNLPAda.java
        PassageTermMatch.java
        PercentWordsInCommon.java
        QAKeywordMatch.java
        QPKeywordMatch.java
        QuestionID.java
        Scorer.java
        SentenceSimilarity.java
        SkipBigram.java
        StephensonOpenNLPScorer.java
        TopPOS.java
        WPPageViews.java
        WShalabyScorer.java
        WordProximity.java
        package-info.java
        scripts
        ParallelStats.java
        WikipediaViewCounter.java
        WikiquoteParser.java
        WikiquoteQuote.java
        WiktionaryParser.java
        package-info.java
        search
        Anagrams.java
        BingSearcher.java
        CachingSearcher.java
        IndriSearcher.java
        LucenePassageSearcher.java
        LuceneSearcher.java
        MeanDVSearch.java
        MeanDVSearchTest.java
        Searcher.java
        SemanticVectorSearcher.java
  - test
    - java
      - edu
        uncc
        cs
        watsonsim
        AnswerMergeTest.java
        CoreNLPSentenceSimilarityTest.java
        DateMatchesTest.java
        LATDetectionTest.java
        QClassDetectionTest.java
        QuestionResultsScorerTest.java
        ReindexEdgesTest.java
        StringUtilsTest.java
        TypeDetectionTest.java

package edu.uncc.cs.watsonsim;

import java.sql.Timestamp;
import java.util.ArrayList;
import java.util.List;
import java.util.function.Consumer;

import edu.uncc.cs.watsonsim.researchers.*;
import edu.uncc.cs.watsonsim.scorers.*;
import edu.uncc.cs.watsonsim.search.*;

/** The standard Question Analysis pipeline.
 * 
 * The pipeline is central to the DeepQA framework.
 * It consists of {@link Searcher}s, {@link Researcher}s, {@link Scorer}s, and
 * a final confidence-computing stage, which this class runs as a second
 * ("late") group of {@link Researcher}s.<p>
 * 
 * Each step in the pipeline takes and possibly transforms a {@link Question}.
 * {@link Question}s aggregate {@link Answer}s, and a correct {@link Answer} (if it is
 *     known).
 * {@link Answer}s aggregate scores (which are primitive doubles) and
 *     {@link Passage}s, and contain a candidate text.
 * {@link Passage}s aggregate more scores, and provide some utilities for
 *     processing the text they contain.<p>
 * 
 * A {@link Searcher} takes the {@link Question}, runs generic transformations
 *     on its text and runs a search engine on it. The Passages it creates are
 *     promoted into {@link Answer}s, where the Passage title is the candidate
 *     {@link Answer} text and each {@link Answer} has one Passage. The passage
 *     Searchers do the same but are optimized for taking {@link Answer}s and
 *     finding supporting evidence as Passages. In that case, the resulting
 *     Passages are not promoted.<p>
 * 
 * A {@link Researcher} takes a {@link Question} and performs a transformation
 *     on it. There is no contract regarding what it can do to the
 *     {@link Question}, so they can't be safely run in parallel and the order
 *     of execution matters. Read the source for an idea of the intended order.
 *     <p>
 * 
 * A {@link Scorer} takes a {@link Question} and generates scores for either
 *     {@link Answer}s or {@link Passage}s (inheriting from
 *     {@link AnswerScorer} or {@link PassageScorer} respectively.)<p>
 *
 */
public class DefaultPipeline {
	private final Searcher[] searchers;
	private final Researcher early_researchers;
	private final Scorer[] scorers;
	private final Researcher late_researchers;
	private final Environment env = new Environment();
	
	/**
	 * Start a new question answering pipeline.
	 */
	public DefaultPipeline() {
		Timestamp run_start = new Timestamp(System.currentTimeMillis());
		
		/*
		 * Create the pipeline
		 */
		searchers = new Searcher[]{
			new LuceneSearcher(env),
			new IndriSearcher(env, false),
			new MeanDVSearch(env),
			//new SemanticVectorSearcher(env),
			// You may want to cache Bing results
			// new BingSearcher(config),
			new CachingSearcher(env, new BingSearcher(env), "bing"),
			new Anagrams(env)
		};
		early_researchers = Researcher.pipe(env.log,
			// Most of the suggestions are garbage
			//new RedirectSynonyms(env),
			new HyphenTrimmer(),
			new StrictFilters(),
			// This causes too much network traffic.
			//new URLExpander(env),
			// Often trims good parts of correct answers
			//new AnswerTrimming(), // Overshoots
			new MergeByText(env),
			new MergeAnswers(),
			//new ChangeFitbAnswerToContentsOfBlanks(),
			new PassageRetrieval(env,
					new LucenePassageSearcher(env)
					//new IndriSearcher(env, true)
					//new CachingSearcher(new BingSearcher(env), "bing"),
				),
			new MergeByCommonSupport(),
			new PersonRecognition(),
			new TagLAT(env),
			new MergeByCommonSupport()
		);
		scorers = new Scorer[]{
			new AnswerLength(),
			new AnswerPOS(),
			new CommonConstituents(),
			new Correct(env),
			new DateMatches(),
			new Entropy(env),
			new GloveAnswerQuestionContext(),
			new LATCheck(env),
			new LATMentions(),
			new LuceneEcho(),
			new NGram(),
			new PassageTermMatch(),
			new PassageCount(),
			new PassageQuestionLengthRatio(),
			new QuestionID(),
			new QPKeywordMatch(),
			new QAKeywordMatch(),
			new SkipBigram(),
			new TopPOS(),
			new WordProximity(),
			new WPPageViews(env)
			//new RandomIndexingCosineSimilarity(),
			//new DistSemCosQAScore(),
			//new DistSemCosQPScore(),
		};
		late_researchers = Researcher.pipe(env.log,
			new Normalize(),
			new WekaTee(run_start),
			new CombineScores(),
			new StatsDump(run_start, env)
		);
	}
	
	public List<Answer> ask(String qtext) {
	    return ask(new Question(qtext));
	}
	
	public List<Answer> ask(Question question) {
	    return ask(question, System.out::println);
	}
	
    /** Run the full standard pipeline */
	public List<Answer> ask(Question question, Consumer<String> listener) {
		// Query every engine
		Log l = env.log;
		l.setListener(listener);
		
		l.info("Generating candidate answers..");
		List<Answer> answers = new ArrayList<>();
		for (Searcher s: searchers)
			for (Passage p : s.query(question))
				answers.add(new Answer(p));
		l.info("Generated " + answers.size() + " candidate answers.");
		
		answers = early_researchers.pull(question, answers);
    	
    	l.info("Scoring supporting evidence..");
        for (Scorer s: scorers)
        	s.scoreQuestion(question, answers);
        
        l.info("Computing confidence..");
        
        answers = late_researchers.pull(question, answers);
        return answers;
    }
}