DistillerFactory.java example

Explorer

distiller-CORE-master
- src
  - main
    - java
      - it
        uniud
        ailab
        dcore
        Blackboard.java
        DistilledOutput.java
        Distiller.java
        DistillerException.java
        DistillerFactory.java
        Pipeline.java
        Stage.java
        annotation
        Annotable.java
        Annotation.java
        AnnotationException.java
        Annotator.java
        DefaultAnnotations.java
        annotations
        CoreferenceChainAnnotation.java
        FeatureAnnotation.java
        InferenceAnnotation.java
        NERAnnotation.java
        ScoredAnnotation.java
        TextAnnotation.java
        UriAnnotation.java
        annotators
        ChunkingNerAnnotator.java
        CoreferenceResolverAnnotator.java
        DocumentPhraseMaximalityAnnotator.java
        GenericEvaluatorAnnotator.java
        GenericNGramGeneratorAnnotator.java
        GenericWikipediaAnnotator.java
        GramMergerAnnotator.java
        ItalianLemmatizerAnnotator.java
        LinearEvaluatorAnnotator.java
        PorterStemmerAnnotator.java
        RawTdidfAnnotator.java
        RegexNGramGeneratorAnnotator.java
        SimpleAnnotationFilterAnnotator.java
        SimpleCutFilterAnnotator.java
        SimpleNGramGeneratorAnnotator.java
        SkylineGramFilterAnnotator.java
        StatisticalAnnotator.java
        StopwordSimpleFilterAnnotator.java
        SyuzhetAnnotator.java
        TagMeGramAnnotator.java
        TagMeTokenAnnotator.java
        WikipediaInferenceAnnotator.java
        eval
        Evaluator.java
        GenericDataset.java
        TrainingSetGenerator.java
        datasets
        SemEval2010.java
        kp
        KeyphraseEvaluator15.java
        KeyphraseEvaluatorAll.java
        training
        KeyphraseTrainingSetGenerator.java
        io
        CsvPrinter.java
        FileWriterStage.java
        GenericSheetPrinter.java
        GramPrinter.java
        IOBlackboard.java
        SentencePrinter.java
        TokenPrinter.java
        launchers
        Launcher.java
        SampleInference.java
        SimpleKE.java
        StanfordKE.java
        persistence
        DocumentComponent.java
        DocumentComposite.java
        Gram.java
        Keyphrase.java
        Mention.java
        Sentence.java
        Token.java
        utils
        BlackboardUtils.java
        DocumentUtils.java
        Either.java
        FileSystem.java
        GramUtils.java
        ListUtils.java
        Pair.java
        SnowballStemmerSelector.java
        StageUtils.java
        WikipediaUtils.java
        wrappers
        external
        CybozuLanguageDetectorAnnotator.java
        OpenNlpBootstrapperAnnotator.java
        RCallerEvaluator.java
        StanfordBootstrapperAnnotator.java
        StanfordFastBootstrapperAnnotator.java
  - test
    - java
      - test.java

/*
 * Copyright (C) 2015 Artificial Intelligence
 * Laboratory @ University of Udine.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version 2
 * of the License, or (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
 */
package it.uniud.ailab.dcore;

import it.uniud.ailab.dcore.annotation.annotators.*;
import it.uniud.ailab.dcore.io.GramPrinter;
import it.uniud.ailab.dcore.io.SentencePrinter;
import it.uniud.ailab.dcore.utils.FileSystem;
import it.uniud.ailab.dcore.wrappers.external.*;
import java.io.File;
import java.io.IOException;
import java.util.Locale;
import org.springframework.beans.factory.BeanDefinitionStoreException;
import org.springframework.context.ApplicationContext;
import org.springframework.context.support.ClassPathXmlApplicationContext;
import org.springframework.context.support.FileSystemXmlApplicationContext;

/**
 * A simple factory that generates the default Distiller configuration either
 * via XML configuration file or via Java code. We write this class also as a
 * tutorial for the users of this library, who can learn how to instantiate the
 * Distiller object studying this source code.
 *
 * <p>
 * All the XML-based loaders look up a bean named {@code distiller} in the
 * Spring application context they create and return it.</p>
 *
 * @author Marco Basaldella
 */
public class DistillerFactory {

    /**
     * Name of the bean that every XML configuration must define to expose the
     * Distiller instance.
     */
    private static final String DISTILLER_BEAN = "distiller";

    /**
     * Classpath location of the default XML configuration.
     */
    private static final String DEFAULT_CONFIG = "config.xml";

    /**
     * Classpath location of the default evaluation configuration.
     */
    private static final String EVAL_CONFIG = "eval.xml";

    /**
     * Instantiates a Distiller object using the default XML configuration; if
     * it's not available, uses the safer (but less precise) code configuration,
     * which excludes TagMe and inference from the distillation process.
     *
     * <p>
     * The fallback is used only when the configuration file cannot be read
     * (the direct cause of the Spring failure is an {@link IOException}). Any
     * other failure, e.g. an error inside the configuration file, is re-thrown
     * unchanged so that the developer can fix the configuration.</p>
     *
     * @return a Distiller ready to work.
     * @throws BeanDefinitionStoreException if the configuration file exists
     * but could not be loaded for a reason other than an I/O failure.
     */
    public static Distiller getDefault() {
        try {
            return getDefaultXML();
        } catch (BeanDefinitionStoreException bdse) {
            if (!isConfigUnreadable(bdse)) {
                // the file was found but is broken: let the developer see why
                throw bdse;
            }
            // the configuration file does not exist or is not accessible:
            // load the fallback configuration
            System.out.println(
                    "Distiller config file not found: using fallback configuration");
            return getDefaultCode();
        }
    }

    /**
     * Instantiates a Distiller object using the default evaluation
     * configuration ({@code eval.xml}, loaded from the classpath).
     *
     * @return a Distiller ready to work.
     * @throws RuntimeException if the evaluation configuration file does not
     * exist or is not accessible; the original {@link IOException} is attached
     * as the cause.
     * @throws BeanDefinitionStoreException if the configuration could not be
     * loaded for a reason other than an I/O failure.
     */
    public static Distiller getDefaultEval() {
        try {
            return loadFromPackagedXML(EVAL_CONFIG);
        } catch (BeanDefinitionStoreException bdse) {
            if (isConfigUnreadable(bdse)) {
                // there is no code fallback for the evaluation pipeline
                throw new RuntimeException(
                        "FATAL: Impossible to load the default evaluation pipeline.",
                        bdse.getCause());
            }
            throw bdse;
        }
    }

    /**
     * Instantiates a Distiller object using the specified configuration and
     * returns it.
     *
     * @param path the path where the config file is located; it may be
     * relative, in which case it is resolved by the Java platform against the
     * current working directory.
     * @return a Distiller ready to work.
     */
    public static Distiller loadFromXML(File path) {
        // The Spring Framework interprets plain paths handed to
        // FileSystemXmlApplicationContext as relative paths, so the location
        // is passed as a "file:" URL to make sure it is treated as absolute.
        //
        // File.toURI() resolves the file to its absolute form and produces a
        // well-formed, properly escaped URL on every platform. Gluing
        // "file://" in front of the raw absolute path, instead, breaks on
        // Windows (the drive letter ends up being parsed as the URL host) and
        // with paths that contain characters such as '%' or '#'.
        ApplicationContext context = new FileSystemXmlApplicationContext(
                path.toURI().toString());
        return distillerFrom(context);
    }

    /**
     * Instantiates a Distiller object using the default configuration and
     * returns it. Please note that you should create a config.xml file and copy
     * the content of default.xml inside it to get the framework to work.
     *
     * @return a Distiller ready to work.
     */
    public static Distiller getDefaultXML() {
        return loadFromPackagedXML(DEFAULT_CONFIG);
    }

    /**
     * Instantiates a Distiller object using a configuration packaged in the
     * Distiller JAR file and returns it.
     *
     * @param configPath the classpath location of the pipeline configuration
     * @return a Distiller ready to work.
     */
    public static Distiller loadFromPackagedXML(String configPath) {
        return distillerFrom(new ClassPathXmlApplicationContext(configPath));
    }

    /**
     * Builds the fallback Distiller entirely in Java code, without any XML
     * configuration file.
     *
     * <p>
     * The pipeline is bootstrapped with OpenNLP and is made of stemming, n-gram
     * generation, stopword filtering, statistical annotation, linear
     * evaluation and skyline filtering, followed by the gram and sentence
     * printers. The very same pipeline instance is registered for both English
     * and Italian. The TagMe, Syuzhet, Wikipedia inference and gram merger
     * stages are left commented out as a guide on how to enable them.</p>
     *
     * @return a Distiller ready to work.
     */
    public static Distiller getDefaultCode() {
        Distiller d = new Distiller();

        // set the language detector tool
        d.setLanguageDetector(new CybozuLanguageDetectorAnnotator());

        // build the pipeline
        Pipeline p = new Pipeline();

        // split the text
        p.addStage(new OpenNlpBootstrapperAnnotator());

        // annotate tokens with stemming
        p.addStage(new PorterStemmerAnnotator());

        // Uncomment the lines below to use the TagMe service on tokens
        // TagMeTokenAnnotator tagme = new TagMeTokenAnnotator();
        // tagme.setApiKey("INSERT KEY HERE");
        // p.addStage(tagme);

        // generate ngrams
        p.addStage(new SimpleNGramGeneratorAnnotator());

        // remove stopwords
        p.addStage(new StopwordSimpleFilterAnnotator());

        // annotate ngrams
        p.addStage(new StatisticalAnnotator());

        // Uncomment to use TagMe
        // p.addStage(new TagMeGramAnnotator());

        // Uncomment to use the emotional intensity annotator.
        // This way you'll see how different annotators lead to different
        // keyphrases detection
        // p.addStage(new SyuzhetAnnotator());

        // evaluate ngram features
        p.addStage(newBaseEvaluator());

        // Uncomment the line below to infer concepts.
        // Watch out: the inference process sends lots of requests to Wikipedia,
        // so it significantly slows down the process
        // p.addStage(new WikipediaInferenceAnnotator());

        // filter results
        p.addStage(new SkylineGramFilterAnnotator());

        // Uncomment to remove redundant grams
        // p.addStage(new GramMergerAnnotator());

        // print the results
        p.addStage(new GramPrinter());
        p.addStage(new SentencePrinter());

        // the same pipeline object serves both languages
        d.addPipeline(Locale.ENGLISH, p);
        d.addPipeline(Locale.ITALIAN, p);

        return d;
    }

    /**
     * Builds, entirely in Java code, a Distiller whose pipeline is
     * bootstrapped with the Stanford annotator.
     *
     * <p>
     * Compared to {@link #getDefaultCode()}, this pipeline adds the
     * coreference resolver and the NER chunking stages (and the related
     * weights in the linear evaluator), has no skyline filter and no sentence
     * printer, and is registered for English only.</p>
     *
     * @return a Distiller ready to work.
     */
    public static Distiller getStanfordCode() {
        Distiller d = new Distiller();

        // set the language detector tool
        d.setLanguageDetector(new CybozuLanguageDetectorAnnotator());

        // build the pipeline
        Pipeline p = new Pipeline();

        // split the text
        p.addStage(new StanfordBootstrapperAnnotator());

        // annotate tokens with stemming
        p.addStage(new PorterStemmerAnnotator());

        // Uncomment the lines below to use the TagMe service on tokens
        // TagMeTokenAnnotator tagme = new TagMeTokenAnnotator();
        // tagme.setApiKey("INSERT KEY HERE");
        // p.addStage(tagme);

        // generate ngrams
        p.addStage(new SimpleNGramGeneratorAnnotator());

        // remove stopwords
        p.addStage(new StopwordSimpleFilterAnnotator());

        // annotate ngrams
        p.addStage(new StatisticalAnnotator());
        p.addStage(new CoreferenceResolverAnnotator());
        p.addStage(new ChunkingNerAnnotator());

        // Uncomment to use TagMe
        // p.addStage(new TagMeGramAnnotator());

        // Uncomment to use the emotional intensity annotator.
        // This way you'll see how different annotators lead to different
        // keyphrases detection
        // p.addStage(new SyuzhetAnnotator());

        // evaluate ngram features: the common weights first, then the ones
        // of the features produced by the coreference and NER stages
        LinearEvaluatorAnnotator evaluator = newBaseEvaluator();
        evaluator.addWeight(CoreferenceResolverAnnotator.NUMBER_OF_REFERENCE, 0.2);
        evaluator.addWeight(CoreferenceResolverAnnotator.IN_ANAPHORA, 0.2);
        evaluator.addWeight(ChunkingNerAnnotator.IS_NER, 0.2);

        p.addStage(evaluator);

        // print the results
        p.addStage(new GramPrinter());

        d.addPipeline(Locale.ENGLISH, p);

        return d;
    }

    /**
     * Retrieves the Distiller bean from an already loaded application context.
     *
     * @param context the Spring context holding the configuration
     * @return the bean registered under the {@code distiller} name.
     */
    private static Distiller distillerFrom(ApplicationContext context) {
        return (Distiller) context.getBean(DISTILLER_BEAN);
    }

    /**
     * Tells whether a configuration loading failure is due to the
     * configuration file being missing or not accessible, i.e. whether the
     * direct cause of the Spring exception is an {@link IOException}.
     *
     * @param failure the exception raised while loading the configuration
     * @return {@code true} if the direct cause is an I/O failure, {@code false}
     * otherwise (including when there is no cause at all).
     */
    private static boolean isConfigUnreadable(BeanDefinitionStoreException failure) {
        return failure.getCause() instanceof IOException;
    }

    /**
     * Creates a linear evaluator carrying the feature weights shared by all
     * the code-defined pipelines.
     *
     * @return a new evaluator with the common weights already registered.
     */
    private static LinearEvaluatorAnnotator newBaseEvaluator() {
        LinearEvaluatorAnnotator evaluator = new LinearEvaluatorAnnotator();
        evaluator.addWeight(StatisticalAnnotator.DEPTH, 0.15);
        evaluator.addWeight(StatisticalAnnotator.HEIGHT, 0.25);
        evaluator.addWeight(StatisticalAnnotator.LIFESPAN, 0.1);
        evaluator.addWeight(StatisticalAnnotator.FREQUENCY_SENTENCE, 0.1);
        evaluator.addWeight(GenericNGramGeneratorAnnotator.NOUNVALUE, 0.3);
        evaluator.addWeight(GenericWikipediaAnnotator.WIKIFLAG, 0.1);
        return evaluator;
    }

}