/*
* Copyright (C) 2015 Artificial Intelligence
* Laboratory @ University of Udine.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version 2
* of the License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
*/
package it.uniud.ailab.dcore;
import it.uniud.ailab.dcore.annotation.annotators.*;
import it.uniud.ailab.dcore.io.GramPrinter;
import it.uniud.ailab.dcore.io.SentencePrinter;
import it.uniud.ailab.dcore.utils.FileSystem;
import it.uniud.ailab.dcore.wrappers.external.*;
import java.io.File;
import java.io.IOException;
import java.util.Locale;
import org.springframework.beans.factory.BeanDefinitionStoreException;
import org.springframework.context.ApplicationContext;
import org.springframework.context.support.ClassPathXmlApplicationContext;
import org.springframework.context.support.FileSystemXmlApplicationContext;
/**
* A simple factory that generates the default Distiller configuration either
* via XML configuration file or via Java code. We write this class also as a
* tutorial for the users of this library, who can learn how to instantiate the
* Distiller object studying this source code.
*
* @author Marco Basaldella
*/
public class DistillerFactory {

    /**
     * Name of the bean that every XML configuration is expected to expose.
     */
    private static final String DISTILLER_BEAN = "distiller";

    /**
     * Instantiates a Distiller object using the default XML configuration; if
     * it's not available, uses the safer (but less precise) code configuration,
     * which excludes TagMe and inference from the distillation process.
     *
     * @return a Distiller ready to work.
     * @throws BeanDefinitionStoreException if the default configuration file
     * could be reached but could not be loaded for a reason other than an
     * I/O failure (for example, because its content is invalid).
     */
    public static Distiller getDefault() {
        try {
            return getDefaultXML();
        } catch (BeanDefinitionStoreException bsde) {
            // An I/O cause means the configuration file does not exist or is
            // not accessible: fall back to the configuration built in code.
            if (bsde.getCause() instanceof IOException) {
                System.out.println(
                        "Distiller config file not found: using fallback configuration");
                return getDefaultCode();
            }
            // Any other cause (including no cause at all) is most likely an
            // error inside the configuration file: let the developer see it.
            throw bsde;
        }
    }

    /**
     * Instantiates a Distiller object using the default evaluation
     * configuration ({@code eval.xml}, loaded from the classpath).
     *
     * @return a Distiller ready to work.
     * @throws RuntimeException if the evaluation configuration file does not
     * exist or is not accessible.
     * @throws BeanDefinitionStoreException if the configuration file could
     * not be loaded for a reason other than an I/O failure.
     */
    public static Distiller getDefaultEval() {
        try {
            return loadFromPackagedXML("eval.xml");
        } catch (BeanDefinitionStoreException bsde) {
            Throwable cause = bsde.getCause();
            if (cause instanceof IOException) {
                // the configuration file does not exist or is not accessible
                throw new RuntimeException(
                        "FATAL: Impossible to load the default evaluation pipeline.",
                        cause);
            }
            throw bsde;
        }
    }

    /**
     * Instantiates a Distiller object using the specified configuration and
     * returns it.
     *
     * @param path the path where the config file is located
     * @return a Distiller ready to work.
     */
    public static Distiller loadFromXML(File path) {
        // The Spring Framework treats plain locations as relative paths, so
        // the location must be handed over as an explicit file: URL to be
        // interpreted as absolute.
        //
        // File.toURI() resolves the file against the working directory when
        // it was created from a relative path, and produces a well-formed,
        // properly escaped file: URI on every platform. Simply prepending
        // "file://" to the absolute path does not: a Windows path such as
        // C:\dir\config.xml would not result in a valid URL.
        ApplicationContext context = new FileSystemXmlApplicationContext(
                path.toURI().toString());
        return (Distiller) context.getBean(DISTILLER_BEAN);
    }

    /**
     * Instantiates a Distiller object using the default configuration and
     * returns it. Please note that you should create a config.xml file and copy
     * the content of default.xml inside it to get the framework to work.
     *
     * @return a Distiller ready to work.
     */
    public static Distiller getDefaultXML() {
        return loadFromPackagedXML("config.xml");
    }

    /**
     * Instantiates a Distiller object using a configuration packaged in the
     * Distiller JAR file and returns it.
     *
     * @param configPath the path of the pipeline
     * @return a Distiller ready to work.
     */
    public static Distiller loadFromPackagedXML(String configPath) {
        ApplicationContext context = new ClassPathXmlApplicationContext(configPath);
        return (Distiller) context.getBean(DISTILLER_BEAN);
    }

    /**
     * Instantiates a Distiller object configured directly in Java code,
     * without reading any XML file. The same pipeline is registered for both
     * English and Italian. The TagMe, emotional intensity and Wikipedia
     * inference stages are left commented out; uncomment them to enable them.
     *
     * @return a Distiller ready to work.
     */
    public static Distiller getDefaultCode() {
        Distiller d = new Distiller();

        // set the language detector tool
        d.setLanguageDetector(new CybozuLanguageDetectorAnnotator());

        // build the pipeline
        Pipeline p = new Pipeline();

        // split the text
        p.addStage(new OpenNlpBootstrapperAnnotator());

        // annotate tokens with stemming
        p.addStage(new PorterStemmerAnnotator());

        // Uncomment the lines below to use the TagMe service
        // TagMeTokenAnnotator tagme = new TagMeTokenAnnotator();
        // tagme.setApiKey("INSERT KEY HERE");
        // p.addStage(tagme);

        // generate ngrams
        p.addStage(new SimpleNGramGeneratorAnnotator());

        // remove stopwords
        p.addStage(new StopwordSimpleFilterAnnotator());

        // annotate ngrams
        p.addStage(new StatisticalAnnotator());

        // Uncomment to use TagMe
        // p.addStage(new TagMeGramAnnotator());

        // Uncomment to use the emotional intensity annotator.
        // This way you'll see how different annotators lead to different
        // keyphrases detection
        // p.addStage(new SyuzhetAnnotator());

        // evaluate ngram features
        p.addStage(newBaseEvaluator());

        // Uncomment the line below to infer concepts.
        // Watch out: the inference process sends lots of requests to Wikipedia,
        // so it significantly slows down the process
        // p.addStage(new WikipediaInferenceAnnotator());

        // filter results
        p.addStage(new SkylineGramFilterAnnotator());

        // remove redundant grams
        // p.addStage(new GramMergerAnnotator());

        p.addStage(new GramPrinter());
        p.addStage(new SentencePrinter());

        d.addPipeline(Locale.ENGLISH, p);
        d.addPipeline(Locale.ITALIAN, p);

        return d;
    }

    /**
     * Instantiates a Distiller object configured directly in Java code whose
     * pipeline starts with the {@link StanfordBootstrapperAnnotator} and also
     * includes the coreference resolution and NER chunking stages, whose
     * features are weighted by the evaluator. The pipeline is registered for
     * English only.
     *
     * @return a Distiller ready to work.
     */
    public static Distiller getStanfordCode() {
        Distiller d = new Distiller();

        // set the language detector tool
        d.setLanguageDetector(new CybozuLanguageDetectorAnnotator());

        // build the pipeline
        Pipeline p = new Pipeline();

        // split the text
        p.addStage(new StanfordBootstrapperAnnotator());

        // annotate tokens with stemming
        p.addStage(new PorterStemmerAnnotator());

        // Uncomment the lines below to use the TagMe service
        // TagMeTokenAnnotator tagme = new TagMeTokenAnnotator();
        // tagme.setApiKey("INSERT KEY HERE");
        // p.addStage(tagme);

        // generate ngrams
        p.addStage(new SimpleNGramGeneratorAnnotator());

        // remove stopwords
        p.addStage(new StopwordSimpleFilterAnnotator());

        // annotate ngrams
        p.addStage(new StatisticalAnnotator());
        p.addStage(new CoreferenceResolverAnnotator());
        p.addStage(new ChunkingNerAnnotator());

        // Uncomment to use TagMe
        // p.addStage(new TagMeGramAnnotator());

        // Uncomment to use the emotional intensity annotator.
        // This way you'll see how different annotators lead to different
        // keyphrases detection
        // p.addStage(new SyuzhetAnnotator());

        // evaluate ngram features: the base weights plus the features
        // produced by the coreference and NER stages added above
        LinearEvaluatorAnnotator evaluator = newBaseEvaluator();
        evaluator.addWeight(CoreferenceResolverAnnotator.NUMBER_OF_REFERENCE, 0.2);
        evaluator.addWeight(CoreferenceResolverAnnotator.IN_ANAPHORA, 0.2);
        evaluator.addWeight(ChunkingNerAnnotator.IS_NER, 0.2);
        p.addStage(evaluator);

        p.addStage(new GramPrinter());

        d.addPipeline(Locale.ENGLISH, p);

        return d;
    }

    /**
     * Creates a linear evaluator carrying the feature weights shared by the
     * code-based configurations.
     *
     * @return a new evaluator with the base weights already added.
     */
    private static LinearEvaluatorAnnotator newBaseEvaluator() {
        LinearEvaluatorAnnotator evaluator = new LinearEvaluatorAnnotator();
        evaluator.addWeight(StatisticalAnnotator.DEPTH, 0.15);
        evaluator.addWeight(StatisticalAnnotator.HEIGHT, 0.25);
        evaluator.addWeight(StatisticalAnnotator.LIFESPAN, 0.1);
        evaluator.addWeight(StatisticalAnnotator.FREQUENCY_SENTENCE, 0.1);
        evaluator.addWeight(GenericNGramGeneratorAnnotator.NOUNVALUE, 0.3);
        evaluator.addWeight(GenericWikipediaAnnotator.WIKIFLAG, 0.1);
        return evaluator;
    }
}