package edu.cmu.minorthird.text; import java.io.File; import org.apache.log4j.Logger; /** * A no options loader. * It checks the given file/string and does one of two things: * 1) if Directory - load each file as a document assuming that labels are embedded * 2) if file - load each line as a document assuming the first word is a document name * @author ksteppe */ public class SimpleTextLoader { private static Logger log = Logger.getLogger(SimpleTextLoader.class); public static TextLabels load(File file, boolean externalLabelFile) { TextBase base = null; MutableTextLabels tempLabels = null; MutableTextLabels labels = null; try { if (!file.isDirectory()) { TextBaseLoader loader = new TextBaseLoader(TextBaseLoader.DOC_PER_LINE, false); base = loader.load(file); }else { TextBaseLoader loader = new TextBaseLoader(TextBaseLoader.DOC_PER_FILE, true); base = loader.load(file); tempLabels = loader.getLabels(); } if (base == null) base = tempLabels.getTextBase(); labels = new BasicTextLabels(base); if (externalLabelFile) { String fileName = file.getName(); if (fileName.lastIndexOf('.')>=0) fileName = fileName.substring(0, fileName.lastIndexOf('.')); fileName += ".labels"; File dir = file.getParentFile(); File labelsFile = new File(dir, fileName); new TextLabelsLoader().importOps(labels, base, labelsFile); } else if (tempLabels != null) labels = tempLabels; } catch (Exception e) { log.error(e, e); } return labels; } public static TextLabels load(String fileName, boolean externalLabelFile) { return load(new File(fileName), externalLabelFile); } //-------------------Instance stuff for the Wizard ---------------------------------------- public boolean labelFile = true; public SimpleTextLoader() {} public TextLabels load(File file) { return load(file, labelFile); } public TextLabels load(String fileName) { return load(fileName, labelFile); } public boolean isLabelFile() { return labelFile; } public void setLabelFile(boolean labelFile) { this.labelFile = labelFile; } //-------------------------------------------------------------------------------- }