/* Copyright 2006, Carnegie Mellon, All Rights Reserved */

package edu.cmu.minorthird.text;

import java.io.File;
import java.io.FileFilter;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;

import edu.cmu.minorthird.util.BasicCommandLineProcessor;
import edu.cmu.minorthird.util.CommandLineProcessor;
import edu.cmu.minorthird.util.IOUtil;
import edu.cmu.minorthird.util.ProgressCounter;
import edu.cmu.minorthird.util.gui.SmartVanillaViewer;
import edu.cmu.minorthird.util.gui.ViewerFrame;

/**
 * A more scalable version of a document-per-file TextBase with
 * standoff annotation. In a LabeledDirectory, annotations are
 * generated document-by-document, and each set of annotations is
 * saved in a .labels file for that document alone, rather than by
 * reading the entire collection into memory and annotating it at
 * once.
 *
 * @author William Cohen
 */
public class LabeledDirectory implements CommandLineProcessor.Configurable {

  /** Suffix of the text files that are processed. */
  private static final String TEXT_SUFFIX = ".txt";

  /** Suffix of the per-document annotation files. */
  private static final String LABELS_SUFFIX = ".labels";

  /** Accepts files whose name ends with {@code .txt}. */
  private static final FileFilter TEXT_FILE_FILTER = new FileFilter() {
    @Override
    public boolean accept(File file) {
      return file.getName().endsWith(TEXT_SUFFIX);
    }
  };

  /** Directory holding the text files; null until configured. */
  private File dir;

  /** Annotators applied, in order, to every document. */
  private final List<Annotator> annotatorList = new ArrayList<Annotator>();

  /** Annotation types passed to {@code require} for every document. */
  private final List<String> requireList = new ArrayList<String>();

  /** When true, previously saved labels are discarded before re-labeling. */
  private boolean resetAll = false;

  /** When non-null, {@link #main} views this file instead of re-labeling. */
  private String nameOfFileToView = null;

  /**
   * A CommandLineProcessor for LabeledDirectory. Each public method
   * stores one setting on the enclosing LabeledDirectory, which is why
   * this is an inner (non-static) class.
   */
  public class MyCLP extends BasicCommandLineProcessor {

    /** Sets the directory to process; it must be an existing directory. */
    public void dir(String s) {
      dir = getDirectory(s);
    }

    /** Loads a serialized annotator from file {@code s} and queues it. */
    public void annotate(String s) {
      annotatorList.add(getAnnotator(s));
    }

    /** Queues annotation type {@code s} to be required of every document. */
    public void require(String s) {
      requireList.add(s);
    }

    /** Requests that previously saved labels be discarded. */
    public void reset() {
      resetAll = true;
    }

    /** Selects a single file to be viewed instead of re-labeling. */
    public void view(String s) {
      nameOfFileToView = s;
    }
  }

  /** Return a CommandLineProcessor that can be used to configure a LabeledDirectory. */
  @Override
  public CommandLineProcessor getCLP() {
    return new MyCLP();
  }

  /** Creates an unconfigured instance; the directory must be set through {@link #getCLP()}. */
  public LabeledDirectory() {
  }

  /**
   * Creates an instance for the named directory.
   *
   * @throws IllegalArgumentException if {@code s} is not an existing directory
   */
  public LabeledDirectory(String s) {
    dir = getDirectory(s);
  }

  /** Creates an instance for the given directory, which is not validated here. */
  public LabeledDirectory(File dir) {
    this.dir = dir;
  }

  /**
   * Get the TextLabels which annotates this file. If a .labels file
   * exists next to the text file it is loaded; otherwise an empty set
   * of labels over the document is returned.
   *
   * @param f the text file
   * @return the labels for the single document in {@code f}
   * @throws IOException if the text file cannot be read
   */
  public MonotonicTextLabels getTextLabels(File f) throws IOException {
    TextBase textBase = getTextBase(f);
    File labelFile = labelFileFor(f.getParentFile(), f.getName());
    if (labelFile.exists()) {
      return new TextLabelsLoader().loadOps(textBase, labelFile);
    }
    return new BasicTextLabels(textBase);
  }

  /**
   * Get a TextBase that contains exactly this file.
   */
  private TextBase getTextBase(File f) throws IOException {
    String contents = IOUtil.readFile(f);
    BasicTextBase base = new BasicTextBase();
    // Every document is loaded under the same fixed id.
    // NOTE(review): presumably saved .labels files refer to this id, so
    // changing it could invalidate existing label files -- confirm first.
    base.loadDocument("someFile", contents);
    return base;
  }

  /**
   * Builds the .labels file that belongs to a text file. The
   * {@code .txt} suffix is stripped only when it is actually present, so
   * short names or names with another extension no longer produce a
   * mangled stem or a StringIndexOutOfBoundsException.
   *
   * @param parentDir directory of the label file; may be null
   * @param textFileName simple name of the text file
   */
  private static File labelFileFor(File parentDir, String textFileName) {
    String stem = textFileName;
    if (textFileName.endsWith(TEXT_SUFFIX)) {
      stem = textFileName.substring(0, textFileName.length() - TEXT_SUFFIX.length());
    }
    return new File(parentDir, stem + LABELS_SUFFIX);
  }

  /**
   * Re-label the text files in the directory. This uses lists of
   * annotators and 'require' calls that have been set up
   * previously.
   *
   * @throws IllegalStateException if no directory has been configured
   * @throws IllegalArgumentException if the directory cannot be listed
   * @throws IOException if a text file cannot be read or labels cannot be saved
   */
  public void reLabelText() throws IOException {
    if (dir == null) {
      throw new IllegalStateException("no directory has been specified");
    }
    File[] textFiles = dir.listFiles(TEXT_FILE_FILTER);
    if (textFiles == null) {
      throw new IllegalArgumentException("can't list directory " + dir);
    }
    ProgressCounter filePC = new ProgressCounter("labeling", "file", textFiles.length);
    TextLabelsLoader loader = new TextLabelsLoader();
    for (File textFile : textFiles) {
      MonotonicTextLabels labels = getTextLabels(textFile);
      if (resetAll) {
        // start over from empty labels on the same text base
        labels = new BasicTextLabels(labels.getTextBase());
      }
      for (Annotator ann : annotatorList) {
        ann.annotate(labels);
      }
      for (String req : requireList) {
        labels.require(req, null);
      }
      loader.saveTypesAsOps(labels, labelFileFor(dir, textFile.getName()));
      filePC.progress();
    }
    filePC.finished();
  }

  /**
   * Convert a name to a file and check that it is an existing directory.
   *
   * @throws IllegalArgumentException if it is not
   */
  private File getDirectory(String dirName) {
    File candidate = new File(dirName);
    // isDirectory() is false for a nonexistent path, so one check suffices
    if (!candidate.isDirectory()) {
      throw new IllegalArgumentException("not a directory: " + dirName);
    }
    return candidate;
  }

  /**
   * Load an annotator from a file holding a serialized Annotator.
   *
   * @throws IllegalArgumentException if loading or casting fails; the
   *         underlying exception is attached as the cause
   */
  private Annotator getAnnotator(String annotatorName) {
    File annFile = new File(annotatorName);
    try {
      return (Annotator) IOUtil.loadSerialized(annFile);
    } catch (Exception ex) {
      throw new IllegalArgumentException(
          "can't load annotator " + annotatorName + ": " + ex, ex);
    }
  }

  /**
   * A simple main program that allows you to add annotations to a
   * directory of text files. Arguments are handled by {@link MyCLP}
   * (dir, annotate, require, reset, view). If a file to view was given,
   * its labels are shown in a viewer; otherwise the directory is
   * re-labeled.
   */
  public static void main(String[] args) throws IOException {
    LabeledDirectory ld = new LabeledDirectory();
    ld.getCLP().processArguments(args);
    if (ld.nameOfFileToView == null) {
      ld.reLabelText();
    } else {
      TextLabels labels = ld.getTextLabels(new File(ld.nameOfFileToView));
      new ViewerFrame(ld.nameOfFileToView, new SmartVanillaViewer(labels));
    }
  }
}