import edu.cmu.minorthird.util.*;
import edu.cmu.minorthird.text.*;
import edu.cmu.minorthird.text.gui.*;
import edu.cmu.minorthird.text.learn.*;
import edu.cmu.minorthird.text.learn.experiments.*;
import edu.cmu.minorthird.text.mixup.*;
import edu.cmu.minorthird.classify.*;
import edu.cmu.minorthird.classify.experiments.*;
import edu.cmu.minorthird.classify.algorithms.linear.*;
import edu.cmu.minorthird.classify.algorithms.trees.*;
import java.util.*;
import java.util.regex.*;
import java.io.*;
import org.apache.log4j.*;
/**
* This creates encapsulated Minorthird annotators for all of the
* text-based SLIF components. The released components will be places
* in the subdirectory 'dist'. Certain tmp files are placed in
* 'dist/helper'; these are not needed by the final encapsulated
* annotators in 'dist'.
*
*/
/* Currently, only the CRF-based extractors actually work. The
* DictHMMs need retrained, or else need a fix to SVM serialization.
* The semiCRFs have some other problem.
*/
public class MakeReleasableComponents
{
final static String[] PROTEIN_ANNOTATORS = new String[]{
"CRFonUt","CRFonYapex","CRFongenia",
"DictHMMonGenia","DictHMMonUt","DictHMMonYapex",
"semiCRFongenia","semiCRFonut","semiCRFonyapex"
};
final static String[] IMG_PTR_CLASSES = new String[]{
"local","regional"
};
static public void main(String[] args) throws IOException
{
String sep = File.pathSeparator;
File distDir = new File("dist"); // for things to distribute
File helperDir = new File(distDir,"helper"); // temporary items
if (!distDir.exists()) distDir.mkdir();
if (!helperDir.exists()) helperDir.mkdir();
// create the necessary helper files for the caption scoping
// copy {scope,features}.mixup from lib to helperDir
String scopeString = IOUtil.readFile(new File("lib/scope.mixup"));
setFileContents(scopeString,new File(helperDir,"scope.mixup"));
String featuresString = IOUtil.readFile(new File("lib/features.mixup"));
setFileContents(featuresString,new File(helperDir,"features.mixup"));
// local.mixup calls the localFilter-annotator
setFileContents("provide 'local';\nannotateWith 'localFilter.ann';\n", new File(helperDir,"local.mixup"));
// regional.mixup calls the regionalFilter-annotator
setFileContents("provide 'regional';\nannotateWith 'regionalFilter.ann';\n", new File(helperDir,"regional.mixup"));
// caption.mixup calls the local, regional, and scope.mixup
setFileContents("provide 'caption';\nrequire 'local';require 'regional';require 'scope';require 'imagePointer','ImagePointerAnnotator';\n",
new File(helperDir,"caption.mixup"));
///
// convert trained filters (eg localFilter.ser) to encapsulated annotators
//
for (int i=0; i<IMG_PTR_CLASSES.length; i++) {
String ci = IMG_PTR_CLASSES[i];
// turn the filter for class ci into an annotator
BinaryClassifier filter = (BinaryClassifier)IOUtil.loadSerialized(new File("lib/"+ci+"Filter.ser"));
FinderAnnotator filterAnnotator =
new FinderAnnotator(new FilteredFinder(filter,new ImgPtrFE(),LearnImagePtrExtractor.candidateFinder),
ci);
String annotatorFileName = ci+"Filter.ann";
IOUtil.saveSerialized(filterAnnotator,new File(helperDir,annotatorFileName));
// this stuff is needed to encapsulate the annotator
makeFilterHelper(annotatorFileName,ci);
String pathi =
//"class/ImgPtrFE.class" +sep+
"dist/helper/"+annotatorFileName +sep+
"dist/helper/"+ci+".mixup";
EncapsulatedAnnotator anni = new EncapsulatedAnnotator(ci,pathi);
System.out.println("encapsulating "+ci+" with "+pathi);
IOUtil.saveSerialized(anni,new File(helperDir,ci+"Filter.eann"));
}
// build the annotator for scopes, which uses the filter annotators
String path0 =
//"class/ImagePointerAnnotator.class" +sep+
//"class/ImgPtrFE.class" +sep+
"dist/helper/scope.mixup" +sep+
"dist/helper/local.mixup" +sep+
"dist/helper/regional.mixup" +sep+
"dist/helper/regionalFilter.eann" +sep+
"dist/helper/regionalFilter.ann" +sep+
"dist/helper/localFilter.eann" +sep+
"dist/helper/localFilter.ann" +sep+
"dist/helper/features.mixup" +sep+
"dist/helper/caption.mixup";
System.out.println("encapsulating caption with "+path0);
EncapsulatedAnnotator ann0 = new EncapsulatedAnnotator("caption",path0);
IOUtil.saveSerialized(ann0,new File(distDir,"Caption.eann"));
// build encapsulated versions of the various protein entity extractors
String[] exportedTypes = new String[]{"protein","fillMeInLater"};
for (int i=0; i<PROTEIN_ANNOTATORS.length; i++) {
String pi = PROTEIN_ANNOTATORS[i];
// fix annoying inconsistencies
String cleanPi = pi
.replaceAll("Ut","Texas")
.replaceAll("ut","Texas")
.replaceAll("yapex","Yapex")
.replaceAll("genia","Genia")
.replaceAll("semi","Semi");
String req = "protein"+cleanPi;
makeHelper(pi,cleanPi,req);
String path = "lib/"+pi +sep+ "lib/proteinFeatures.mixup" +sep+ "lib/newproteinFeatures.mixup"
+sep+ "dist/helper/protein"+cleanPi+".mixup";
exportedTypes[1] = "proteinFrom"+cleanPi;
System.out.println("encapsulating "+pi);
EncapsulatedAnnotator ann = new EncapsulatedAnnotator(req,path,exportedTypes);
IOUtil.saveSerialized(ann,new File(distDir,cleanPi+".eann"));
}
//wrap the cell line annotator in something that outputs 'cell line' instead of _predicted
makeCellLineHelper();
EncapsulatedAnnotator cann = new EncapsulatedAnnotator("cellLine","lib/CellLine.eann"+sep+"dist/helper/cellLine.mixup",new String[]{"cellLine"});
IOUtil.saveSerialized((Serializable)cann,new File(distDir,"CellLine.eann"));
}
// write a 'helper' mixup file into dist/helper, which will rename 'prediction' appropriately, for protein annotators
static private void makeHelper(String annotator,String cleanAnnotator,String requiredAnnotation) throws IOException,FileNotFoundException
{
PrintWriter out = new PrintWriter(new BufferedOutputStream(new FileOutputStream(new File("dist/helper/"+requiredAnnotation+".mixup"))));
out.println("provide '"+requiredAnnotation+"';");
out.println();
out.println("annotateWith "+annotator+";");
out.println("defSpanType protein =_prediction: [...];");
out.println("defSpanType proteinFrom"+cleanAnnotator+" =_prediction: [...];");
out.close();
}
// write a 'helper' mixup file for the cell line annotator
static private void makeCellLineHelper() throws IOException,FileNotFoundException
{
PrintWriter out = new PrintWriter(new BufferedOutputStream(new FileOutputStream(new File("dist/helper/cellLine.mixup"))));
out.println("provide 'cellLine';");
out.println();
out.println("annotateWith 'CellLine.eann';");
out.println("defSpanType cellLine =_prediction: [...];");
out.close();
}
// write a 'helper' mixup file for the filter annotators
static private void makeFilterHelper(String annotatorFileName,String requiredAnnotation) throws IOException,FileNotFoundException
{
PrintWriter out = new PrintWriter(new BufferedOutputStream(new FileOutputStream(new File("dist/helper/"+requiredAnnotation+".mixup"))));
out.println("provide '"+requiredAnnotation+"';");
out.println();
out.println("annotateWith '"+annotatorFileName+"';");
out.close();
}
// set the contents of a file to be this string
static private void setFileContents(String s,File file) throws IOException,FileNotFoundException
{
PrintWriter out = new PrintWriter(new BufferedOutputStream(new FileOutputStream(file)));
out.print(s);
out.close();
}
}