package de.berlin.hu.uima.ae.feature;
import java.io.IOException;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import org.apache.uima.UIMAException;
import org.apache.uima.UIMAFramework;
import org.apache.uima.jcas.JCas;
import org.apache.uima.resource.metadata.TypeSystemDescription;
import org.apache.uima.util.InvalidXMLException;
import org.apache.uima.util.XMLInputSource;
import org.uimafit.factory.JCasFactory;
import de.berlin.hu.chemspot.ChemSpot;
import de.berlin.hu.types.PubmedDocument;
public class FeatureGeneratorApp {
private static ChemSpot chemspot = null;
private static TypeSystemDescription typeSystem = null;
private static Map<Long, JCas> jCases = new HashMap<Long, JCas>();
public static void initialize(String pathToDictionaryFile, String pathToIDsFile) {
chemspot = new ChemSpot(null, pathToDictionaryFile, null, pathToIDsFile, null);
try {
typeSystem = UIMAFramework.getXMLParser().parseTypeSystemDescription(new XMLInputSource(chemspot.getClass().getClassLoader().getResource("desc/TypeSystem.xml")));
} catch (InvalidXMLException e) {
// TODO Auto-generated catch block
e.printStackTrace();
} catch (IOException e) {
// TODO Auto-generated catch block
e.printStackTrace();
}
}
public static List<FeatureToken> generateFeatureTokens(String text) {
// get JCas object for currently executed thread
long threadId = Thread.currentThread().getId();
// create new jcas if necessary (i.e. a thread calls this method for the first time)
if (!jCases.containsKey(threadId)) {
synchronized (jCases) {
try {
jCases.put(threadId, JCasFactory.createJCas(typeSystem));
} catch (UIMAException e) {
throw new RuntimeException(e);
}
}
}
// get jcas
JCas jcas = jCases.get(threadId);
jcas.reset();
jcas.setDocumentText(text);
PubmedDocument pd = new PubmedDocument(jcas);
pd.setBegin(0);
pd.setEnd(text.length());
pd.setPmid("");
pd.addToIndexes(jcas);
chemspot.tag(jcas);
System.out.println(ChemSpot.serializeAnnotations(jcas));
return chemspot.getFeatureTokenGenerator().getFeatureTokens(jcas);
}
public static void main(String[] args) throws InvalidXMLException, IOException {
String pathToDictionaryFile = null;//"../../data/dict.zip";
String pathToIDsFile = null;//"../../data/ids.zip";
// String text = "We examined the effect of exogenous estradiol on the changes in serum steroid hormone levels. Induced by a nonlethal dose of Escherichia coli endotoxin in male rats and the deaths due to nonlethal and lethal doses of endotoxin.";
// String text = "A serum of 18-bromo-12-butyl-11-chloro-4,8-diethyl-5-hydroxy-15-methoxy is great.";
String text = "A serum of 18-bromo-12-butyl-11-bromo-4,8-diethyl-5-hydroxy is great for combination with aspirin and Lysophosphatidate as well as LoseC and associated names and Antidiarrheals.";
initialize(pathToDictionaryFile, pathToIDsFile);
List<FeatureToken> featureTokens = generateFeatureTokens(text);
for (FeatureToken token : featureTokens) {
System.out.printf("%d-%d\t%s\t%s%n", token.getBegin(), token.getEnd(), token.getCoveredText(), token.getFeatures().toString());
}
}
}