package arkref.ace;
import java.io.File;
import java.io.IOException;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import arkref.parsestuff.U;
/**
 * Extracts the body of the {@code <TEXT>} element from an SGML ({@code .SGM}) document
 * and writes it to a sibling {@code .txt} file.
 */
public class AcePreprocess {
    /**
     * Matches everything between the first {@code <TEXT>} and the last {@code </TEXT>};
     * DOTALL lets the capture span line breaks. Compiled once rather than per call.
     */
    private static final Pattern TEXT_SECTION = Pattern.compile("<TEXT>(.*)</TEXT>", Pattern.DOTALL);

    /**
     * Reads the {@code .SGM} file that corresponds to {@code path1}, extracts the contents of its
     * {@code <TEXT>} element and writes them to the corresponding {@code .txt} file.
     *
     * <p>The base path is obtained by passing {@code path1} through
     * {@code arkref.analysis.Preprocess.shortPath} and removing every occurrence of
     * {@code "_APF.XML"} from the result.
     *
     * @param path1 path identifying the document (e.g. the path of its {@code _APF.XML} file)
     * @throws IOException if the {@code .SGM} file does not exist, contains no
     *         {@code <TEXT>...</TEXT>} section, or if reading/writing fails
     */
    public static void go(String path1) throws IOException {
        // NOTE(review): shortPath presumably strips a known extension from the path -- confirm in Preprocess.
        String basePath = arkref.analysis.Preprocess.shortPath(path1).replace("_APF.XML", "");
        String sgmlFilename = basePath + ".SGM";

        // Explicit check: an `assert` is skipped unless the JVM runs with -ea.
        if (!new File(sgmlFilename).exists()) {
            throw new IOException("SGML file not found: " + sgmlFilename);
        }

        String sgml = U.readFile(sgmlFilename);
        Matcher m = TEXT_SECTION.matcher(sgml);
        // Without this check, group(1) would throw an uninformative IllegalStateException.
        if (!m.find()) {
            throw new IOException("No <TEXT>...</TEXT> section found in " + sgmlFilename);
        }

        U.writeFile(m.group(1), basePath + ".txt");
    }

    /**
     * Runs {@link #go(String)} on every command-line argument. When more than one argument is
     * given, a {@code DOC<TAB>path} line is printed before each document is processed.
     *
     * @param args paths of the documents to process
     * @throws IOException if processing any document fails; remaining arguments are not processed
     */
    public static void main(String args[]) throws IOException {
        boolean announce = args.length > 1;
        for (String arg : args) {
            if (announce) {
                U.pf("DOC\t%s\n", arg);
            }
            go(arg);
        }
    }
}