/**
 *
 */
package fna.parsing;

import java.io.BufferedWriter;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.FileWriter;
import java.io.IOException;
import java.nio.channels.FileChannel;

import org.apache.log4j.Logger;
import org.jdom.Document;
import org.jdom.Element;
import org.jdom.input.SAXBuilder;
import org.jdom.output.XMLOutputter;
import org.jdom.xpath.XPath;

/**
 * Transformer for extracted, XML-formatted descriptions from Treatise.
 *
 * <p>For every file in the source folder it copies the file into the
 * {@code transformed} sub-folder of the target folder and writes the normalized
 * text of the {@code //treatment/description} element into a text file in the
 * {@code descriptions} sub-folder.
 *
 * @author Hong Updates
 */
@SuppressWarnings({ "unused" })
public class Type2Transformer extends Thread {

    /** Folder holding the documents to be transformed. */
    private File source = new File(Registry.SourceDirectory);

    /** Folder under which the output sub-folders live. */
    File target = new File(Registry.TargetDirectory);

    private XMLOutputter outputter = null;

    /** The data prefix from the general tab. */
    private String dataprefix = null;

    /** Receives progress and per-file notifications while transforming. */
    private ProcessListener listener;

    protected static final Logger LOGGER = Logger.getLogger(Type2Transformer.class);

    /**
     * Creates a transformer and calls {@code Utilities.resetFolder} for each output
     * sub-folder of the target folder (presumably emptying/recreating them -- confirm
     * against {@code Utilities}).
     *
     * @param listener   receiver of progress notifications
     * @param dataprefix the data prefix from the general tab
     */
    public Type2Transformer(ProcessListener listener, String dataprefix) {
        this.listener = listener;
        this.dataprefix = dataprefix;
        // The field initializer has already pointed 'target' at Registry.TargetDirectory,
        // so no shadowing local is needed here.
        Utilities.resetFolder(target, "descriptions");
        Utilities.resetFolder(target, "transformed");
        Utilities.resetFolder(target, "descriptions-dehyphened");
        Utilities.resetFolder(target, "markedup");
        Utilities.resetFolder(target, "final");
        Utilities.resetFolder(target, "co-occurrence");
    }

    /**
     * Creates a transformer without a listener or data prefix and without resetting
     * any output folder. {@link #run()} and {@link #transform()} dereference the
     * listener, so an instance created this way cannot be run as-is.
     */
    public Type2Transformer() {
        // TODO Auto-generated constructor stub
    }

    /** Shows the progress bar, runs {@link #transform()}, then hides the progress bar. */
    public void run() {
        listener.setProgressBarVisible(true);
        transform();
        listener.setProgressBarVisible(false);
    }

    /**
     * Takes the content of the {@code <description>} elements out of the source files
     * and saves it in the target folder.
     *
     * <p>Each source file is copied to {@code transformed}, parsed, and the normalized
     * text of its {@code //treatment/description} element is written to
     * {@code descriptions} under the same name with a trailing {@code xml} replaced
     * by {@code txt}. A file without such an element is logged and skipped. Any other
     * failure is printed, logged, and ends the run; nothing is rethrown to the caller.
     */
    public void transform() {
        try {
            File[] files = source.listFiles();
            if (files == null) {
                // listFiles() yields null when the path is not a directory or cannot be read.
                LOGGER.error("Cannot list source directory in Type2Transformer:transform: "
                        + source.getAbsolutePath());
                return;
            }
            SAXBuilder builder = new SAXBuilder();
            listener.progress(1);
            File transformedDir = new File(target, "transformed");
            int total = files.length;
            for (int i = 0; i < total; i++) {
                File f = files[i];
                copyFile(f, new File(transformedDir, f.getName()));

                String text = extractDescriptionText(builder, f);
                if (text != null) {
                    writeDescription2Descriptions(text, f.getName().replaceAll("xml$", "txt"));
                } else {
                    LOGGER.error("No //treatment/description element in " + f.getName()
                            + "; no description file written");
                }

                listener.progress((i + 1) * 100 / total);
                if (text != null) {
                    listener.info((i) + "", f.getName());
                }
            }
        } catch (Exception e) {
            e.printStackTrace();
            LOGGER.error("Failed to transform source files in Type2Transformer:transform", e);
        }
    }

    /**
     * Parses {@code f} and returns the normalized text of its
     * {@code //treatment/description} element.
     *
     * @return the normalized text, or {@code null} when the element is absent
     * @throws Exception if the file cannot be read or parsed, or the XPath fails
     */
    private String extractDescriptionText(SAXBuilder builder, File f) throws Exception {
        Document doc = builder.build(f);
        Element root = doc.getRootElement();
        Element descrp = (Element) XPath.selectSingleNode(root, "//treatment/description");
        return descrp == null ? null : descrp.getTextNormalize();
    }

    /**
     * Copies the bytes of {@code from} into {@code to}, creating or overwriting
     * {@code to}. Both streams are closed even when the copy fails.
     *
     * @throws IOException if either file cannot be opened or the transfer fails
     */
    private static void copyFile(File from, File to) throws IOException {
        FileInputStream in = new FileInputStream(from);
        try {
            FileOutputStream out = new FileOutputStream(to);
            try {
                FileChannel inputChannel = in.getChannel();
                FileChannel outputChannel = out.getChannel();
                long size = inputChannel.size();
                long position = 0;
                // transferTo may move fewer bytes than requested, so repeat until done.
                while (position < size) {
                    long moved = inputChannel.transferTo(position, size - position, outputChannel);
                    if (moved <= 0) {
                        break; // nothing left to read (e.g. the file shrank meanwhile)
                    }
                    position += moved;
                }
            } finally {
                out.close(); // also closes its channel
            }
        } finally {
            in.close(); // also closes its channel
        }
    }

    /**
     * Writes {@code textNormalize} to the file {@code fn} in the {@code descriptions}
     * sub-folder of the target folder, using the platform default charset.
     *
     * @param textNormalize the description text to store
     * @param fn            name of the file to write
     * @throws ParsingException if the file cannot be written
     */
    private void writeDescription2Descriptions(String textNormalize, String fn) {
        File file = new File(new File(target, "descriptions"), fn);
        try {
            BufferedWriter out = new BufferedWriter(new FileWriter(file));
            try {
                out.write(textNormalize);
            } finally {
                out.close(); // closed on failure too, so the file handle is not leaked
            }
        } catch (IOException e) {
            e.printStackTrace();
            LOGGER.error("Failed to output text file in Type2Transformer:outputDescriptionText", e);
            throw new ParsingException("Failed to output text file.", e);
        }
    }
}