/**
*
*/
package conceptmapping;
import java.io.BufferedWriter;
import java.io.File;
import java.io.PrintWriter;
import java.io.StringWriter;
import java.nio.charset.Charset;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.ArrayList;
import java.util.Scanner;
import org.apache.log4j.Logger;
import org.jdom.Document;
import org.jdom.Element;
import org.jdom.input.SAXBuilder;
import org.jdom.xpath.XPath;
import outputter.ApplicationUtilities;
/**
* @author hong cui
* the cvs file output by CharaParser-Biocreative2012 version does have character text in it,
* making it difficult for biocruators to review the results
* This class adds text to the cvs file.
*
* the input cvs file has cols in this order:
* source characterid stateid description entity entitylabel entityid quality qualitylabel qualityid qualitynegated qualitynegatedlabel qnparentlabel qnparentid qualitymodifier qualitymodifierlabel qualitymodifierid entitylocator entitylocatorlabel entitylocatorid countt
*/
public class CharacterText2CSV {
private static final Logger LOGGER = Logger.getLogger(CharacterText2CSV.class);
Element root;
Path csvpath;
Path newcsv;
final static Charset ENCODING = StandardCharsets.UTF_8;
/**
*
*/
public CharacterText2CSV(String nexmlfilepath, String csvfilepath) {
try{
SAXBuilder builder = new SAXBuilder();
Document xml = builder.build(new File(nexmlfilepath));
root = xml.getRootElement();
csvpath = Paths.get(csvfilepath);
newcsv = Paths.get(csvfilepath.replaceFirst("\\.csv$", ".new.csv"));
}catch(Exception e){
LOGGER.error("", e);
}
}
public void addCharacaterText(){
ArrayList<String> rows = new ArrayList<String>();
try{
Scanner scanner = new Scanner(csvpath, ENCODING.name());
while (scanner.hasNextLine()){
String line = scanner.nextLine();
if(line.startsWith("\"source\"")){
rows.add(line);
continue;
}
String[] cols = line.split("\",\"");
String characterid = cols[1].trim();
XPath xpath = XPath.newInstance("//nex:char[@states='"+characterid+"']");
Element e = (Element) xpath.selectSingleNode(root);
String text = "";
if(e.getAttribute("label")!=null) text = e.getAttributeValue("label");
text = newLine(cols, text);
System.out.println("fetch character text:"+text);
rows.add(text); //insert the new line with character text
rows.add(line); //add the original line
}
System.out.println("creating new csv file");
write(rows);
scanner.close();
System.out.println("new csv file is at "+newcsv.getFileName());
}catch(Exception e){
LOGGER.error("", e);
}
}
private String newLine(String[] cols, String charactertext){
StringBuffer sb = new StringBuffer();
sb.append("\""+cols[0].replaceFirst("^\"", "")+"\","); //source
sb.append("\""+cols[1]+"\","); //characterid
sb.append("\"\","); //stateid
sb.append("\""+charactertext+"\","); //description
for(int i = 4; i < cols.length; i++){
sb.append("\"\","); //all other cols
}
return sb.toString().replaceFirst(",$", "").trim();
}
void write(ArrayList<String> aLines) throws Exception {
try (BufferedWriter writer = Files.newBufferedWriter(newcsv, ENCODING)){
for(String line : aLines){
writer.write(line);
writer.newLine();
}
}
}
/**
* @param args
*/
public static void main(String[] args) {
String nexml = "C:/Users/updates/CharaParserTest/EQ-OLeary2013/Trail_12_April/source/OLeary_et_al_2013.xml";
String csv = "C:/Users/updates/CharaParserTest/EQ-OLeary2013/mammal-output.csv";
CharacterText2CSV ctc = new CharacterText2CSV(nexml, csv);
ctc.addCharacaterText();
}
}