/*
* Concept profile generation tool suite
* Copyright (C) 2015 Biosemantics Group, Erasmus University Medical Center,
* Rotterdam, The Netherlands
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as published
* by the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>
*/
package JochemBuilder.EvaluationScripts;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import org.erasmusmc.utilities.ReadTextFile;
import org.erasmusmc.utilities.WriteTextFile;
/**
 * Turns lists of character-offset positions (true positives, false positives and
 * false negatives) into readable entity strings by looking the offsets up in a
 * tokenised corpus, and writes one tab-separated report line per resolved entity.
 *
 * <p>All input files share the same grouping layout: a line starting with
 * {@code ###} opens a new group whose identifier is everything after the fourth
 * character of that line; every following non-empty line belongs to that group.
 * In the corpus file a group line is a tab-separated token record (column 0 is
 * the token text, columns 1 and 2 are its start and end offsets, column 4 is
 * copied to the report as the token class). In a positions file a group line is
 * a tab-separated {@code start end} pair.
 */
public class GetResultsFromChemicalCorpusIndexation {

  /** Marker that opens a new group (identifier line) in all input files. */
  private static final String HEADER_PREFIX = "###";

  /** Number of leading characters of a header line that precede the identifier. */
  private static final int HEADER_ID_OFFSET = 4;

  /**
   * Resolves the true-positive, false-positive and false-negative positions (in
   * that order) against the corpus and writes the resulting entities to the
   * analysis file. A count per category is printed to standard output.
   *
   * @param home directory prefix that is prepended to {@code corpusFile} only;
   *     the other file names are used exactly as given
   * @param corpusFile name of the tokenised corpus file, relative to {@code home}
   * @param falsePosPositionsFileName file holding the false-positive positions
   * @param falseNegPositionsFileName file holding the false-negative positions
   * @param truePosPositionsFileName file holding the true-positive positions
   * @param analysisResultFileName file the report lines are written to
   */
  public void run(String home, String corpusFile, String falsePosPositionsFileName,
      String falseNegPositionsFileName, String truePosPositionsFileName,
      String analysisResultFileName) {
    WriteTextFile outPutFile = new WriteTextFile(analysisResultFileName);
    try {
      Map<String, List<String>> corpusMap = readGroupedLines(new ReadTextFile(home + corpusFile));

      Map<String, List<String>> truePosMap =
          readGroupedLines(new ReadTextFile(truePosPositionsFileName));
      checkPositions("true positive", corpusMap, truePosMap, outPutFile);

      Map<String, List<String>> falsePosMap =
          readGroupedLines(new ReadTextFile(falsePosPositionsFileName));
      checkPositions("false positive", corpusMap, falsePosMap, outPutFile);

      Map<String, List<String>> falseNegMap =
          readGroupedLines(new ReadTextFile(falseNegPositionsFileName));
      checkPositions("false negative", corpusMap, falseNegMap, outPutFile);
    } finally {
      // Close the report even when reading or parsing an input file fails.
      outPutFile.close();
    }
  }

  /**
   * Reads a grouped file into a map from group identifier to that group's
   * non-empty lines, in file order. Lines that appear before the first header
   * are collected under the empty-string identifier.
   *
   * @param file the file to read
   * @return map from identifier to the lines of that group
   */
  private static Map<String, List<String>> readGroupedLines(ReadTextFile file) {
    Map<String, List<String>> grouped = new HashMap<String, List<String>>();
    String currentId = "";
    Iterator<String> lines = file.getIterator();
    while (lines.hasNext()) {
      String line = lines.next();
      if (line.startsWith(HEADER_PREFIX)) {
        currentId = line.substring(HEADER_ID_OFFSET);
      } else if (line.length() != 0) {
        List<String> group = grouped.get(currentId);
        if (group == null) {
          group = new ArrayList<String>();
          grouped.put(currentId, group);
        }
        group.add(line);
      }
    }
    return grouped;
  }

  /**
   * Resolves every position of every abstract that occurs in both maps and
   * writes the matching entities; finally prints how many were written.
   *
   * @param type category label written as the first report column
   * @param corpusMap token record lines per abstract identifier
   * @param positionsMap {@code start<TAB>end} lines per abstract identifier
   * @param outPutFile report the entity lines are appended to
   */
  private void checkPositions(String type, Map<String, List<String>> corpusMap,
      Map<String, List<String>> positionsMap, WriteTextFile outPutFile) {
    int no = 0;
    for (Map.Entry<String, List<String>> abstractEntry : corpusMap.entrySet()) {
      String pmid = abstractEntry.getKey();
      List<String> positions = positionsMap.get(pmid);
      if (positions == null) {
        continue;
      }
      // Parse the abstract once instead of once per position pair.
      List<Token> tokens = parseTokens(abstractEntry.getValue());
      for (String position : positions) {
        String[] startAndEnd = position.split("\t");
        int startPos = Integer.parseInt(startAndEnd[0]);
        int endPos = Integer.parseInt(startAndEnd[1]);
        no += writeEntities(type, pmid, startPos, endPos, tokens, outPutFile);
      }
    }
    System.out.println("Number of "+type+" entities: "+no);
  }

  /** Parses the token record lines of one abstract, keeping their order. */
  private static List<Token> parseTokens(List<String> corpusLines) {
    List<Token> tokens = new ArrayList<Token>(corpusLines.size());
    for (String line : corpusLines) {
      tokens.add(new Token(line));
    }
    return tokens;
  }

  /**
   * Scans the tokens of one abstract for the span {@code startPos..endPos} and
   * writes a report line for each completed match. A span opens at a token whose
   * start offset equals {@code startPos} and closes at the first following token
   * whose end offset equals {@code endPos}.
   *
   * @return the number of report lines written
   */
  private static int writeEntities(String type, String pmid, int startPos, int endPos,
      List<Token> tokens, WriteTextFile outPutFile) {
    // The scan state is local to one position pair, so a span whose end offset is
    // never found is dropped here and cannot leak into the next pair.
    boolean inEntity = false;
    StringBuilder entity = new StringBuilder();
    String tokenClass = "";
    int prevEnd = 0; // only read while inEntity, i.e. after a previous token has set it
    int written = 0;

    for (Token token : tokens) {
      if (inEntity) {
        // Tokens that directly touch are glued together; otherwise a single space
        // stands in for the gap. The offsets are compared as numbers.
        if (token.start != prevEnd) {
          entity.append(' ');
        }
        entity.append(token.columns[0]);
        if (token.end == endPos) {
          outPutFile.writeln(type+"\t"+entity+"\t"+startPos+"\t"+endPos+"\t"+tokenClass+"\t"+pmid);
          written++;
          inEntity = false;
        }
      } else if (token.start == startPos) {
        inEntity = true;
        entity.setLength(0);
        entity.append(token.columns[0]);
        // Column 4 is read only for the token that opens a span.
        tokenClass = token.columns[4];
        if (token.end == endPos) {
          outPutFile.writeln(type+"\t"+entity+"\t"+startPos+"\t"+endPos+"\t"+tokenClass+"\t"+pmid);
          written++;
          inEntity = false;
        }
      }
      prevEnd = token.end;
    }
    return written;
  }

  /** One tab-separated corpus line with its start and end offsets already parsed. */
  private static final class Token {
    /** All tab-separated columns of the line, as read. */
    final String[] columns;
    /** Offset parsed from column 1. */
    final int start;
    /** Offset parsed from column 2. */
    final int end;

    Token(String line) {
      columns = line.split("\t");
      start = Integer.parseInt(columns[1]);
      end = Integer.parseInt(columns[2]);
    }
  }
}