package experiments.collective.entdoccentric;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;
import com.google.gson.Gson;
import experiments.collective.entdoccentric.calbc.Concept;
import experiments.collective.entdoccentric.filter.Filter;
import experiments.evaluation.UnicodeBOMInputStream;
public abstract class QueryDataGeneration {
protected File jsonFile;
protected BufferedReader bufferedReader;
protected Gson gson;
protected Filter filter;
public QueryDataGeneration() {
initialize();
}
public QueryDataGeneration(Filter filter) {
initialize();
this.filter = filter;
}
private void initialize() {
if (StartEvaluation.calbcJSON == null) {
// StartEvaluation.calbcJSON =
// "/home/quh/Arbeitsfläche/Code_Data/Calbc/wrongoutput_10p_corrected.json";
StartEvaluation.calbcJSON = "/home/quh/Arbeitsfläche/Code_Data/Calbc/output.json";
}
jsonFile = new File(StartEvaluation.calbcJSON);
gson = new Gson();
try {
FileInputStream fis = new FileInputStream(jsonFile);
UnicodeBOMInputStream ubis = new UnicodeBOMInputStream(fis);
InputStreamReader isr = new InputStreamReader(ubis);
bufferedReader = new BufferedReader(isr);
ubis.skipBOM();
} catch (IOException e) {
e.printStackTrace();
}
}
protected LinkedList<String> setResultLinks(StandardQueryDataObject obj,
List<Concept> concepts) {
LinkedList<String> result = new LinkedList<String>();
for (Concept concept : concepts) {
String[] splitter = concept.getUrl().split(":");
String link = "";
if (splitter[1].equalsIgnoreCase("uniprot")
&& !splitter[2].equalsIgnoreCase("") && splitter[2] != null) {
link = "UN_" + splitter[2];
} else if (splitter[1].equalsIgnoreCase("entrezgene")
&& !splitter[2].equalsIgnoreCase("") && splitter[2] != null) {
link = "NC_" + splitter[2];
} else if (splitter[1].equalsIgnoreCase("umls")
&& !splitter[2].equalsIgnoreCase("") && splitter[2] != null) {
link = "LI_" + splitter[2];
} else if (splitter[1].equalsIgnoreCase("ncbi")
&& !splitter[2].equalsIgnoreCase("") && splitter[2] != null) {
link = "NC_" + splitter[2];
} else if (splitter[1].equalsIgnoreCase("disease")
&& !splitter[2].equalsIgnoreCase("") && splitter[2] != null) {
link = "LI_" + splitter[2];
}
if (!link.equalsIgnoreCase("")) {
result.add(link);
}
}
return result;
}
protected String extractText(int position, String text) {
long startArea = position - StartEvaluation.contextArea;
long endArea = position + StartEvaluation.contextArea;
// System.out.println(StartEvaluation.contextArea);
if (startArea < 0) {
startArea = 0;
}
if (endArea > text.length() - 1) {
endArea = text.length() - 1;
}
String tempText = text.substring((int) startArea, (int) endArea);
String[] splitter = tempText.split(" ");
String result = "";
for (int i = 1; i < splitter.length - 1; i++) {
result += splitter[i] + " ";
}
// System.out.println(result);
return result;
}
public abstract StandardQueryDataObject hasNext();
}