package test.java.integration.helpers;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.util.HashMap;
import java.util.LinkedList;
import java.util.Map;
import java.util.Scanner;
import java.util.StringTokenizer;
import com.datascience.utils.CostMatrix;
public class FileReaders {
public FileReaders(){
}
/**
* Loads the misclassification cost data
* @param filename
* @throws FileNotFoundException
*/
private static final String defaultEncoding = "UTF-8";
public CostMatrix<String> loadCostMatrix(String filename)
throws FileNotFoundException{
CostMatrix<String> matrix = new CostMatrix<String>();
FileInputStream stream = new FileInputStream(filename);
Scanner scanner = new Scanner(stream, defaultEncoding);
String line, categoryFrom, categoryTo;
double cost;
while (scanner.hasNextLine()) {
line = scanner.nextLine();
StringTokenizer st = new StringTokenizer(line, "\t");
categoryFrom = st.nextToken();
categoryTo = st.nextToken();
cost = Double.valueOf(st.nextToken());
matrix.add(categoryFrom, categoryTo, cost);
}
return matrix;
}
/**
* Loads the summary.txt file with formatting <Worker><tabulation><Errors><tabulation><Quality><tabulation><Submissions>
* @param filename
* Name of file containing the expected worker summary
* @return LinkedList that contains the summary of the quality reports
*/
public LinkedList <Map<String, Object>> loadWorkerSummaryFile(String filename)
throws FileNotFoundException {
LinkedList <Map<String, Object>> workerSummaries = new LinkedList<Map<String, Object>>();
FileInputStream stream = new FileInputStream(filename);
Scanner scanner = new Scanner(stream, defaultEncoding);
String line, workerName, errorRate, quality, submissions;
//scan the first line (containing the titles) - don't add it into the results list
line = scanner.nextLine();
while (scanner.hasNextLine()) {
line = scanner.nextLine();
StringTokenizer st = new StringTokenizer(line, "\t");
workerName = st.nextToken();
errorRate = st.nextToken();
quality = st.nextToken();
submissions = st.nextToken();
Map<String, Object> workerSummary = new HashMap<String, Object>();
workerSummary.put("Worker", workerName);
workerSummary.put("Error rate", errorRate);
workerSummary.put("Quality (Expected)", quality);
workerSummary.put("Number of Annotations", submissions);
workerSummaries.add(workerSummary);
}
scanner.close();
return workerSummaries;
}
}