package uk.ac.shef.dcs.jate.app;
import com.google.gson.Gson;
import com.google.gson.reflect.TypeToken;
import org.apache.commons.csv.CSVFormat;
import org.apache.solr.common.util.Pair;
import uk.ac.shef.dcs.jate.io.CSVFileOutputReader;
import uk.ac.shef.dcs.jate.io.FileOutputReader;
import uk.ac.shef.dcs.jate.io.JSONFileOutputReader;
import uk.ac.shef.dcs.jate.model.JATETerm;
import uk.ac.shef.dcs.jate.util.IOUtil;
import java.io.File;
import java.io.IOException;
import java.io.Writer;
import java.util.*;
/**
* Given the RANKED results of several different algorithms (all of which must have been applied to the same
* candidate set of terms), applies a weighted voting algorithm.
* <p>
* The new score of a term is the sum, over all algorithms, of (1.0 divided by the rank of the term under that algorithm, multiplied by the weight of that algorithm).
*/
public class Voting {

    /** JSON result files combined by {@link #main(String[])}; each is given weight 1.0. */
    private static final String[] JSON_RESULT_FILES = {
            "genia_attf_seed_terms.json",
            "genia_chisquare_seed_terms.json",
            "genia_cvalue_seed_terms.json",
            "genia_cvalue_seed_terms_mttf1.json",
            "genia_glossex_seed_terms.json",
            "genia_rake_seed_terms.json",
            "genia_termex_seed_terms.json",
            "genia_tfidf_seed_terms.json",
            "genia_ttf_seed_terms.json",
            "genia_weirdness_seed_terms.json"
    };

    /** CSV result file combined by {@link #main(String[])}; given weight 1.0. */
    private static final String CSV_RESULT_FILE = "genia_text_rank_result.csv";

    /**
     * Reads the hard-coded set of algorithm result files from the input folder, votes over them with
     * equal weights and writes the combined term list as JSON to the output file.
     *
     * @param args not used
     * @throws IOException if the output file cannot be opened or written
     */
    public static void main(String[] args) throws IOException {
        String inFolder = "/home/zqz/GDrive/papers/cicling2017/data/seed-terms/genia";
        String outFile = "/home/zqz/GDrive/papers/cicling2017/data/seed-terms/genia/voted.json";

        FileOutputReader jsonFileOutputReader = new JSONFileOutputReader(new Gson());
        FileOutputReader csvFileOutputReader = new CSVFileOutputReader(CSVFormat.DEFAULT);

        // Both maps are keyed by file name: one gives the weight, the other the reader for that file.
        Map<String, Double> weights = new HashMap<>();
        Map<String, FileOutputReader> readers = new HashMap<>();
        for (String jsonFile : JSON_RESULT_FILES) {
            weights.put(jsonFile, 1.0);
            readers.put(jsonFile, jsonFileOutputReader);
        }
        weights.put(CSV_RESULT_FILE, 1.0);
        readers.put(CSV_RESULT_FILE, csvFileOutputReader);

        Voting voting = new Voting();
        Pair[] results = voting.readAlgorithmResults(inFolder, weights, readers);
        List<JATETerm> newResult = voting.vote(results);

        // try-with-resources so the writer is closed even when serialisation fails
        try (Writer w = IOUtil.getUTF8Writer(outFile)) {
            new Gson().toJson(newResult, w);
        }
    }

    /**
     * Reads the ranked output of each algorithm. Every key of {@code algAndWeight} is used verbatim as a
     * file name inside {@code inFolder}, and is read with the reader registered under the same key in
     * {@code algAndReader}.
     * <p>
     * Reading is best-effort: if a file cannot be read, or no reader is registered for it, the problem is
     * reported on standard error and that algorithm contributes an empty term list (never {@code null}),
     * so the remaining algorithms can still be voted on.
     *
     * @param inFolder     folder that contains the output files of the algorithms
     * @param algAndWeight maps each output file name to the weight of that algorithm
     * @param algAndReader maps each output file name to the reader used to parse that file
     * @return one pair per entry of {@code algAndWeight}, holding the list of terms read from the file
     * (key) and the weight of the algorithm (value)
     */
    public Pair[] readAlgorithmResults(String inFolder,
                                       Map<String, Double> algAndWeight,
                                       Map<String, FileOutputReader> algAndReader) {
        Pair[] pairs = new Pair[algAndWeight.size()];
        int i = 0;
        for (Map.Entry<String, Double> en : algAndWeight.entrySet()) {
            File f = new File(inFolder + File.separator + en.getKey());
            FileOutputReader reader = algAndReader.get(en.getKey());
            List<JATETerm> terms = null;
            if (reader == null) {
                System.err.println("No reader registered for '" + en.getKey() + "'; skipping " + f);
            } else {
                try {
                    terms = reader.read(f.toString());
                } catch (IOException e) {
                    System.err.println("Unable to read algorithm output " + f + "; skipping it");
                    e.printStackTrace();
                }
            }
            if (terms == null) {
                // keep the slot but make it harmless for vote(...)
                terms = Collections.emptyList();
            }
            pairs[i] = new Pair<>(terms, en.getValue());
            i++;
        }
        return pairs;
    }

    /**
     * Combines several ranked term lists into one by weighted reciprocal-rank voting. A term at 0-based
     * position {@code i} of a list receives {@code 1.0 / (i + 1) * weight} from that list; the
     * contributions for the same term string are summed across all lists.
     *
     * @param algResultWithWeight pairs of (ranked list of {@link JATETerm}, weight as {@code Double}).
     *                            Entries that are {@code null}, or whose list or weight is {@code null},
     *                            are ignored.
     * @return a new list with one term per distinct term string carrying its summed score, sorted by the
     * natural ordering of {@link JATETerm}; empty if no results are given
     */
    public List<JATETerm> vote(Pair... algResultWithWeight) {
        if (algResultWithWeight == null || algResultWithWeight.length == 0) {
            return new ArrayList<>();
        }

        Map<String, Double> voteScores = new HashMap<>();
        for (Pair result : algResultWithWeight) {
            if (result == null) {
                continue;
            }
            // The parameter is a raw Pair; callers are expected to supply (List<JATETerm>, Double).
            @SuppressWarnings("unchecked")
            Pair<List<JATETerm>, Double> pair = (Pair<List<JATETerm>, Double>) result;
            List<JATETerm> ranked = pair.getKey();
            Double weight = pair.getValue();
            if (ranked == null || weight == null) {
                continue;
            }
            for (int i = 0; i < ranked.size(); i++) {
                String termStr = ranked.get(i).getString();
                // the position in the list is the rank: the first element has rank 1
                double rankScore = 1.0 / (i + 1) * weight;
                Double finalScore = voteScores.get(termStr);
                if (finalScore == null) {
                    finalScore = 0.0;
                }
                voteScores.put(termStr, finalScore + rankScore);
            }
        }

        List<JATETerm> out = new ArrayList<>(voteScores.size());
        for (Map.Entry<String, Double> entry : voteScores.entrySet()) {
            out.add(new JATETerm(entry.getKey(), entry.getValue()));
        }
        // NOTE(review): relies on JATETerm's compareTo, presumably highest score first - confirm.
        Collections.sort(out);
        return out;
    }
}