/** * CrowdTransEval, a toolkit for evaluating machine translation * system by using crowdsourcing. * Copyright (C) 2012 Alejandro Navarro Fulleda <anf5@alu.ua.es> * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program. If not, see <http://www.gnu.org/licenses/>. */ package es.ua.alex952.cf_helpers; import com.sun.jersey.api.client.WebResource; import com.sun.jersey.api.representation.Form; import es.ua.alex952.cf_helpers.params.JSONParams; import es.ua.alex952.cf_helpers.result_helpers.HTMLGenerator; import es.ua.alex952.cf_helpers.result_helpers.KappaRaters; import es.ua.alex952.cf_helpers.translation_services.ApertiumTranslator; import es.ua.alex952.cf_helpers.translation_services.BingTranslator; import es.ua.alex952.cf_helpers.translation_services.Service; import es.ua.alex952.exceptions.KeyNotConfigured; import es.ua.alex952.exceptions.ParameterNeeded; import java.io.*; import java.util.*; import javax.ws.rs.core.MediaType; import org.slf4j.Logger; import org.slf4j.LoggerFactory; /** * A concrete implementation of one resource, namely Jobs, and the concrete * methods that it can handle. * * @author alex952 */ public class JobsCF extends CFHelper { /** * The translation services of the application. See {@link Service} */ private Service[] services = null; private Integer unit_count = 0; private String pathLO; private String pathTR; private String pathGold; private SentenceShuffler shuffler; private String LO; private String LM; private final Logger logger = LoggerFactory.getLogger(JobsCF.class); private String[] channels; public void setPathLO(String pathLO) { this.pathLO = pathLO; } public void setPathTR(String pathTR) { this.pathTR = pathTR; } public void setPathGold(String pathGold) { this.pathGold = pathGold; } public void setServices(Service[] services) { this.services = services; } /** * Default constructor of the class. It creates the object and assigns to it * the needed paths for the web services invocation, * * @throws KeyNotConfigured * @throws Exception */ public JobsCF(String configFile) throws KeyNotConfigured, Exception { super(configFile); ArrayList<String> urlPaths = new ArrayList<String>(); urlPaths.add("jobs"); this.paths = urlPaths; this.postPrepend = "job"; this.configFile = configFile; this.dataParams = new JSONParams(); if (this.configFile != null) { Properties p = new Properties(); try{ p.load(new FileInputStream(new File(this.configFile))); String s = p.getProperty("ShuffleGrade", "2"); Integer grade = Integer.parseInt(s); this.shuffler = new SentenceShuffler(grade); //Source language and goal language this.LO = p.getProperty("SL"); this.LM = p.getProperty("TL"); //Initialize translation services //Possible dependency injection this.services = new Service[] { new ApertiumTranslator(p.getProperty("ApertiumKey")), new BingTranslator(p.getProperty("BingClientId"), p.getProperty("BingClientSecret")) }; this.channels = p.getProperty("Channels").split(","); } catch (IOException e) { this.shuffler = new SentenceShuffler(2); } } } /** * Constructor that builds up the object with a properties file, which * contains all the fields of the Job itself. * * @param propPath The path of the properties file. * @throws KeyNotConfigured * @throws IOException */ public JobsCF(String propPath, String configFile) throws KeyNotConfigured, IOException, Exception { this(configFile); Properties p = new Properties(); p.load(new FileInputStream(propPath)); for (String key_p : p.stringPropertyNames()) { this.addParameter(key_p, p.getProperty(key_p)); } String cml = HTMLGenerator.generateCML(this.services); this.addParameter("cml", cml); } /** * Function that processes the results of an specific job, known by the id * stored in the {@link JSONParams} object of the class, and builds up the * Google graph for the showcasing of them. * * @return The html graph generated with the results of the job. */ public String processResults() { String id = this.getParameter("id"); HashMap<String, HashMap<String, KappaRaters>> raters = null; JSONParams judgmentsJSON = null; try { raters = this.getRatersJudgements(); judgmentsJSON = this.getJudgments(); } catch (Exception e) { System.err.println("Some error occured while getting the judgements for the job " + id + ": " + e.getMessage()); return null; } HTMLGenerator generator = HTMLGenerator.getGenerator(); generator.generateKappaTable(raters); generator.generateKappaAverageCharts(raters); generator.generateScoreCharts(judgmentsJSON, this.services); return generator.toString(); } @Override public JSONParams create() throws IOException { JSONParams p = super.create(); this.addParameter("id", p.getProperty("id")); return p; } /** * Populates the, previously created, job with the data passed to it. * * @param data Data to populate the job, in JSON format. * @param id The job id to be populated. * @throws IOException */ private void populate(String data, String id) throws IOException { ArrayList<String> paths2 = (ArrayList<String>) this.paths.clone(); paths2.add(id); paths2.add("upload"); Map<String, String> query2 = (HashMap<String, String>)this.queryParams.clone(); query2.put("force", "true"); WebResource wr = ConnectionHelper.getResource(this.baseUrl, paths2, this.type, query2); String ret = wr.type(MediaType.APPLICATION_JSON).post(String.class, data); } /** * Interface for the {@link JobsCF#populate(java.lang.String, java.lang.String)} * using the id of the job already stored in the {@link JSONParams} object * of the class. * * @throws IOException */ public void populate() throws IOException { this.populate(createGoldDataJSON(), this.getParameter("id")); this.populate(createDataJSON(), this.getParameter("id")); this.markGoldStandars(); } /** * Marks the uploaded gold data * ( created with {@link JobsCF#createGoldDataJSON()} ) * as gold in the * CrowdFlower server. * * @throws IOException */ private void markGoldStandars() throws IOException { ArrayList<String> paths2 = (ArrayList<String>) this.paths.clone(); paths2.add(this.getParameter("id")); paths2.add("gold"); WebResource wr = ConnectionHelper.getResource(this.baseUrl, paths2, this.type, this.queryParams); String ret = wr.put(String.class, ""); } /** * Creates the gold data to be uploaded in JSON format * * @return String containing the gold data * @throws IOException */ private String createGoldDataJSON() throws IOException { JSONParams param = new JSONParams(); String json = ""; Integer nservices = this.services.length; Random r = new Random(new Date().getTime()); FileInputStream fis = new FileInputStream(this.pathGold); DataInputStream dis = new DataInputStream(fis); BufferedReader br1 = new BufferedReader(new InputStreamReader(dis)); while (true) { String lo = br1.readLine(); String tr; String tr2; if (lo == null) break; tr = br1.readLine(); tr2 = br1.readLine(); param.addProperty("lo", lo); param.addProperty("tr", tr); int correct_position = r.nextInt(nservices); for (int i = 0; i < nservices; i++) { if (i == correct_position) { param.addProperty(this.services[i].getName(), tr2); param.addProperty("fluency_" + this.services[i].getName() + "_gold", "5"); param.addProperty("fluency_" + this.services[i].getName() + "_gold_reason", "Some reason"); param.addProperty("adequacy_" + this.services[i].getName() + "_gold", "5"); param.addProperty("adequacy_" + this.services[i].getName() + "_gold_reason", "Some reason"); } else { param.addProperty(this.services[i].getName(), this.shuffler.shuffle(tr2)); param.addProperty("fluency_" + this.services[i].getName() + "_gold", "1"); param.addProperty("fluency_" + this.services[i].getName() + "_gold_reason", "Some reason"); param.addProperty("adequacy_" + this.services[i].getName() + "_gold", "1"); param.addProperty("adequacy_" + this.services[i].getName() + "_gold_reason", "Some reason"); } } param.addProperty("_golden", "TRUE"); json += "\n" + param.toString(); param = new JSONParams(); } return json; } /** * Creates the job data to be uploaded in JSON format * * @return String containing the job data * @throws IOException */ private String createDataJSON() throws IOException { JSONParams param = new JSONParams(); unit_count = 0; String json = ""; FileInputStream fis = new FileInputStream(this.pathLO); DataInputStream dis = new DataInputStream(fis); BufferedReader br1 = new BufferedReader(new InputStreamReader(dis)); FileInputStream fis2 = new FileInputStream(this.pathTR); DataInputStream dis2 = new DataInputStream(fis2); BufferedReader br2 = new BufferedReader(new InputStreamReader(dis2)); String line1, line2; while ((line1 = br1.readLine()) != null && (line2 = br2.readLine()) != null) { param = new JSONParams(); param.addProperty("lo", line1); param.addProperty("tr", line2); for (Service c : this.services) { param.addProperty(c.getName(), c.getTranslation(line1, this.LO, this.LM)); } json += "\n" + param.toString(); unit_count++; } return json; } /** * Queries the server to know whether the job is finished or not. * * @param id The jd of the job to be queried about. * @return The finalization state of the job. * @throws IOException */ private boolean isFinished(String id) throws IOException { JSONParams response = this.getInfo(id); String status = response.getProperty("state"); return status != null && status.equals("finished"); } /** * Interface for the {@link JobsCF#isFinished(java.lang.String) } * using the id of the job already stored in the {@link JSONParams} object * of the class. * * @return The finalization state of the job. * @throws IOException */ public boolean isFinished() throws IOException, ParameterNeeded { String id = null; if ((id = this.getParameter("id")) == null) { throw new ParameterNeeded("The id parameter was mandatory for the getJudgements method"); } return this.isFinished(this.getParameter("id")); } /** * Retrieves the results and populates a Hash which contains * information needed by the Kappa calculus * * @return The results in the form of a Hash of services containing the every pair of raters and the * info about the rates given by them. See {@link KappaRaters} * @throws IOException * @throws ParameterNeeded If the id of the job is missing */ public HashMap<String, HashMap<String, KappaRaters>> getRatersJudgements() throws IOException, ParameterNeeded { //Initialization of all hashes based on service's name HashMap<String, HashMap<String, KappaRaters>> ratersHash = new HashMap<String, HashMap<String, KappaRaters>>(); for(Service s: services) { ratersHash.put(s.getName(), new HashMap<String, KappaRaters>()); } ArrayList<String> paths2 = (ArrayList<String>) this.paths.clone(); String id = null; if ((id = this.getParameter("id")) == null) { throw new ParameterNeeded("The id parameter was mandatory for the getJudgements method"); } paths2.add(id); paths2.add("units"); WebResource wr = ConnectionHelper.getResource(this.baseUrl, paths2, this.type, this.queryParams); String response = wr.get(String.class); JSONParams unitsJson = new JSONParams(response); Iterator<String> unitsIds = unitsJson.getKeySet(); FileWriter fw = new FileWriter("results.csv"); BufferedWriter bw = new BufferedWriter(fw); while(unitsIds.hasNext()) { String unitId = unitsIds.next(); populateRaters(unitId, ratersHash, bw); bw.newLine(); } bw.close(); this.logger.info("Results written to results.csv file"); return ratersHash; } public JSONParams getJudgments() throws ParameterNeeded, IOException { ArrayList<String> paths2 = (ArrayList<String>) this.paths.clone(); String id = null; if ((id = this.getParameter("id")) == null) { throw new ParameterNeeded("The id parameter was mandatory for the getJudgements method"); } paths2.add(id); paths2.add("judgments"); WebResource wr = ConnectionHelper.getResource(this.baseUrl, paths2, this.type, this.queryParams); String response = wr.get(String.class); JSONParams judgmentsJSON = new JSONParams(response); return judgmentsJSON; } /** * Used to populate Hashmap with pairs of raters from the * unit's judgments and writes the unit to a csv file. * * @param unitId The id of the unit that is been analyzed * @param raters An already created hashmap to be populated * @param bos FileWriter with witch to write the unit to the csv file * @throws IOException */ private void populateRaters(String unitId, HashMap<String, HashMap<String, KappaRaters>> raters, BufferedWriter bw) throws IOException { ArrayList<String> paths2 = (ArrayList<String>) this.paths.clone(); paths2.add(this.getParameter("id")); paths2.add("units"); paths2.add(unitId); WebResource wr = ConnectionHelper.getResource(this.baseUrl, paths2, this.type, this.queryParams); String response = wr.get(String.class); JSONParams completeUnit = new JSONParams(response); JSONParams judgments = completeUnit.getObject("results").getObject("judgments"); JSONParams judgmentA = null; JSONParams judgmentB = null; for(int i = 0; i < judgments.size(); i++) { judgmentA = judgments.getObjectAt(i); for(int j = 0; j < judgments.size(); j++) { if (i != j) { judgmentB = judgments.getObjectAt(j); int workerA = Integer.parseInt(judgmentA.getProperty("worker_id")); int workerB = Integer.parseInt(judgmentB.getProperty("worker_id")); int leftWorker = Math.min(workerA, workerB); int rightWorker = Math.max(workerA, workerB); for (int k = 0; k < services.length; k++) { Service s = services[k]; HashMap<String, KappaRaters> ratersService = raters.get(s.getName()); KappaRaters kr = null; if (ratersService.containsKey(leftWorker + "/" + rightWorker)) { kr = ratersService.get(leftWorker + "/" + rightWorker); } else { kr = new KappaRaters(workerA, workerB); ratersService.put(leftWorker + "/" + rightWorker, kr); } kr.addAdequacy( Integer.parseInt(judgmentA.getObject("data").getProperty("adequacy_" + s.getName())), Integer.parseInt(judgmentB.getObject("data").getProperty("adequacy_" + s.getName()))); kr.addFluency( Integer.parseInt(judgmentA.getObject("data").getProperty("fluency_" + s.getName())), Integer.parseInt(judgmentB.getObject("data").getProperty("fluency_" + s.getName()))); } } } } writeCSVLine(bw, completeUnit); } /** * Writes a line of wokers judgments to a csv file * * @param object The complete unit being written * @throws IOException */ private void writeCSVLine(BufferedWriter bw, JSONParams object) throws IOException { ArrayList<String> fields = new ArrayList<String>(); String lo = object.getObject("results").getObject("judgments").getObjectAt(0).getObject("unit_data").getProperty("lo"); fields.add(lo); for (int i = 0; i < services.length; i++) { String translation = object.getObject("results").getObject("judgments").getObjectAt(0).getObject("unit_data").getProperty(services[i].getName()); fields.add(translation); } JSONParams judgments = object.getObject("results").getObject("judgments"); JSONParams judgment = null; Integer judgmentsSize = judgments.size(); fields.add(judgmentsSize.toString()); for(int i = 0; i < judgments.size(); i++) { judgment = judgments.getObjectAt(i); fields.add(judgment.getProperty("worker_id")); for (int k = 0; k < services.length; k++) { Service s = services[k]; fields.add("adequacy_" + s.getName()); fields.add(judgment.getObject("data").getProperty("adequacy_" + s.getName())); fields.add("fluency_" + s.getName()); fields.add(judgment.getObject("data").getProperty("fluency_" + s.getName())); } } bw.write(this.implode(fields.toArray(new String[fields.size()]), ",")); } private String implode(String[] fields, String delim) { String ret = ""; for (int i = 0; i < fields.length; i++) { ret += fields[i].replaceAll("[^A-Za-záéíóú ,\\.0-9\\_]", ""); if (i < fields.length - 1) { ret += delim; } } return ret; } /** * Orders a job with the units created for it * * @return The reponse of the server * @throws IOException * @throws ParameterNeeded If the id of the job is missing */ public JSONParams order() throws IOException, ParameterNeeded { String id = null; if ((id = this.getParameter("id")) == null) { throw new ParameterNeeded("The id parameter was mandatory for the getJudgements method"); } ArrayList<String> paths2 = (ArrayList<String>) this.paths.clone(); paths2.add(id); paths2.add("orders"); WebResource wr = ConnectionHelper.getResource(this.baseUrl, paths2, this.type); Form f = new Form(); f.add("key", this.queryParams.get("key")); f.add("debit[units_count]", unit_count); for(String channel: channels) { f.add("channels[0]", channel); } String response = wr.type("application/x-www-form-urlencoded").post(String.class, f); return new JSONParams(response); } public static void main(String[] args) { try { JobsCF helper = new JobsCF(null); helper.addParameter("key", null); } catch (Exception e) { System.err.println(e.getMessage()); } } }