package com.datascience.galc.main; import java.io.BufferedWriter; import java.io.File; import java.io.FileWriter; import org.apache.log4j.Logger; import com.datascience.galc.ContinuousIpeirotis; import com.datascience.core.results.DatumContResults; import com.datascience.galc.EmpiricalData; import com.datascience.core.results.WorkerContResults; import com.datascience.core.base.ContValue; import com.datascience.core.base.IData; import com.datascience.core.base.LObject; import com.datascience.core.base.Worker; class ReportGenerator { private ContinuousIpeirotis ip; private static Logger logger = Logger.getLogger(ReportGenerator.class); public ReportGenerator(ContinuousIpeirotis ip, EngineContext ctx) { this.ip = ip; } /** * */ double getCorrelationRelativeError() { Double relRhoError = 0.0; int n = 0; for (WorkerContResults wr: ip.getResults().getWorkerResults(ip.getData().getWorkers()).values()) { if (wr.getTrueRho()==null) continue; n++; double estRho = wr.getEst_rho(); double realRho = wr.getTrueRho(); double absDiff = Math.abs(realRho - estRho); double relDiff = Math.abs(absDiff / realRho); relRhoError += relDiff; } return relRhoError/n; } /** * */ double getCorrelationAbsoluteError() { Double avgRhoError = 0.0; int n = 0; for (WorkerContResults wr: ip.getResults().getWorkerResults(ip.getData().getWorkers()).values()) { if (wr.getTrueRho() ==null ) continue; n++; double estRho = wr.getEst_rho(); double realRho = wr.getTrueRho(); double absDiff = Math.abs(realRho - estRho); avgRhoError += absDiff; } return avgRhoError/n; } /** * */ public String generateWorkerReport() { StringBuffer sb = new StringBuffer(); sb.append("Name\tLabels\tEstMean\tEstStDev\tEstCorrelation\tTrueMean\tTrueStDev\tTrueCorrelation\n"); for (Worker w : ip.getData().getWorkers()){ WorkerContResults wr = ip.getResults().getWorkerResult(w); sb.append(w.getName() + "\t" + ip.getData().getWorkerAssigns(w).size() + "\t" + wr.getEst_mu() + "\t" + wr.getEst_sigma() + "\t" + wr.getEst_rho() + "\t" + wr.getTrueMu() + "\t" + wr.getTrueSigma() + "\t" + wr.getTrueRho()); sb.append("\n"); } String out = "\nAverage absolute estimation error for correlation values: " + getCorrelationAbsoluteError() + "\n" + "Average relative estimation error for correlation values: " + getCorrelationRelativeError(); logger.info(out); sb.append(out); return sb.toString(); } /** * @return */ Double estimateDistributionSigma() { Double nominator_sigma = 0.0; Double denominator_sigma = 0.0; for (WorkerContResults wr : ip.getResults().getWorkerResults(ip.getData().getWorkers()).values()) { Double b = wr.getBeta(); Double coef = Math.sqrt(b * b - b); Double s = wr.getEst_sigma(); nominator_sigma += coef * s; denominator_sigma += b; } Double est_sigma = nominator_sigma / denominator_sigma; return est_sigma; } /** * @return */ Double estimateDistributionMu() { // Estimate mu and sigma of distribution Double nominator_mu = 0.0; Double denominator_mu = 0.0; for (WorkerContResults wr : ip.getResults().getWorkerResults(ip.getData().getWorkers()).values()) { Double b = wr.getBeta(); Double coef = Math.sqrt(b * b - b); Double m = wr.getEst_mu(); nominator_mu += coef * m; denominator_mu += b; } Double est_mu = nominator_mu / denominator_mu; return est_mu; } public String generateDistributionReport() { String out = "Estimated mu = " + estimateDistributionMu() + "\n" + "Estimated sigma = " + estimateDistributionSigma(); logger.info(out); return out; } double getZetaAbsoluteErrorObject() { // Double avgAbsError = 0.0; // int n = 0; // for (DatumContResults dr : ip.getObjectsResults().values()) { // if ( dr.getTrueZeta() == null ) continue; // n++; // double estZ = dr.getEst_zeta(); // double realZ = dr.getTrueZeta(); // double absDiff = Math.abs(realZ - estZ); // avgAbsError += absDiff; // } // return avgAbsError/n; return 0; } double getZetaRelativeErrorObject() { // Double avgRelError = 0.0; // int n = 0; // for (DatumContResults dr : ip.getObjectsResults().values()) { // if ( dr.getTrueZeta() == null) continue; // n++; // double estZ = dr.getEst_zeta(); // double realZ = dr.getTrueZeta(); // double absDiff = Math.abs(realZ - estZ); // double relDiff = Math.abs(absDiff / realZ); // // avgRelError += relDiff/n; // } // return avgRelError; return 0; } public String generateObjectReport() { double mu = this.estimateDistributionMu(); double sigma = this.estimateDistributionSigma(); StringBuffer sb = new StringBuffer(); for (LObject<ContValue> d : ip.getData().getObjects()){ DatumContResults dr = ip.getResults().getDatumResult(d); dr.setDistributionMu(mu); dr.setDistributionSigma(sigma); sb.append(d.getName() +"\t" + ip.getAverageLabel(d) + "\t" + dr.getEst_value() + "\t" + dr.getEst_zeta() + "\t"); sb.append(d.getEvaluationLabel() != null ? d.getEvaluationLabel().getValue() : "null" + "\t"); sb.append(d.getEvaluationLabel() != null ? d.getEvaluationLabel().getZeta() : "null"); sb.append("\n"); } String out = "\nAverage absolute estimation error for z-values: " + getZetaAbsoluteErrorObject() + "\n" + "Average relative estimation error for z-values: " + getZetaRelativeErrorObject(); logger.info(out); sb.append(out); return sb.toString(); } public void writeReportToFile(String foldername, String filename, String reportcontent) { try { File outfile = new File(foldername + "/" + filename); if (outfile.getParent() != null) { File parentDir = new File(outfile.getParent()); if (!parentDir.exists()) { parentDir.mkdirs(); } } BufferedWriter bw = new BufferedWriter(new FileWriter(outfile)); bw.write(reportcontent); bw.close(); } catch (Exception e) { e.printStackTrace(); } } } public class Engine { private EngineContext ctx; /** * Logger for this class */ private static Logger logger = Logger.getLogger(Engine.class); public Engine(EngineContext ctx) { this.ctx = ctx; } public void execute() { EmpiricalData edata = new EmpiricalData(); edata.loadLabelFile(ctx.getInputFile()); if(ctx.hasTrueWorkersFile()) { edata.loadTrueWorkerData(ctx.getTrueWorkersFile()); } if(ctx.hasTrueObjectsFile()) { edata.loadTrueObjectData(ctx.getTrueObjectsFile()); } if(ctx.hasCorrectFile()) { edata.loadGoldLabelsFile(ctx.getCorrectFile()); } ContinuousIpeirotis ip = new ContinuousIpeirotis(); ip.setIterations(20); ip.setEpsilon(1e-5); ip.compute(); ReportGenerator rpt = new ReportGenerator(ip, ctx); // Report about distributional estimates rpt.writeReportToFile(ctx.getOutputFolder(), "results-distribution.txt", rpt.generateDistributionReport()); // Give report for objects rpt.writeReportToFile(ctx.getOutputFolder(), "results-objects.txt", rpt.generateObjectReport()); // Give report for workers rpt.writeReportToFile(ctx.getOutputFolder(), "results-workers.txt", rpt.generateWorkerReport()); logger.info("Results in folder: " + ctx.getOutputFolder()); } }