package com.datascience.gal.dataGenerator;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.util.Collection;
import java.util.Map;
import java.util.Properties;
import org.apache.log4j.Level;
import org.apache.log4j.Logger;
/**
 * Command line entry point that builds a set of test data (categories,
 * workers, objects, gold labels and labels) from command line switches
 * and/or a properties file, and hands the result to {@link DataManager}
 * for saving.
 *
 * <p>All state is kept in static fields, so the class is meant to be run
 * once per JVM through {@link #main(String[])}.</p>
 */
public class TroiaDataGenerator {

    /**
     * Runs the generator. With fewer than two arguments only the usage help
     * is printed; otherwise the arguments are parsed, validated, and the
     * data is generated and saved. Any failure is reported on standard
     * output together with its stack trace.
     *
     * @param args command line switches, each followed by its value
     */
    public static void main(String[] args) {
        Logger.getRootLogger().setLevel(Level.INFO);
        try {
            if (args.length < 2) {
                printHelp();
            } else {
                parseArgs(args);
                verifyParameters();
                generateData();
                saveData();
                System.out.println("Test data generation finished.");
            }
        } catch (Exception e) {
            System.out.println("Unable to generate test data because "
                               + e.getMessage());
            e.printStackTrace();
        }
    }

    /**
     * Resets the tunable parameters to their defaults and then applies every
     * recognised {@code switch value} pair found in {@code args}.
     * Unrecognised arguments are skipped one at a time. A recognised switch
     * that is the last argument (and therefore has no value) is reported and
     * ignored instead of reading past the end of the array.
     *
     * @param args raw command line arguments
     * @throws NumberFormatException if a numeric switch has a non numeric value
     */
    private static void parseArgs(String[] args) {
        workersPerObject = 1;
        minQuality = 0;
        maxQuality = 1;
        goldRatio = 0;
        workerCount = 0;
        for (int argPointer = 0; argPointer < args.length; argPointer++) {
            String tag = args[argPointer];
            if (!isKnownTag(tag)) {
                continue;
            }
            // The value must exist BEFORE the index is advanced; the former
            // "argPointer++ < args.length" test was always true and let the
            // read below run off the end of the array.
            if (argPointer + 1 >= args.length) {
                System.out.println("Missing value for parameter \"" + tag + "\".");
                break;
            }
            argPointer++;
            applyArgument(tag, args[argPointer]);
        }
    }

    /**
     * Tells whether {@code arg} is one of the supported switches
     * (compared case-insensitively).
     */
    private static boolean isKnownTag(String arg) {
        for (String tag : VALUE_TAGS) {
            if (tag.equalsIgnoreCase(arg)) {
                return true;
            }
        }
        return false;
    }

    /**
     * Stores {@code value} in the parameter selected by {@code tag}.
     * A properties file that cannot be read is reported and logged, and
     * parsing of the remaining arguments continues.
     *
     * @param tag   one of the supported switches
     * @param value the argument that followed the switch
     */
    private static void applyArgument(String tag, String value) {
        if (tag.equalsIgnoreCase(CONFIGURATION_FILE_TAG)) {
            try {
                parsePropertiesFile(value);
            } catch (IOException e) {
                System.out.println('\"' + value
                                   + "\" is not a correct property file.");
                Logger.getLogger(TroiaDataGenerator.class).error(e.getMessage());
            }
        } else if (tag.equalsIgnoreCase(BASIC_WORKER_FILE_TAG)) {
            workerQualitiesFilename = value;
        } else if (tag.equalsIgnoreCase(AI_WORKERS_FILE_TAG)) {
            aiWorkersFilename = value;
        } else if (tag.equalsIgnoreCase(OBJECTS_FILE_TAG)) {
            objectsFileName = value;
        } else if (tag.equalsIgnoreCase(CATEGORIES_FILE_TAG)) {
            categoriesFileName = value;
        } else if (tag.equalsIgnoreCase(CATEGORY_COUNT_TAG)) {
            categoryCount = Integer.parseInt(value);
        } else if (tag.equalsIgnoreCase(OBJECT_COUNT_TAG)) {
            objectCount = Integer.parseInt(value);
        } else if (tag.equalsIgnoreCase(WORKER_COUNT_TAG)) {
            workerCount = Integer.parseInt(value);
        } else if (tag.equalsIgnoreCase(WORKERS_PER_OBJECT_TAG)) {
            workersPerObject = Integer.parseInt(value);
        } else if (tag.equalsIgnoreCase(MIN_QUALITY_TAG)) {
            minQuality = Double.parseDouble(value);
        } else if (tag.equalsIgnoreCase(MAX_QUALITY_TAG)) {
            maxQuality = Double.parseDouble(value);
        } else if (tag.equalsIgnoreCase(GOLD_RATIO_TAG)) {
            goldRatio = Double.parseDouble(value);
        } else if (tag.equalsIgnoreCase(TEST_DATA_FILE_TAG)) {
            outputFilename = value;
        }
    }

    /**
     * Checks the parsed parameters for consistency.
     *
     * <ul>
     * <li>Category and object counts are only checked when objects were
     * requested (a non-zero object count or an objects file).</li>
     * <li>Worker count and quality bounds are only checked when workers are
     * to be generated, i.e. no worker file was given and the worker count
     * is non-zero.</li>
     * <li>Workers per object and gold ratio are always checked.</li>
     * </ul>
     *
     * @throws Exception describing the first invalid parameter found
     */
    private static void verifyParameters() throws Exception {
        if (objectCount != 0 || objectsFileName != null) {
            if (categoryCount <= 0 && categoriesFileName == null && objectsFileName == null) {
                throw new Exception("Category count must be larger then 0.");
            }
            if (objectCount <= 0 && objectsFileName == null) {
                throw new Exception("Object count must be larger then 0.");
            }
        }
        if (workerQualitiesFilename == null && workerCount != 0 && aiWorkersFilename == null) {
            if (workerCount < 0) {
                throw new Exception("Worker count must be larger then 0.");
            }
            if (minQuality < 0 || minQuality > 1) {
                throw new Exception(
                    "Worker quality must be value between 0 and 1.");
            }
            if (maxQuality < 0 || maxQuality > 1) {
                throw new Exception(
                    "Worker quality must be value between 0 and 1.");
            }
            if (minQuality > maxQuality) {
                throw new Exception(
                    "Minimum quality must be equal or smaller then maximum quality");
            }
        }
        if (workersPerObject <= 0) {
            throw new Exception(
                "Workers per object must have value larger then 0.");
        }
        if (goldRatio < 0 || goldRatio > 1) {
            throw new Exception("Gold ratio must be value between 0 and 1.");
        }
    }

    /**
     * Fills {@link #data} step by step. For every kind of input a file, when
     * given, takes precedence over generation:
     * categories file over generated category names; basic worker file over
     * AI workers file over generated workers; objects file over generated
     * objects. Gold labels are produced whenever an object collection exists,
     * and labels whenever both workers and objects exist.
     *
     * @throws FileNotFoundException declared for the file loading calls
     */
    private static void generateData() throws FileNotFoundException {
        // Stays null when neither a categories file nor a category count was
        // given; it is passed on to the worker/object generators as is.
        Collection<String> categoryNames = null;
        if (categoriesFileName != null) {
            categories = manager
                         .loadCategoriesWithProbabilities(categoriesFileName);
            data.setCategories(categories.keySet());
            categoryNames = categories.keySet();
        } else if (categoryCount != 0) {
            categoryNames = generator.generateCategoryNames(categoryCount);
            data.setCategories(categoryNames);
        }

        if (workerQualitiesFilename != null) {
            data.setArtificialWorkers(manager.loadBasicWorkers(
                                          workerQualitiesFilename, categoryNames));
        } else if (aiWorkersFilename != null) {
            data.setArtificialWorkers(manager.loadArtificialWorkersFromFile(aiWorkersFilename));
        } else if (workerCount != 0) {
            data.setArtificialWorkers(generator.generateArtificialWorkers(
                                          workerCount, categoryNames, minQuality, maxQuality));
        }

        if (objectsFileName != null) {
            data.setObjectCollection(manager.loadTestObjectsFromFile(objectsFileName));
        } else if (objectCount != 0) {
            data.setObjectCollection(generator.generateTestObjects(objectCount, categoryNames));
        }

        if (data.getObjectCollection() != null) {
            data.setGoldLabels(generator.generateGoldLabels(data.getObjectCollection(), goldRatio));
        }
        if (data.getArtificialWorkers() != null && data.getObjectCollection() != null) {
            data.setLabels(generator.generateLabels(data.getArtificialWorkers(),
                                                    data.getObjectCollection(), workersPerObject));
        }
    }

    /**
     * Passes the generated data and the output base name to the data manager.
     *
     * @throws IOException declared for the save call
     */
    private static void saveData() throws IOException {
        manager.saveTestData(outputFilename, data);
    }

    /**
     * Loads the numeric parameters from a properties file. All seven
     * properties are required.
     *
     * @param filename path of the properties file
     * @throws IOException              if the file cannot be opened or read
     * @throws IllegalArgumentException if a property is missing, or (as
     *                                  {@link NumberFormatException}) is not a
     *                                  valid number
     */
    private static void parsePropertiesFile(String filename) throws IOException {
        Properties props = new Properties();
        FileInputStream inputStream = new FileInputStream(filename);
        try {
            props.load(inputStream);
        } finally {
            // Close even when load() fails so the file handle is not leaked.
            inputStream.close();
        }
        categoryCount = Integer.parseInt(
                            requiredProperty(props, CATEGORY_COUNT_PROPERTY, filename));
        objectCount = Integer.parseInt(
                          requiredProperty(props, OBJECT_COUNT_PROPERTY, filename));
        workerCount = Integer.parseInt(
                          requiredProperty(props, WORKER_COUNT_PROPERTY, filename));
        minQuality = Double.parseDouble(
                         requiredProperty(props, MINIMUM_QUALITY_PROPERTY, filename));
        maxQuality = Double.parseDouble(
                         requiredProperty(props, MAXIMUM_QUALITY_PROPERY, filename));
        goldRatio = Double.parseDouble(
                        requiredProperty(props, GOLD_RATIO_PROPERTY, filename));
        workersPerObject = Integer.parseInt(
                               requiredProperty(props, WORKERS_PER_OBJECT_PROPERTY, filename));
    }

    /**
     * Returns the value of {@code key}, failing with a message that names the
     * key instead of letting the number parsers fail on {@code null}.
     */
    private static String requiredProperty(Properties props, String key, String filename) {
        String value = props.getProperty(key);
        if (value == null) {
            throw new IllegalArgumentException("Property \"" + key
                                               + "\" is missing in \"" + filename + "\".");
        }
        return value;
    }

    /** Prints the list of supported command line switches to standard output. */
    private static void printHelp() {
        System.out.println("Troia test data generator parameters :");
        printOption(CONFIGURATION_FILE_TAG, "Loads configuration from settings file");
        printOption(TEST_DATA_FILE_TAG, "base name for output files");
        printOption(CATEGORY_COUNT_TAG, "number of categories in test data");
        printOption(OBJECT_COUNT_TAG, "number of objects in test data");
        printOption(WORKER_COUNT_TAG, "number of workers in test data");
        printOption(MAX_QUALITY_TAG, "maximal quality of worker (from 0 to 1)");
        printOption(MIN_QUALITY_TAG, "minimal quality of worker (from 0 to 1)");
        printOption(WORKERS_PER_OBJECT_TAG, "number of workers assigned to single object");
        printOption(GOLD_RATIO_TAG, "ratio of gold labels among objects (from 0 to 1)");
        printOption(BASIC_WORKER_FILE_TAG, " name of file containing basic workers definition.");
        printOption(CATEGORIES_FILE_TAG, " name of file containing category priors");
        printOption(OBJECTS_FILE_TAG, " name of file containing test objects");
        printOption(AI_WORKERS_FILE_TAG, " name of file containing jsonified workers");
    }

    /** Prints one help line: a tab, the switch, the separator and its description. */
    private static void printOption(String tag, String description) {
        System.out.println('\t' + tag + SEPARATOR + description);
    }

    // Command line switches; each one is followed by a value.
    private static final String CATEGORY_COUNT_TAG = "-c";
    private static final String OBJECT_COUNT_TAG = "-o";
    private static final String WORKER_COUNT_TAG = "-w";
    private static final String MAX_QUALITY_TAG = "-h";
    private static final String MIN_QUALITY_TAG = "-l";
    private static final String WORKERS_PER_OBJECT_TAG = "-p";
    private static final String GOLD_RATIO_TAG = "-g";
    private static final String BASIC_WORKER_FILE_TAG = "-q";
    private static final String CATEGORIES_FILE_TAG = "-a";
    private static final String TEST_DATA_FILE_TAG = "-t";
    private static final String CONFIGURATION_FILE_TAG = "-f";
    private static final String OBJECTS_FILE_TAG = "-j";
    private static final String AI_WORKERS_FILE_TAG = "-i";

    /** Every switch understood by {@link #parseArgs(String[])}. */
    private static final String[] VALUE_TAGS = {
        CONFIGURATION_FILE_TAG, BASIC_WORKER_FILE_TAG, AI_WORKERS_FILE_TAG,
        OBJECTS_FILE_TAG, CATEGORIES_FILE_TAG, CATEGORY_COUNT_TAG,
        OBJECT_COUNT_TAG, WORKER_COUNT_TAG, WORKERS_PER_OBJECT_TAG,
        MIN_QUALITY_TAG, MAX_QUALITY_TAG, GOLD_RATIO_TAG, TEST_DATA_FILE_TAG
    };

    // Keys read from the properties file given with CONFIGURATION_FILE_TAG.
    private static final String CATEGORY_COUNT_PROPERTY = "category_count";
    private static final String OBJECT_COUNT_PROPERTY = "object_count";
    private static final String WORKER_COUNT_PROPERTY = "worker_count";
    private static final String MINIMUM_QUALITY_PROPERTY = "minimal_worker_quality";
    private static final String MAXIMUM_QUALITY_PROPERY = "maximal_worker_quality";
    private static final String WORKERS_PER_OBJECT_PROPERTY = "workers_per_object";
    private static final String GOLD_RATIO_PROPERTY = "gold_ratio";

    /** Text placed between a switch and its description in the help output. */
    private static final String SEPARATOR = " - ";

    // Parameters filled in by parseArgs / parsePropertiesFile.
    private static int categoryCount = 0;
    private static int objectCount = 0;
    private static int workerCount = 0;
    private static double minQuality = 0;
    private static double maxQuality = 0;
    private static int workersPerObject = 0;
    private static double goldRatio = 0;
    private static String workerQualitiesFilename = null;
    private static String aiWorkersFilename = null;
    private static String categoriesFileName = null;
    private static String objectsFileName = null;
    private static String outputFilename = "testData";

    /** Categories loaded from the categories file; only set when such a file is given. */
    private static Map<String, Double> categories;

    // Collaborators: the container being filled and the singletons doing the work.
    private static Data data = new Data();
    private static DataGenerator generator = DataGenerator
            .getInstance();
    private static DataManager manager = DataManager.getInstance();
}