package org.aksw.gerbil.tools;
import java.io.Closeable;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import javax.sql.DataSource;
import org.aksw.gerbil.database.ExperimentDAO;
import org.aksw.gerbil.database.ExperimentDAOImpl;
import org.aksw.gerbil.dataid.DataIDGenerator;
import org.aksw.gerbil.datatypes.ExperimentTaskResult;
import org.apache.commons.io.IOUtils;
import org.apache.jena.riot.Lang;
import org.apache.jena.riot.RDFDataMgr;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.context.support.ClassPathXmlApplicationContext;
import org.springframework.jdbc.core.RowMapper;
import org.springframework.jdbc.core.namedparam.NamedParameterJdbcTemplate;
import org.springframework.jdbc.datasource.DriverManagerDataSource;
import com.hp.hpl.jena.rdf.model.Model;
import com.hp.hpl.jena.rdf.model.Resource;
/**
 * Tool that reads the experiment to experiment task links from the database,
 * builds an RDF model for them with a {@link DataIDGenerator} and writes that
 * model to a file.
 */
public class DataDumpTool implements Closeable {

    private static final Logger LOGGER = LoggerFactory.getLogger(DataDumpTool.class);

    /** Name of the file {@link #main(String[])} writes the model to. */
    private static final String OUTPUT_FILE_NAME = "datadump.nt";
    /** Serialization {@link #main(String[])} uses for the written model. */
    private static final Lang OUTPUT_LANG = Lang.NT;
    /** Base URL {@link #main(String[])} hands to the {@link DataIDGenerator}. */
    private static final String GERBIL_BASE_URL = "http://gerbil.aksw.org/gerbil/";
    /** Query returning one row per link: experiment id first, task id second. */
    private static final String EXPERIMENT_IDS_QUERY = "SELECT id, taskId FROM Experiments";
    /** A progress message is logged every time this many links have been processed. */
    private static final int PROGRESS_LOG_INTERVAL = 1000;

    private final String gerbilBaseUrl;
    private final DataSource dataSource;
    private final ExperimentDAO dao;

    /**
     * Loads the Spring database context, runs the dump with the default file
     * name, language and base URL, and closes the tool and the context
     * afterwards (in that order), whether or not the dump succeeded.
     *
     * @param args
     *            command line arguments (not used)
     */
    public static void main(String[] args) {
        ClassPathXmlApplicationContext springContext = new ClassPathXmlApplicationContext(
                "/spring/database/database-context.xml");
        try {
            DataSource database = springContext.getBean("databaseDataSource", DriverManagerDataSource.class);
            DataDumpTool dumpTool = new DataDumpTool(GERBIL_BASE_URL, database);
            try {
                dumpTool.run(OUTPUT_FILE_NAME, OUTPUT_LANG);
            } finally {
                IOUtils.closeQuietly(dumpTool);
            }
        } finally {
            IOUtils.closeQuietly(springContext);
        }
    }

    /**
     * Creates a tool working on the given data source. An
     * {@link ExperimentDAOImpl} is created on top of the same data source.
     *
     * @param gerbilBaseUrl
     *            base URL passed to the {@link DataIDGenerator}
     * @param dataSource
     *            data source used for the link query and for the DAO
     */
    public DataDumpTool(String gerbilBaseUrl, DataSource dataSource) {
        this.gerbilBaseUrl = gerbilBaseUrl;
        this.dataSource = dataSource;
        this.dao = new ExperimentDAOImpl(dataSource);
    }

    /**
     * Loads all experiment to task links, adds the matching experiment and
     * task resources to a freshly generated model, links them and finally
     * writes the model to the given file.
     *
     * @param outputFileName
     *            path of the file the model is written to
     * @param language
     *            RDF serialization used for writing
     */
    public void run(String outputFileName, Lang language) {
        List<ExperimentToTaskLink> links = loadExperiments();
        LOGGER.info("Loaded {} experiment to experiment task links from the database.", links.size());

        DataIDGenerator generator = new DataIDGenerator(gerbilBaseUrl);
        Model model = generator.generateDataIDModel();
        Map<String, Resource> experimentsById = new HashMap<String, Resource>();
        Map<String, List<Resource>> tasksByUri = new HashMap<String, List<Resource>>();

        int processed = 0;
        for (ExperimentToTaskLink link : links) {
            Resource experiment = resolveExperiment(generator, model, experimentsById, link.experimentId);
            List<Resource> tasks = resolveTasks(generator, model, tasksByUri, link.experimentTaskId);
            // Links pointing to a task without a stored result are skipped.
            if (tasks != null) {
                generator.linkTasksToExperiment(model, experiment, tasks);
            }
            processed++;
            if ((processed % PROGRESS_LOG_INTERVAL) == 0) {
                LOGGER.info("Processed {} experiment task links.", processed);
            }
        }

        writeModel(model, outputFileName, language);
    }

    /**
     * Returns the resource of the experiment with the given id, creating and
     * remembering it if this id has not been seen before.
     */
    private Resource resolveExperiment(DataIDGenerator generator, Model model,
            Map<String, Resource> experimentsById, String experimentId) {
        Resource experiment;
        if (!experimentsById.containsKey(experimentId)) {
            experiment = generator.createExperimentResource(model, experimentId);
            experimentsById.put(experimentId, experiment);
        } else {
            experiment = experimentsById.get(experimentId);
        }
        return experiment;
    }

    /**
     * Returns the list of task resources belonging to the given task id. The
     * list is looked up by task URI first; on a miss the task result is
     * fetched from the DAO and its resources are created and remembered.
     *
     * @return the task resources, or {@code null} if the DAO has no result for
     *         the task (an error is logged and nothing is remembered in that
     *         case)
     */
    private List<Resource> resolveTasks(DataIDGenerator generator, Model model,
            Map<String, List<Resource>> tasksByUri, int experimentTaskId) {
        String taskUri = generator.generateExperimentTaskUri(experimentTaskId);
        // Only non-null lists are ever stored, so a null lookup means "unknown".
        List<Resource> known = tasksByUri.get(taskUri);
        if (known != null) {
            return known;
        }
        ExperimentTaskResult result = dao.getResultOfExperimentTask(experimentTaskId);
        if (result == null) {
            LOGGER.error("Couldn't find an experiment task with the id {}. It will be ignored.",
                    experimentTaskId);
            return null;
        }
        List<Resource> created = new ArrayList<Resource>();
        generator.createExperimentTask(model, result, null, created);
        tasksByUri.put(taskUri, created);
        return created;
    }

    /**
     * Writes the model to the given file, creating missing parent directories
     * first. A {@link FileNotFoundException} raised while opening the file is
     * logged and not rethrown; the stream is closed in every case.
     */
    private void writeModel(Model model, String outputFileName, Lang language) {
        File outputFile = new File(outputFileName);
        File parentDir = outputFile.getParentFile();
        if ((parentDir != null) && !parentDir.exists()) {
            parentDir.mkdirs();
        }
        FileOutputStream out = null;
        try {
            out = new FileOutputStream(outputFile);
            RDFDataMgr.write(out, model, language);
        } catch (FileNotFoundException e) {
            LOGGER.error("Exception while writing model.", e);
        } finally {
            IOUtils.closeQuietly(out);
        }
    }

    /**
     * Runs the link query against the data source and maps every row to an
     * {@link ExperimentToTaskLink}.
     */
    private List<ExperimentToTaskLink> loadExperiments() {
        NamedParameterJdbcTemplate jdbcTemplate = new NamedParameterJdbcTemplate(dataSource);
        RowMapper<ExperimentToTaskLink> rowMapper = new ExperimentToTaskLinkRowMapper();
        return jdbcTemplate.query(EXPERIMENT_IDS_QUERY, rowMapper);
    }

    /**
     * Closes the DAO created by the constructor, ignoring errors raised while
     * closing.
     */
    @Override
    public void close() throws IOException {
        IOUtils.closeQuietly(dao);
    }

    /**
     * Simple holder for one row of the link query: an experiment id together
     * with the id of one of its experiment tasks.
     */
    protected static class ExperimentToTaskLink {
        public String experimentId;
        public int experimentTaskId;

        public ExperimentToTaskLink(String experimentId, int experimentTaskId) {
            this.experimentId = experimentId;
            this.experimentTaskId = experimentTaskId;
        }
    }

    /**
     * Maps a result row to an {@link ExperimentToTaskLink}, reading the
     * experiment id from column 1 (as string) and the task id from column 2
     * (as int).
     */
    protected static class ExperimentToTaskLinkRowMapper implements RowMapper<ExperimentToTaskLink> {
        @Override
        public ExperimentToTaskLink mapRow(ResultSet resultSet, int rowNumber) throws SQLException {
            String experimentId = resultSet.getString(1);
            int experimentTaskId = resultSet.getInt(2);
            return new ExperimentToTaskLink(experimentId, experimentTaskId);
        }
    }
}