package org.hadatac.metadata.loader;
import java.io.File;
import java.io.FileReader;
import java.io.IOException;
import java.io.LineNumberReader;
import java.io.UnsupportedEncodingException;
import java.net.URLEncoder;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.StringTokenizer;
import org.apache.commons.io.FileUtils;
import org.apache.jena.rdf.model.Model;
import org.apache.jena.rdf.model.Property;
import org.apache.jena.rdf.model.RDFNode;
import org.apache.jena.rdf.model.Resource;
import org.apache.jena.rdf.model.Selector;
import org.apache.jena.rdf.model.SimpleSelector;
import org.apache.jena.rdf.model.Statement;
import org.apache.jena.rdf.model.StmtIterator;
import org.apache.jena.rdf.model.impl.StatementImpl;
import org.apache.jena.rdf.model.ModelFactory;
import org.apache.jena.riot.RDFDataMgr;
import org.hadatac.console.controllers.AuthApplication;
import org.hadatac.console.controllers.triplestore.UserManagement;
import org.hadatac.console.models.SysUser;
import org.hadatac.console.views.html.deployments.newDeployment;
import org.hadatac.entity.pojo.DataAcquisition;
import org.hadatac.entity.pojo.Deployment;
import org.hadatac.entity.pojo.Measurement;
import org.hadatac.entity.pojo.TriggeringEvent;
import org.hadatac.metadata.loader.LabkeyDataHandler.PlainTriple;
import org.hadatac.metadata.model.SpreadsheetParsingResult;
import org.hadatac.utils.Feedback;
import org.hadatac.utils.NameSpaces;
import org.labkey.remoteapi.CommandException;
import org.hadatac.data.model.ParsingResult;
import play.mvc.Controller;
public class TripleProcessing {
public static final String KB_FORMAT = "text/turtle";
public static final String TTL_DIR = "tmp/ttl/";
public static int count;
public static String printFileWithLineNumber(int mode, String filename) {
String str = "";
int lineNumber = 1;
LineNumberReader reader = null;
String line = null;
try {
reader = new LineNumberReader(new FileReader(filename));
while ((line = reader.readLine()) != null) {
str += Feedback.println(mode, lineNumber++ + " " + line);
}
reader.close();
} catch (Exception e) {
e.printStackTrace();
}
return str;
}
public static SpreadsheetParsingResult generateTTL(int mode,
Map< String, List<PlainTriple> > sheet, List<String> predicates) {
String shttl = "";
String message = "";
// Prints all identified predicates as a turtle comment
shttl = shttl + "# properties: ";
for (String pred : predicates) {
ValueCellProcessing.validateNameSpace(pred);
shttl = shttl + "[" + pred + "] ";
}
shttl = shttl + "\n";
int processedRows = 0;
int processedTriples = 0;
for (String uri : sheet.keySet()) {
List<PlainTriple> row = sheet.get(uri);
shttl = shttl + processTriplesOfRow(row, predicates);
processedRows++;
processedTriples += row.size();
}
System.out.println(String.format("%d rows processed!", processedRows));
message += Feedback.println(mode, "processed " + processedRows + " row(s) " + "( " + processedTriples + " Triples ).");
SpreadsheetParsingResult result = new SpreadsheetParsingResult(message, shttl);
return result;
}
public static String processTriplesOfRow(List<PlainTriple> triples, List<String> predicates) {
String clttl = "";
boolean bListSubject = false;
Iterator<PlainTriple> iterTriple = triples.iterator();
while(iterTriple.hasNext()){
PlainTriple triple = iterTriple.next();
String cellValue = triple.obj.trim();
String predicate = triple.pred.trim();
if(!bListSubject){
clttl = clttl + ValueCellProcessing.processSubjectValue(triple.sub.trim());
bListSubject = true;
}
// cell has object value
clttl = clttl + " " + predicate + " ";
if (ValueCellProcessing.isObjectSet(cellValue)) {
StringTokenizer st;
if(cellValue.contains("&")){
st = new StringTokenizer(cellValue, "&");
}
else{
st = new StringTokenizer(cellValue, ",");
}
while (st.hasMoreTokens()) {
clttl = clttl + ValueCellProcessing.processObjectValue(st.nextToken().trim());
if (st.hasMoreTokens()){
clttl = clttl + ", ";
}
}
}
else{
clttl = clttl + ValueCellProcessing.processObjectValue(cellValue);
}
if(iterTriple.hasNext()){
clttl = clttl + " ; \n";
}
else{
clttl = clttl + " . \n\n";
}
}
return clttl;
}
public static List<String> getLabKeyMetadataLists(String labkey_site, String user_name,
String password, String path) throws CommandException {
LabkeyDataHandler loader = new LabkeyDataHandler(labkey_site, user_name, password, path);
try {
List<String> queryNames = loader.getMetadataQueryNames(false);
return queryNames;
} catch (CommandException e) {
if(e.getMessage().equals("Unauthorized")){
throw e;
}
}
return null;
}
public static List<String> getLabKeyInstanceDataLists(String labkey_site, String user_name,
String password, String path) throws CommandException {
LabkeyDataHandler loader = new LabkeyDataHandler(labkey_site, user_name, password, path);
try {
List<String> queryNames = loader.getInstanceDataQueryNames();
return queryNames;
} catch (CommandException e) {
if(e.getMessage().equals("Unauthorized")){
throw e;
}
}
return null;
}
public static List<String> getLabKeyFolders(String labkey_site, String user_name,
String password, String path) throws CommandException {
LabkeyDataHandler loader = new LabkeyDataHandler(labkey_site, user_name, password, path);
try {
List<String> folders = loader.getSubfolders();
return folders;
} catch (CommandException e) {
if(e.getMessage().equals("Unauthorized")){
throw e;
}
}
return null;
}
private static String loadTriples(
LabkeyDataHandler loader,
List<String> list_names,
Map< String, Map< String, List<PlainTriple> > > mapSheets,
Map< String, List<String> > mapPreds) throws CommandException {
String message = "";
try {
List<String> queryNames = null;
if(list_names == null){
queryNames = loader.getAllQueryNames();
}
else{
queryNames = new LinkedList<String>();
queryNames.addAll(list_names);
}
for(String query : queryNames){
List<String> cols = loader.getColumnNames(query, false);
if(loader.containsInstanceData(cols) || loader.containsMetaData(cols)){
mapSheets.put(query, loader.selectRows(query, cols));
}
mapPreds.put(query, cols);
}
System.out.println("Data extraction finished...");
} catch (CommandException e) {
if(e.getMessage().equals("Unauthorized")){
throw e;
}
else{
return e.getMessage();
}
}
return message;
}
public static Model importStudy(String labkey_site, String user_name,
String password, String path, String studyUri) throws CommandException {
LabkeyDataHandler loader = new LabkeyDataHandler(labkey_site, user_name, password, path);
Map< String, Map< String, List<PlainTriple> > > mapSheets =
new HashMap< String, Map< String, List<PlainTriple> > >();
Map< String, List<String> > mapPreds =
new HashMap< String, List<String> >();
loadTriples(loader, loader.getAllQueryNames(), mapSheets, mapPreds);
String ttl = NameSpaces.getInstance().printTurtleNameSpaceList();
for(String queryName : mapSheets.keySet()){
Map< String, List<PlainTriple> > sheet = mapSheets.get(queryName);
SpreadsheetParsingResult result = generateTTL(Feedback.WEB, sheet, mapPreds.get(queryName));
ttl = ttl + "\n# concept: " + queryName + result.getTurtle() + "\n";
}
String fileName = "";
try {
fileName = TTL_DIR + "labkey.ttl";
FileUtils.writeStringToFile(new File(fileName), ttl);
} catch (IOException e) {
e.printStackTrace();
}
Model refModel = RDFDataMgr.loadModel(fileName);
Model targetModel = ModelFactory.createDefaultModel();
HashSet<String> visitedNodes = new HashSet<String>();
Selector selector = new SimpleSelector(
refModel.getResource(ValueCellProcessing.replacePrefixEx(studyUri)), (Property)null, (RDFNode)null);
StmtIterator iter = refModel.listStatements(selector);
if (iter.hasNext()) {
Resource studyNode = iter.nextStatement().getSubject();
forwardTraverseGraph(studyNode, visitedNodes, refModel, targetModel);
}
selector = new SimpleSelector(
null, (Property)null, refModel.getResource(ValueCellProcessing.replacePrefixEx(studyUri)));
iter = refModel.listStatements(selector);
if (iter.hasNext()) {
RDFNode studyNode = iter.nextStatement().getObject();
backwardTraverseGraph((Resource)studyNode, visitedNodes, refModel, targetModel);
}
return targetModel;
}
private static void forwardTraverseGraph(Resource node, HashSet<String> visitedNodes,
Model refModel, Model targetModel) {
StmtIterator iter = node.listProperties();
while (iter.hasNext()) {
Statement stmt = iter.nextStatement();
RDFNode object = stmt.getObject();
if (object.isResource()) {
targetModel.add(node, stmt.getPredicate(), object);
if (!visitedNodes.contains(object.toString())) {
visitedNodes.add(node.toString());
forwardTraverseGraph((Resource)object, visitedNodes, refModel, targetModel);
}
}
else {
targetModel.add(node, stmt.getPredicate(), object);
}
}
}
private static void backwardTraverseGraph(Resource node, HashSet<String> visitedNodes,
Model refModel, Model targetModel) {
Selector selector = new SimpleSelector(null, (Property)null, node);
StmtIterator iter = refModel.listStatements(selector);
while (iter.hasNext()) {
Statement stmt = iter.nextStatement();
Resource subject = stmt.getSubject();
targetModel.add(subject, stmt.getPredicate(), node);
forwardTraverseGraph(subject, visitedNodes, refModel, targetModel);
backwardTraverseGraph(subject, visitedNodes, refModel, targetModel);
}
}
public static ParsingResult importDataAcquisition(String labkey_site, String user_name,
String password, String path, String target_study_uri) throws CommandException {
final SysUser user = AuthApplication.getLocalUser(Controller.session());
String ownerUri = UserManagement.getUriByEmail(user.getEmail());
String message = "";
LabkeyDataHandler loader = new LabkeyDataHandler(labkey_site, user_name, password, path);
Map< String, Map< String, List<PlainTriple> > > mapSheets =
new HashMap< String, Map< String, List<PlainTriple> > >();
Map< String, List<String> > mapPreds =
new HashMap< String, List<String> >();
List<String> list_names = new ArrayList<String>();
list_names.add("DataAcquisition");
String ret = loadTriples(loader, list_names, mapSheets, mapPreds);
if(!ret.equals("")){
return new ParsingResult(1, message + ret);
}
String filePath = TTL_DIR + "labkey.ttl";
message += parseTriplesToTTL(Feedback.WEB, filePath, mapSheets, mapPreds);
ParsingResult isValid = verifyTTL(Feedback.WEB, filePath);
message += isValid.getMessage();
if (isValid.getStatus() != 0) {
return new ParsingResult(1, message);
}
for(String queryName : mapSheets.keySet()){
Map< String, List<PlainTriple> > sheet = mapSheets.get(queryName);
for (String uri : sheet.keySet()) {
System.out.println(String.format("Processing data acquisition %s", uri));
String dataAcquisitionUri = ValueCellProcessing.convertToWholeURI(uri);
DataAcquisition dataAcquisition = DataAcquisition.findByUri(dataAcquisitionUri);
if (null == dataAcquisition) {
dataAcquisition = new DataAcquisition();
dataAcquisition.setUri(dataAcquisitionUri);
dataAcquisition.setOwnerUri(ownerUri);
dataAcquisition.setPermissionUri(ownerUri);
dataAcquisition.setTriggeringEvent(TriggeringEvent.INITIAL_DEPLOYMENT);
dataAcquisition.setNumberDataPoints(
Measurement.getNumByDataAcquisition(dataAcquisition));
}
boolean bCanSave = true;
for (PlainTriple triple : sheet.get(uri)) {
String cellValue = triple.obj.trim();
String predicate = triple.pred.trim();
if (predicate.equals("a")) {
StringTokenizer st;
if(cellValue.contains("&")){
st = new StringTokenizer(cellValue, "&");
}
else{
st = new StringTokenizer(cellValue, ",");
}
while (st.hasMoreTokens()) {
dataAcquisition.addTypeUri(
ValueCellProcessing.convertToWholeURI(st.nextToken().trim()));
}
}
else if (predicate.equals("prov:wasAssociatedWith")) {
StringTokenizer st;
if(cellValue.contains("&")){
st = new StringTokenizer(cellValue, "&");
}
else{
st = new StringTokenizer(cellValue, ",");
}
while (st.hasMoreTokens()) {
dataAcquisition.addAssociatedUri(
ValueCellProcessing.convertToWholeURI(st.nextToken().trim()));
}
}
else if (predicate.equals("rdfs:label")) {
dataAcquisition.setLabel(cellValue);
}
else if (predicate.equals("rdfs:comment")) {
dataAcquisition.setComment(cellValue);
}
else if (predicate.equals("prov:startedAtTime")) {
dataAcquisition.setStartedAt(cellValue);
}
else if (predicate.equals("prov:endedAtTime")) {
dataAcquisition.setEndedAt(cellValue);
}
else if (predicate.equals("prov:used")) {
dataAcquisition.setParameter(cellValue);
}
else if (predicate.equals("hasco:isDataAcquisitionOf")) {
String studyUri = ValueCellProcessing.convertToWholeURI(cellValue);
if (!target_study_uri.equals("")) {
if (!studyUri.equals(target_study_uri)) {
bCanSave = false;
break;
}
}
dataAcquisition.setStudyUri(studyUri);
}
else if (predicate.equals("hasco:hasTriggeringEvent")) {
dataAcquisition.setTriggeringEvent(dataAcquisition.getTriggeringEventByName(cellValue));
}
else if (predicate.equals("hasco:hasMethod")) {
dataAcquisition.setMethodUri(ValueCellProcessing.convertToWholeURI(cellValue));
}
else if (predicate.equals("hasco:hasSchema")) {
dataAcquisition.setSchemaUri(ValueCellProcessing.convertToWholeURI(cellValue));
}
else if (predicate.equals("hasneto:hasDeployment")) {
String deployment_uri = ValueCellProcessing.convertToWholeURI(cellValue);
dataAcquisition.setDeploymentUri(deployment_uri);
Deployment deployment = Deployment.find(deployment_uri);
if (deployment != null) {
dataAcquisition.setPlatformUri(deployment.getPlatform().getUri());
dataAcquisition.setInstrumentUri(deployment.getInstrument().getUri());
dataAcquisition.setPlatformName(deployment.getPlatform().getLabel());
dataAcquisition.setInstrumentModel(deployment.getInstrument().getLabel());
dataAcquisition.setStartedAtXsdWithMillis(deployment.getStartedAt());
}
}
}
if (bCanSave == false || dataAcquisition.getStartedAt() == null || dataAcquisition.getStartedAt().isEmpty()
|| dataAcquisition.getDeploymentUri() == null || dataAcquisition.getDeploymentUri().isEmpty()
|| dataAcquisition.getSchemaUri() == null || dataAcquisition.getSchemaUri().isEmpty()) {
continue;
}
dataAcquisition.save();
System.out.println("Successfully saved " + dataAcquisition.getUri() + " in Solr");
}
}
return new ParsingResult(0, message);
}
private static ParsingResult verifyTTL(int mode, String filePath) {
String listing = "";
try {
listing = URLEncoder.encode(SpreadsheetProcessing.printFileWithLineNumber(mode, filePath), "UTF-8");
} catch (UnsupportedEncodingException e) {
e.printStackTrace();
};
String message = "";
message += Feedback.println(mode, " ");
message += Feedback.println(mode, " Generated " + filePath + " and stored locally.");
try {
RDFDataMgr.loadModel(filePath);
message += Feedback.println(mode, " ");
message += Feedback.print(mode, "SUCCESS parsing the document!");
message += Feedback.println(mode, " ");
message += Feedback.println(mode, "==== TURTLE (TTL) CODE GENERATED FROM LABKEY ====");
message += listing;
} catch (Exception e) {
message += Feedback.println(mode, " ");
message += Feedback.print(mode, "ERROR parsing the document!");
message += Feedback.println(mode, " ");
message += e.getMessage();
message += Feedback.println(mode, " ");
message += Feedback.println(mode, " ");
message += Feedback.println(mode, "==== TURTLE (TTL) CODE GENERATED FROM LABKEY ====");
message += listing;
return new ParsingResult(1, message);
}
return new ParsingResult(0, message);
}
private static String parseTriplesToTTL(int mode, String filePath,
Map< String, Map< String, List<PlainTriple> > > mapSheets,
Map< String, List<String> > mapPreds) {
String message = "";
message += Feedback.println(mode, " Parsing triples from LABKEY " );
message += Feedback.println(mode, " ");
String ttl = NameSpaces.getInstance().printTurtleNameSpaceList();
for(String queryName : mapSheets.keySet()) {
Map< String, List<PlainTriple> > sheet = mapSheets.get(queryName);
message += Feedback.print(mode, " Processing sheet " + queryName + " () ");
for (int i = queryName.length(); i < 25; i++) {
message += Feedback.print(mode, ".");
}
SpreadsheetParsingResult result = generateTTL(mode, sheet, mapPreds.get(queryName));
ttl = ttl + "\n# concept: " + queryName + result.getTurtle() + "\n";
message += result.getMessage();
}
try {
FileUtils.writeStringToFile(new File(filePath), ttl);
} catch (IOException e) {
message += e.getMessage();
return message;
}
return message;
}
public static String generateTTL(int mode, String oper, RDFContext rdf, String labkey_site,
String user_name, String password, String path, List<String> list_names) throws CommandException {
String message = "";
if (oper.equals("load")) {
message += Feedback.println(mode, " Triples before loading from LABKEY: " + rdf.totalTriples());
message += Feedback.println(mode, " ");
}
LabkeyDataHandler loader = new LabkeyDataHandler(labkey_site, user_name, password, path);
Map< String, Map< String, List<PlainTriple> > > mapSheets =
new HashMap< String, Map< String, List<PlainTriple> > >();
Map< String, List<String> > mapPreds =
new HashMap< String, List<String> >();
String ret = loadTriples(loader, list_names, mapSheets, mapPreds);
if(!ret.equals("")) {
return (message + ret);
}
String filePath = TTL_DIR + "labkey.ttl";
message += parseTriplesToTTL(mode, filePath, mapSheets, mapPreds);
message += verifyTTL(mode, filePath).getMessage();
if (oper.equals("load")) {
message += Feedback.print(mode, " Uploading generated file.");
rdf.loadLocalFile(mode, filePath, KB_FORMAT);
message += Feedback.println(mode, "");
message += Feedback.println(mode, " ");
message += Feedback.println(mode, " Triples after [loading from LABKEY]: " + rdf.totalTriples());
}
return message;
}
public static String processTTL(int mode, String oper, RDFContext rdf, String fileName) {
String message = "";
if (oper.equals("load")) {
message += Feedback.println(mode, " Triples before loading from LABKEY: " + rdf.totalTriples());
message += Feedback.println(mode, " ");
}
String listing = "";
try {
listing = URLEncoder.encode(SpreadsheetProcessing.printFileWithLineNumber(mode, fileName), "UTF-8");
} catch (UnsupportedEncodingException e) {
e.printStackTrace();
};
message += Feedback.println(mode, " ");
message += Feedback.println(mode, " Generated " + fileName + " and stored locally.");
try {
RDFDataMgr.loadModel(fileName);
message += Feedback.println(mode, " ");
message += Feedback.print(mode, "SUCCESS parsing the document!");
message += Feedback.println(mode, " ");
message += Feedback.println(mode, "==== TURTLE (TTL) FILE CONTENT ====");
message += listing;
} catch (Exception e) {
message += Feedback.println(mode, " ");
message += Feedback.print(mode, "ERROR parsing the document!");
message += Feedback.println(mode, " ");
message += e.getMessage();
message += Feedback.println(mode, " ");
message += Feedback.println(mode, " ");
message += Feedback.println(mode, "==== TURTLE (TTL) FILE CONTENT ====");
message += listing;
return message;
}
message += Feedback.print(mode, " Uploading generated file.");
rdf.loadLocalFile(mode, fileName, KB_FORMAT);
message += Feedback.println(mode, "");
message += Feedback.println(mode, " ");
message += Feedback.println(mode, " Triples after [loading]: " + rdf.totalTriples());
return message;
}
}