package org.hadatac.metadata.loader; import java.io.File; import java.io.FileInputStream; import java.io.FileNotFoundException; import java.io.FileReader; import java.io.IOException; import java.io.LineNumberReader; import java.io.UnsupportedEncodingException; import java.net.URLEncoder; import java.text.SimpleDateFormat; import java.util.Date; import org.apache.commons.io.FileUtils; import org.apache.jena.rdf.model.Model; import org.apache.jena.riot.RDFDataMgr; import org.apache.poi.xssf.usermodel.XSSFSheet; import org.apache.poi.xssf.usermodel.XSSFWorkbook; import org.hadatac.metadata.loader.Loader; import org.hadatac.metadata.loader.SheetProcessing; import org.hadatac.metadata.model.SpreadsheetParsingResult; import org.hadatac.utils.Feedback; import org.hadatac.utils.NameSpaces; public class SpreadsheetProcessing { public static final String KB_FORMAT = "text/turtle"; public static final String TTL_DIR = "tmp/ttl/"; public static String printFileWithLineNumber(int mode, String filename) { String str = ""; int lineNumber = 1; LineNumberReader reader = null; String line = null; try { reader = new LineNumberReader(new FileReader(filename)); while ((line = reader.readLine()) != null) { str += Feedback.println(mode, lineNumber++ + " " + line); } reader.close(); } catch (Exception e) { e.printStackTrace(); } return str; } public static String generateTTL(int mode, String oper, RDFContext rdf, String xlsName) { String message = ""; if (oper.equals("load")) { message += Feedback.println(mode, " Triples before [loadXLS]: " + rdf.totalTriples()); message += Feedback.println(mode, " "); } message += Feedback.println(mode, " Parsing spreadsheet " + xlsName); message += Feedback.println(mode, " "); String ttl = ""; //Create Workbook instance holding reference to .xlsx file FileInputStream file; try { file = new FileInputStream(new File(xlsName)); XSSFWorkbook workbook; try { ttl += NameSpaces.getInstance().printTurtleNameSpaceList(); workbook = new XSSFWorkbook(file); //Iterate through workbook's sheets for (int currentSheet=0; currentSheet < workbook.getNumberOfSheets(); currentSheet++) { XSSFSheet sheet = workbook.getSheetAt(currentSheet); message += Feedback.print(mode, " Processing sheet " + sheet.getSheetName() + " "); for (int i = sheet.getSheetName().length(); i < 20; i++) { message += Feedback.print(mode, "."); } SpreadsheetParsingResult result = SheetProcessing.generateTTL(mode, sheet); ttl = ttl + "\n# concept: " + sheet.getSheetName() + result.getTurtle() + "\n"; message += result.getMessage(); } workbook.close(); file.close(); } catch (IOException e) { message += Feedback.println(mode, "[ERROR]: Could not open file " + xlsName + " as an XLS spreadsheet"); return message; //e.printStackTrace(); } } catch (FileNotFoundException e) { message += Feedback.println(mode, "[ERROR]: Could not open file " + xlsName); return message; //e.printStackTrace(); } String fileName = ""; try { String timeStamp = new SimpleDateFormat("yyyyMMdd-HHmmss").format(new Date()); fileName = TTL_DIR + "HASNetO-" + timeStamp + ".ttl"; FileUtils.writeStringToFile(new File(fileName), ttl); } catch (IOException e) { message += e.getMessage(); return message; } String listing = ""; try { listing = URLEncoder.encode(SpreadsheetProcessing.printFileWithLineNumber(mode, fileName), "UTF-8"); //System.out.println(SpreadsheetProcessing.printFileWithLineNumber(mode, fileName)); } catch (UnsupportedEncodingException e1) { e1.printStackTrace(); }; message += Feedback.println(mode, " "); message += Feedback.println(mode, " Generated " + fileName + " and stored locally."); try { Model model = RDFDataMgr.loadModel(fileName); message += Feedback.println(mode, " "); message += Feedback.print(mode, "SUCCESS parsing the document!"); message += Feedback.println(mode, " "); } catch (Exception e) { message += Feedback.println(mode, " "); message += Feedback.print(mode, "ERROR parsing the document!"); message += Feedback.println(mode, " "); message += e.getMessage(); message += Feedback.println(mode, " "); message += Feedback.println(mode, " "); message += Feedback.println(mode, "==== TURTLE (TTL) CODE GENERATED FROM THE SPREADSHEET ===="); message += listing; return message; } if (oper.equals("load")) { message += Feedback.print(mode, " Uploading generated file."); rdf.loadLocalFile(mode, fileName, KB_FORMAT); message += Feedback.println(mode, ""); message += Feedback.println(mode, " "); message += Feedback.println(mode, " Triples after [loadXLS]: " + rdf.totalTriples()); } return message; } }