/* * This software is Copyright 2005,2006,2007,2008 Langdale Consultants. * Langdale Consultants can be contacted at: http://www.langdale.com.au */ package au.com.langdale.profiles; import java.io.PrintWriter; import java.util.HashMap; import java.util.Map; import org.apache.poi.hssf.usermodel.HSSFCell; import org.apache.poi.hssf.usermodel.HSSFRow; import org.apache.poi.hssf.usermodel.HSSFSheet; import org.apache.poi.hssf.usermodel.HSSFWorkbook; import au.com.langdale.kena.Composition; import au.com.langdale.kena.OntModel; import au.com.langdale.kena.OntResource; import au.com.langdale.kena.Resource; import au.com.langdale.kena.ResourceFactory; import au.com.langdale.profiles.ProfileClass.PropertyInfo; import au.com.langdale.util.Logger; import au.com.langdale.util.NSMapper; import com.hp.hpl.jena.vocabulary.OWL; import com.hp.hpl.jena.vocabulary.RDF; /** * Extract a profile model from a spreadsheet. The POI library is used * to read the spreadsheet and POI classes represent the spreadsheet, worksheets, * rows and cells. * <p> * Usage: * <ol> * <li>Instantiate with a spreadsheet object and a base model. * <li>Create a number of CellSpec objects and pass each to scanCells() * <li>Invoke reorganize() * <li>Retrieve the result. */ public class SpreadsheetParser { private static Resource CLASS = ResourceFactory.createResource(OWL.Class); private static Resource OBJECT_PROPERTY = ResourceFactory.createResource(OWL.ObjectProperty); private static Resource PROPERTY = ResourceFactory.createResource(RDF.Property); /** * Represents a Row or Sheet number. */ public static class IndexNum { private int value; public int toInt() { return value; } public IndexNum set(int value) { this.value = value; return this; } public IndexNum set(String value) { this.value = Integer.parseInt(value) - 1; if( this.value < 0) throw new NumberFormatException(); return this; } @Override public String toString() { return String.valueOf(value + 1); } } /** * Represents a column letter. */ public static class ColNum { private short value; public short toShort() { return value; } public ColNum set(int value) { this.value = (short)value; return this; } public ColNum set(String value) { this.value = (short) (Integer.parseInt(value, 36) - 10); if( this.value < 0 || this.value >= 26 ) throw new NumberFormatException(); return this; } @Override public String toString() { return Integer.toString(value+10, 36); } } /** * Represents a group of cells from which profile information can be extracted. */ public static abstract class CellSpec { public final IndexNum sheetNo = new IndexNum(); public final IndexNum firstRow = new IndexNum(); public final ColNum classCol = new ColNum(); public final ColNum propCol = new ColNum(); public final ColNum flagCol = new ColNum(); public final ColNum cardCol = new ColNum(); public final ColNum nsCol = new ColNum(); public abstract void handleRow(SpreadsheetParser context, HSSFRow row) throws ParseProblem; } /** * Represents a group of cells from which profile class definitions can be extracted. */ public static class ClassCellSpec extends CellSpec { @Override public void handleRow(SpreadsheetParser context, HSSFRow row) throws ParseProblem { if( ! isTrue(row, flagCol)) return; context.getProfileFor(getLocalName(row, classCol)); } } /** * Represents a group of cells from which DatatypeProperty definitions can be extracted. */ public static class AttribCellSpec extends CellSpec { @Override public void handleRow(SpreadsheetParser context, HSSFRow row) throws ParseProblem { if( ! isTrue(row, flagCol)) return; ProfileClass profile = context.getProfileFor(getLocalName(row, classCol)); context.addProperty(profile, getLocalName(row, propCol), PROPERTY); } } /** * Represents a group of cells from which ObjectProperty definitions can be extracted. */ public static class AssocCellSpec extends CellSpec { @Override public void handleRow(SpreadsheetParser context, HSSFRow row) throws ParseProblem { if( ! isTrue(row, flagCol)) return; ProfileClass profile = context.getProfileFor(getLocalName(row, classCol)); OntResource prop = context.addProperty(profile, getLocalName(row, propCol), OBJECT_PROPERTY); context.setCardinality(profile, prop, getString(row, cardCol)); } } /** * An exception used internally to propagate errors to the Logger. */ public static class ParseProblem extends Exception { private static final long serialVersionUID = -1913955957664239443L; public ParseProblem(String message) { super(message); } } private String namespace; private OntModel model; private HSSFWorkbook book; private OntModel background; private OntModel result; private Map profiles = new HashMap(); private NSMapper mapper; private Logger logger; /** * Instantiate * @param book: the spreadsheet * @param background: the base model to which the profile will apply * @param namespace: the namespace for newly created profile defintions * @param logger: destination for error messages */ public SpreadsheetParser(HSSFWorkbook book, OntModel result, OntModel background, String namespace, Logger logger) { this.namespace = namespace; this.book = book; this.result = result; this.background = background; mapper = new NSMapper(background); model = Composition.merge(result, background); this.logger = logger; } /** * @return: the extracted profile model */ public OntModel getResult() { return result; } /** * Scan a portion of the spreadsheet and extract profile information. * @param spec: indicates the part of the spreadsheet to scan * and the type of profile information expected. */ public void scanCells(CellSpec spec) { HSSFSheet sheet = book.getSheetAt(spec.sheetNo.toInt()); int rownum = spec.firstRow.toInt(); for( ;; ) { HSSFRow row = sheet.getRow(rownum++); if( row == null) break; try { spec.handleRow(this, row); } catch (ParseProblem e) { log(e, spec.sheetNo.toString(), new IndexNum().set(rownum).toString()); //print(row, spec); } } } /** * Complete the profile and a apply RDFS profile design rules. * This is called once all cells of interest have been scanned. */ public void reorganize() { Reorganizer utility = new Reorganizer(result, background, true); utility.run(); result = utility.getResult(); } private ProfileClass getProfileFor(String name) throws ParseProblem { String uri = namespace + name; // construct a profile class URI ProfileClass profile = (ProfileClass) profiles.get(uri); if( profile == null) { Resource base = mapper.map(name, CLASS); // construct a base class URI if( base == null ) throw new ParseProblem("undefined class: " + name); profile = new ProfileClass(model.createClass(uri), namespace, model.createResource(base.asNode())); profiles.put(uri, profile); } return profile; } private OntResource addProperty(ProfileClass profile, String name, Resource type) throws ParseProblem { String qualified = profile.getBaseClass().getLocalName() + "." + name; Resource base = mapper.map(qualified, type); // construct a base property URI if( base == null ) throw new ParseProblem("undefined "+ type.asNode().getLocalName() + " : " + qualified); OntResource prop = model.createResource(base.getURI()); profile.createAllValuesFrom(prop, false); return prop; } private void setCardinality(ProfileClass profile, OntResource prop, String card) { PropertyInfo info = profile.getPropertyInfo(prop); if( card.startsWith("1")) info.setMinCardinality(1); if( card.endsWith("1")) info.setMaxCardinality(1); } private static boolean isTrue(HSSFRow row, ColNum colnum) { HSSFCell cell = row.getCell(colnum.toShort()); if( cell == null ) return false; return cell.toString().trim().equals("TRUE"); } private static String getString(HSSFRow row, ColNum colnum) throws ParseProblem { HSSFCell cell = row.getCell(colnum.toShort()); if( cell == null ) throw new ParseProblem("cell " + colnum + " is empty"); return cell.toString().trim(); } private static String getLocalName(HSSFRow row, ColNum colnum) throws ParseProblem { String raw = getString(row, colnum); return raw; // TODO: process as NCNAME } public static void print(PrintWriter out, HSSFRow row, CellSpec spec) { printCell(out, row, spec.nsCol); printCell(out, row, spec.classCol); printCell(out, row, spec.propCol); printCell(out, row, spec.cardCol); printCell(out, row, spec.flagCol); out.println(); } public static void printCell(PrintWriter out, HSSFRow row, ColNum index) { if(row == null) return; HSSFCell cell = row.getCell(index.toShort()); if( cell == null) out.print(" null"); else out.print(" " + cell.toString()); } void log(ParseProblem error, String sheet, String row) { logger.log("Sheet: " + sheet + " Row: " + row + " - " + error.getMessage()); } }