/* * Copyright 2012-2013 Ontology Engineering Group, Universidad Polit�cnica de Madrid, Spain * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package widoco; import java.io.ByteArrayInputStream; import java.io.IOException; import java.io.StringWriter; import java.net.URLDecoder; import java.util.HashMap; import java.util.Properties; import java.util.logging.Level; import java.util.logging.Logger; import javax.xml.parsers.DocumentBuilder; import javax.xml.parsers.DocumentBuilderFactory; import javax.xml.parsers.ParserConfigurationException; import javax.xml.transform.OutputKeys; import javax.xml.transform.Transformer; import javax.xml.transform.TransformerException; import javax.xml.transform.TransformerFactory; import javax.xml.transform.dom.DOMSource; import javax.xml.transform.stream.StreamResult; import org.w3c.dom.DOMException; import org.w3c.dom.Document; import org.w3c.dom.Node; import org.w3c.dom.NodeList; import org.xml.sax.SAXException; /** * Class made for parsing and manipulating LODE's html. * This class contains most of the TemplateGeneratorOLD class * @author Daniel Garijo */ public class LODEParser { private final HashMap<String,String> replacements; //replace lode's ids with the classes and properties. //this will allow navigating the document properly. It might be troublesome if a class is names as a prop. private String classes; private String classList; private String properties; private String propertyList; private String dataProp; private String dataPropList; private String annotationProp; private String annotationPropList; private String namedIndividuals; private String namedIndividualList; private final HashMap <String,String> namespaceDeclarations; Configuration c; /** * Constructor for the lode parser. The reason for creating this class is that * I don't want to edit LODE's xls file, and I only want to reuse certain parts. * @param lodeContent text obtained as a response from LODE. * @param c configuration object * @param langFile language file to do proper annotations of classes, props, etc. */ public LODEParser(String lodeContent, Configuration c, Properties langFile) { replacements = new HashMap<String, String>(); namespaceDeclarations = new HashMap<String, String>(); this.c = c; parse(lodeContent, langFile); } public String getClassList() { return classList; } public String getClasses() { return classes; } public String getDataProp() { return dataProp; } public String getDataPropList() { return dataPropList; } public String getProperties() { return properties; } public String getPropertyList() { return propertyList; } public HashMap<String, String> getNamespaceDeclarations() { return namespaceDeclarations; } public String getAnnotationProp() { return annotationProp; } public String getAnnotationPropList() { return annotationPropList; } public String getNamedIndividuals() { return namedIndividuals; } public String getNamedIndividualList() { return namedIndividualList; } private void parse(String content, Properties langFile){ try { DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance(); DocumentBuilder db = dbf.newDocumentBuilder(); Document doc = db.parse(new ByteArrayInputStream(content.getBytes("UTF-8")));//StandardCharsets.UTF_8 NodeList html = doc.getElementsByTagName("div"); // String cList = "", pList= "", dPList= "", c= "", p= "", dp=""; for(int i = 0; i<html.getLength();i++){ String attrID = html.item(i).getAttributes().item(0).getTextContent(); if(attrID.equals("classes")){ classList = getTermList(html.item(i)); classes = nodeToString(html.item(i)); classes = classes.replace("<h2>"+langFile.getProperty(Constants.LANG_CLASSES)+"</h2>", "<h3 id=\"classes\" class=\"list\">"+langFile.getProperty(Constants.LANG_CLASSES)+"</h3>"); } else if(attrID.equals("objectproperties")){ propertyList =getTermList(html.item(i)); properties = (nodeToString(html.item(i))); properties = properties.replace("<h2>"+langFile.getProperty(Constants.LANG_OBJ_PROP)+"</h2>", "<h3 id=\"properties\" class=\"list\">"+langFile.getProperty(Constants.LANG_OBJ_PROP)+"</h3>"); } else if(attrID.equals("dataproperties")){ dataPropList = (getTermList(html.item(i))); dataProp = (nodeToString(html.item(i))); dataProp = dataProp.replace("<h2>"+langFile.getProperty(Constants.LANG_DATA_PROP)+"</h2>", "<h3 id=\"dataproperties\" class=\"list\">"+langFile.getProperty(Constants.LANG_DATA_PROP)+"</h3>"); } else if(attrID.equals("annotationproperties")){ annotationPropList = (getTermList(html.item(i))); annotationProp = (nodeToString(html.item(i))); annotationProp = annotationProp.replace("<h2>"+langFile.getProperty(Constants.LANG_ANN_PROP)+"</h2>", "<h3 id=\"annotationproperties\" class=\"list\">"+langFile.getProperty(Constants.LANG_ANN_PROP)+"</h3>"); } else if(attrID.equals("namedindividuals")){ namedIndividualList = (getTermList(html.item(i))); namedIndividuals = (nodeToString(html.item(i))); namedIndividuals = namedIndividuals.replace("<h2>"+langFile.getProperty(Constants.LANG_NAMED_INDIV)+"</h2>", "<h3 id=\"namedindividuals\" class=\"list\">"+langFile.getProperty(Constants.LANG_NAMED_INDIV)+"</h3>"); } else if(attrID.equals("namespacedeclarations")){ Node namespace = html.item(i); //<dt> prefix </dt> <dd>namespace</dd> try{ NodeList dl = namespace.getChildNodes().item(1).getChildNodes();//first node is h2. second is dl int j = 0; while(j<dl.getLength()){ String key = dl.item(j).getTextContent(); if(dl.item(j).getNodeName().equals("dt")){ String value = dl.item(j+1).getTextContent(); //System.out.println(key+","+value); //there might be duplicate ns. Don't add them if(!namespaceDeclarations.containsValue(value)){ namespaceDeclarations.put(key,value); } } j++; } }catch(Exception e){ System.err.println("Error while retrieving the namespaces from LODE"); } } } //fix ids if(!"".equals(classList)&&classList!=null){ classList = fixIds(classList); classes = fixIds(classes); } if(!"".equals(propertyList) &&propertyList!=null){ propertyList = fixIds(propertyList); properties = fixIds(properties); } if(!"".equals(dataPropList)&& dataPropList!=null){ dataPropList = fixIds(dataPropList); dataProp = fixIds(dataProp); } if(!"".equals(annotationPropList)&& annotationPropList!=null){ annotationPropList = fixIds(annotationPropList); annotationProp = fixIds(annotationProp); } if(!"".equals(namedIndividualList)&& namedIndividualList!=null){ namedIndividualList = fixIds(namedIndividualList); namedIndividuals = fixIds(namedIndividuals); } System.out.println("Parsing Complete!"); } catch (ParserConfigurationException ex) { System.out.println("Exception interpreting the resource: "+ ex.getMessage()); } catch (DOMException ex) { System.out.println("Exception interpreting the resource: "+ ex.getMessage()); } catch (SAXException ex) { Logger.getLogger(LODEParser.class.getName()).log(Level.SEVERE, null, ex); } catch (IOException ex) { Logger.getLogger(LODEParser.class.getName()).log(Level.SEVERE, null, ex); } } private String getTermList(Node n){ NodeList divs = n.getChildNodes(); for(int j = 0; j<divs.getLength(); j++){ if(divs.item(j).getNodeName().equals("ul")){ return(nodeToString(divs.item(j))); } } return null; } private String nodeToString(Node n){ try { TransformerFactory transfac = TransformerFactory.newInstance(); Transformer trans = transfac.newTransformer(); trans.setOutputProperty(OutputKeys.OMIT_XML_DECLARATION, "yes"); trans.setOutputProperty(OutputKeys.INDENT, "yes"); StringWriter sw = new StringWriter(); StreamResult result = new StreamResult(sw); DOMSource source = new DOMSource(fixAnchor(n)); trans.transform(source, result); return sw.toString(); // String returnValue= sw.toString().replace("\n", ""); // return(returnValue); } catch (IllegalArgumentException ex) { System.err.println("Error while writing to xml "+ex.getMessage()); //ex.printStackTrace(); return null; } catch (TransformerException ex) { System.err.println("Error while writing to xml "+ex.getMessage()); //ex.printStackTrace(); return null; } } //this methods removes the first 2 anchors of the div returned by LODE (they lead to an error). //it also changes the id of the div replacing it with the name found in the anchor //(the second one) private Node fixAnchor(Node nodeToFix) { try{ NodeList outerDiv = nodeToFix.getChildNodes(); for(int i = 0; i<outerDiv.getLength(); i++){ Node currentNode = outerDiv.item(i); if(currentNode.getNodeName().equals("div")){ //NodeList list = nodeToFix.getChildNodes(); Node firstAnchor = currentNode.getFirstChild(); Node secondAnchor = firstAnchor.getNextSibling(); String newID = firstAnchor.getAttributes().getNamedItem("name").getNodeValue(); newID = newID.replace(c.getMainOntology().getNamespaceURI(), ""); try{ //if the URI contains special characters, we must decode them for referencing them properly. newID = URLDecoder.decode(newID, "UTF-8"); }catch(Exception e){ System.err.println("Error when encoding node."); } if (newID.startsWith("#")){ newID = newID.replace("#", ""); }//fix in case the author insert the NS URI without "#" if(secondAnchor.getNodeName().equals("a")){ currentNode.removeChild(secondAnchor); } //we save the the id for derreferencing properly the resource. Note that //if a property has the same name as a Class this could lead to problems replacements.put(currentNode.getAttributes().getNamedItem("id").getNodeValue()+"\"", newID+"\""); //I include the comma at the end so smaller ids don't replace larger ids. (quick fix) //we remove the anchor, which makes an error in the visualization currentNode.removeChild(firstAnchor); } } return nodeToFix; }catch(DOMException ex){ System.err.println("Could not fix node"); return nodeToFix; } } /** * Method to fix the ids generated automatically by LODE with the URIs of the classes and properties. * @param textToBeFixed The input text with the links to be fixed * @return */ private String fixIds(String textToBeFixed){ for (String keyToReplace : replacements.keySet()) { textToBeFixed = textToBeFixed.replace(keyToReplace, replacements.get(keyToReplace)); textToBeFixed = textToBeFixed.replace("<span>:", "<span>"); } return textToBeFixed; } }