package eu.esdihumboldt.hale.io.codelist.skos.reader;
import java.io.IOException;
import java.io.InputStream;
import java.io.StringReader;
import java.net.URI;
import java.util.ArrayList;
import java.util.Collection;
import java.util.LinkedHashMap;
import java.util.Locale;
import java.util.Map;
import java.util.Set;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import org.semanticweb.skos.SKOSAnnotation;
import org.semanticweb.skos.SKOSConcept;
import org.semanticweb.skos.SKOSConceptScheme;
import org.semanticweb.skos.SKOSDataset;
import org.semanticweb.skos.SKOSEntity;
import org.semanticweb.skos.SKOSUntypedLiteral;
import org.semanticweb.skosapibinding.SKOSManager;
import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.w3c.dom.NamedNodeMap;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
import org.xml.sax.EntityResolver;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;
import de.fhg.igd.slf4jplus.ALogger;
import de.fhg.igd.slf4jplus.ALoggerFactory;
import eu.esdihumboldt.hale.common.codelist.CodeList;
/**
* Reads a SKOS code list and treats its concepts as code entries.
*
* @author Arun
*/
public class SkosCodeList implements CodeList {
/** Logger used for load errors. */
private static final ALogger log = ALoggerFactory.getLogger(SkosCodeList.class);
/** Prefix shared by the SKOS annotation URIs below. */
private static final String SKOS_URI = "http://www.w3.org/2004/02/skos/core#";
/** Annotation URI compared in {@link #isPrefLabel(String)}. */
private static final String SKOS_PREF_LABEL = SKOS_URI + "prefLabel";
/** Annotation URI compared in {@link #isDefinition(String)}. */
private static final String SKOS_DEF_LABEL = SKOS_URI + "definition";
/** One of the two annotation URIs accepted by {@link #isTopConcept(String)}. */
private static final String SKOS_TOPCONCEPT_LABEL = SKOS_URI + "topConceptOf";
/** One of the two annotation URIs accepted by {@link #isTopConcept(String)}. */
private static final String SKOS_INSCHEME_LABEL = SKOS_URI + "inScheme";
/** Suffix an annotation URI must end with to count as a usage note. */
private static final String USAGENOTE_LABEL = "usageNote";
/** Name of the XML attribute looked up to find the language of a node. */
private static final String ATTRIBUTE_LANGUAGE = "xml:lang";
/** Identifier of the code list; {@link #getIdentifier()} falls back to the location while this is null. */
private String identifier;
/** Namespace of the code list, taken from the concept scheme or from a concept's scheme reference. */
private String namespace;
/** Description of the code list, if one was found while loading. */
private String description;
/** Location the code list was loaded from. */
private final URI location;
/** Code entries keyed by their name, in insertion order. */
private final Map<String, CodeEntry> entriesByName = new LinkedHashMap<String, CodeEntry>();
/** Code entries keyed by their identifier, in insertion order. */
private final Map<String, CodeEntry> entriesByIdentifier = new LinkedHashMap<String, CodeEntry>();
/** SKOS data set used while loading; the constructor resets it to null once loading succeeded. */
private SKOSDataset dataSet;
/** Language whose labels and definitions take precedence over all others. */
private final String language;
/** Language used when no label or definition in the requested language has been seen. */
private static final String DEFAULT_LANGUAGE = Locale.US.getLanguage();
/**
* Create a code list from a RDF file and URL.
*
* @param in input stream of source
* @param location the location from where code list loaded
* @param language language of the concepts
* @throws Exception if something will go wrong
*/
public SkosCodeList(InputStream in, URI location, String language) throws Exception {
this.location = location;
this.identifier = null;
this.language = language;
try {
SKOSManager manager = new SKOSManager();
dataSet = manager.loadDatasetFromPhysicalURI(location);
// get ConceptSchemes
if (!loadConceptScheme())
if (!loadConcepts()) {
if (!loadConceptsAsXML(in)) {
throw new RuntimeException("no concept found!");
}
}
this.dataSet = null;
} catch (Exception ex) {
log.error("Error reading skos code list", ex);
throw ex;
}
}
/**
 * Returns a new list holding all code entries in the iteration order of the
 * identifier map; changes to the returned list do not affect this code list.
 */
@Override
public Collection<CodeEntry> getEntries() {
    Collection<CodeEntry> snapshot = new ArrayList<CodeEntry>();
    snapshot.addAll(entriesByIdentifier.values());
    return snapshot;
}
/**
 * Returns the namespace determined while loading, which may be null.
 */
@Override
public String getNamespace() {
    return this.namespace;
}
/**
 * Returns the identifier of the code list. If none was determined while
 * loading, the string form of the location is returned instead, or null if
 * there is no location either.
 */
@Override
public String getIdentifier() {
    if (identifier == null) {
        return (location == null) ? null : location.toString();
    }
    return identifier;
}
/**
 * Returns the description determined while loading, which may be null.
 */
@Override
public String getDescription() {
    return this.description;
}
/**
 * Looks up a code entry by its name.
 *
 * @param name the entry name
 * @return the matching entry, or null if there is none
 */
@Override
public CodeEntry getEntryByName(String name) {
    CodeEntry match = entriesByName.get(name);
    return match;
}
/**
 * Looks up a code entry by its identifier.
 *
 * @param identifier the entry identifier
 * @return the matching entry, or null if there is none
 */
@Override
public CodeEntry getEntryByIdentifier(String identifier) {
    CodeEntry match = entriesByIdentifier.get(identifier);
    return match;
}
/**
 * Returns the location that was passed to the constructor.
 */
@Override
public URI getLocation() {
    return this.location;
}
/**
 * Loads the code list from the first concept scheme of the data set: the
 * scheme's annotations are read first, then its concepts are added.
 *
 * @return false if the data set has no concept scheme, true otherwise
 *         (regardless of whether any concept was added)
 */
private boolean loadConceptScheme() {
    // only the first scheme delivered by the data set is used
    for (SKOSConceptScheme firstScheme : dataSet.getSKOSConceptSchemes()) {
        handleConceptSchemeNode(firstScheme);
        loadConcepts(firstScheme);
        return true;
    }
    return false;
}
/**
 * Loads all concepts of the data set, independent of any concept scheme.
 *
 * @return true if the data set contained at least one concept
 */
private boolean loadConcepts() {
    SKOSConceptScheme noScheme = null;
    return loadConcepts(noScheme);
}
/**
 * Adds the concepts of the given scheme as code entries. If no scheme is
 * given, or the scheme yields no concepts, all concepts of the data set are
 * used instead.
 *
 * @param scheme the concept scheme, may be null
 * @return false if no concepts were found at all, true otherwise
 */
private boolean loadConcepts(SKOSConceptScheme scheme) {
    Set<SKOSConcept> found = (scheme == null) ? dataSet.getSKOSConcepts()
            : scheme.getConceptsInScheme(dataSet);
    if (scheme != null && found.isEmpty()) {
        // the scheme lists no concepts, fall back to the whole data set
        found = dataSet.getSKOSConcepts();
    }
    if (found.isEmpty()) {
        return false;
    }
    for (SKOSConcept concept : found) {
        addConcept(concept);
    }
    return true;
}
/**
 * Derives namespace, identifier and description of the code list from a
 * concept scheme. Namespace and identifier are both set to the URI of the
 * entity; the description is the definition annotation, extended by the
 * usage note if both are present.
 *
 * @param entity the concept scheme entity
 */
private void handleConceptSchemeNode(SKOSEntity entity) {
    String definition = null;
    String note = null;
    for (SKOSAnnotation annotation : entity.getSKOSAnnotations(dataSet)) {
        // only untyped constant annotations are of interest
        if (!annotation.isAnnotationByConstant()
                || annotation.getAnnotationValueAsConstant().isTyped()) {
            continue;
        }
        SKOSUntypedLiteral literal = annotation.getAnnotationValueAsConstant()
                .getAsSKOSUntypedLiteral();
        String annotationUri = annotation.getURI().toString();
        if (isDefinition(annotationUri)) {
            definition = literal.getLiteral();
        }
        else if (isUsageNote(annotationUri)) {
            note = literal.getLiteral();
        }
    }
    String schemeUri = entity.getURI().toString();
    if (definition != null && note != null) {
        definition = definition + "\n\n" + note;
    }
    this.namespace = schemeUri;
    this.description = definition;
    this.identifier = schemeUri;
}
/**
 * Reads the annotations of a concept and registers it as a code entry.
 * <p>
 * For the name (preferred label) and the description (definition) the
 * language is selected as follows: a literal in the requested language
 * always wins; otherwise a literal in the default language is kept once
 * seen; otherwise the most recently seen literal is used. The usage note is
 * appended to the description if both are present. An entry is only
 * registered if both a name and a description were found; the URI of the
 * concept serves as both identifier and namespace of the entry.
 *
 * @param entity the concept entity
 */
private void addConcept(SKOSEntity entity) {
    String label = null;
    String definition = null;
    String note = null;
    String scheme = null;
    // once set, only a literal in the requested language may replace the value
    boolean labelLocked = false;
    boolean definitionLocked = false;
    for (SKOSAnnotation annotation : entity.getSKOSAnnotations(dataSet)) {
        // only untyped constant annotations are of interest
        if (!annotation.isAnnotationByConstant()
                || annotation.getAnnotationValueAsConstant().isTyped()) {
            continue;
        }
        SKOSUntypedLiteral literal = annotation.getAnnotationValueAsConstant()
                .getAsSKOSUntypedLiteral();
        String annotationUri = annotation.getURI().toString();
        if (isPrefLabel(annotationUri)) {
            boolean requested = literal.hasLang() && literal.getLang().equals(this.language);
            if (requested || !labelLocked) {
                boolean fallback = !requested && literal.hasLang()
                        && literal.getLang().equals(DEFAULT_LANGUAGE);
                label = literal.getLiteral();
                labelLocked = labelLocked || requested || fallback;
            }
        }
        else if (isDefinition(annotationUri)) {
            boolean requested = literal.hasLang() && literal.getLang().equals(this.language);
            if (requested || !definitionLocked) {
                boolean fallback = !requested && literal.hasLang()
                        && literal.getLang().equals(DEFAULT_LANGUAGE);
                definition = literal.getLiteral();
                definitionLocked = definitionLocked || requested || fallback;
            }
        }
        else if (isTopConcept(annotationUri)) {
            scheme = literal.getLiteral();
        }
        else if (isUsageNote(annotationUri)) {
            note = literal.getLiteral();
        }
    }
    // without a concept scheme the first concept provides the namespace
    if (this.namespace == null) {
        this.namespace = scheme;
    }
    String conceptUri = entity.getURI().toString();
    if (definition != null && note != null) {
        definition += "\n\n" + note;
    }
    if (label == null || definition == null) {
        return;
    }
    CodeEntry entry = new CodeEntry(label, definition, conceptUri, conceptUri);
    this.entriesByName.put(label, entry);
    this.entriesByIdentifier.put(conceptUri, entry);
}
/**
 * Tests whether the given annotation URI is the SKOS preferred label URI.
 *
 * @param uri the annotation URI, null yields false
 * @return true on an exact match
 */
private boolean isPrefLabel(String uri) {
    boolean matches = SKOS_PREF_LABEL.equals(uri);
    return matches;
}
/**
 * Tests whether the given annotation URI is the SKOS definition URI.
 *
 * @param uri the annotation URI, null yields false
 * @return true on an exact match
 */
private boolean isDefinition(String uri) {
    boolean matches = SKOS_DEF_LABEL.equals(uri);
    return matches;
}
/**
 * Tests whether the given annotation URI denotes a usage note. Only the end
 * of the URI is compared, so the namespace of the annotation is irrelevant.
 *
 * @param uri the annotation URI, must not be null
 * @return true if the URI ends with the usage note suffix
 */
private boolean isUsageNote(String uri) {
    final String suffix = USAGENOTE_LABEL;
    return uri.endsWith(suffix);
}
/**
 * Tests whether the given annotation URI links a concept to its scheme,
 * i.e. equals either the "topConceptOf" or the "inScheme" URI.
 *
 * @param uri the annotation URI, null yields false
 * @return true if the URI equals one of the two URIs
 */
private boolean isTopConcept(String uri) {
    if (SKOS_TOPCONCEPT_LABEL.equals(uri)) {
        return true;
    }
    return SKOS_INSCHEME_LABEL.equals(uri);
}
/**
 * Returns the part of the URI that follows its last slash; the whole string
 * is returned if it contains no slash.
 *
 * @param uri the URI string, must not be null
 * @return the trailing segment, possibly empty
 */
@SuppressWarnings("unused")
private String extractIdentifier(String uri) {
    int segmentStart = uri.lastIndexOf('/') + 1;
    return uri.substring(segmentStart);
}
/**
 * Computes the hash code from identifier, location and namespace, the same
 * fields that {@link #equals(Object)} compares.
 *
 * @see Object#hashCode()
 */
@Override
public int hashCode() {
    int hash = 1;
    // null fields contribute 0
    for (Object part : new Object[] { identifier, location, namespace }) {
        hash = 31 * hash + (part == null ? 0 : part.hashCode());
    }
    return hash;
}
/**
 * Two code lists are equal if they are of the same class and agree in
 * identifier, location and namespace (each compared null-safely).
 *
 * @see Object#equals(Object)
 */
@Override
public boolean equals(Object obj) {
    if (obj == this) {
        return true;
    }
    if (obj == null || obj.getClass() != getClass()) {
        return false;
    }
    SkosCodeList that = (SkosCodeList) obj;
    return (identifier == null ? that.identifier == null : identifier.equals(that.identifier))
            && (location == null ? that.location == null : location.equals(that.location))
            && (namespace == null ? that.namespace == null : namespace.equals(that.namespace));
}
/**
 * Fallback loader that parses the input as a plain XML document and collects
 * the concept scheme and the concepts by their qualified element names,
 * without using the SKOS API.
 * <p>
 * The first <code>skos:conceptScheme</code> element provides namespace,
 * identifier (both from its <code>rdf:about</code> attribute) and
 * description of the code list. Every <code>skos:concept</code> element with
 * a preferred label becomes a code entry whose identifier and namespace are
 * its <code>rdf:about</code> attribute. Labels and definitions are selected
 * by language: the requested language always wins, otherwise the default
 * language is kept once seen, otherwise the most recently seen value is
 * used. Labels and definitions without text content are ignored.
 * <p>
 * NOTE(review): element names are matched literally and case-sensitively;
 * standard SKOS RDF/XML spells them <code>skos:ConceptScheme</code> and
 * <code>skos:Concept</code> - confirm the lower-case spelling is intended.
 *
 * @param in the input stream to parse
 * @return false if the document contains no concept element, true otherwise
 * @throws Exception if parsing fails; the error is logged before it is
 *             rethrown
 */
private boolean loadConceptsAsXML(InputStream in) throws Exception {
    final DocumentBuilderFactory builderFactory = DocumentBuilderFactory.newInstance();
    try {
        DocumentBuilder builder = builderFactory.newDocumentBuilder();
        // answer every entity lookup with an empty source
        builder.setEntityResolver(new EntityResolver() {

            @Override
            public InputSource resolveEntity(String publicId, String systemId)
                    throws SAXException, IOException {
                return new InputSource(new StringReader("")); //$NON-NLS-1$
            }
        });
        Document doc = builder.parse(in);

        // read scheme, only the first concept scheme is considered
        NodeList listOfConceptScheme = doc.getElementsByTagName("skos:conceptScheme");
        Node conceptSchemeNode = (listOfConceptScheme == null) ? null
                : listOfConceptScheme.item(0);
        if (conceptSchemeNode != null && conceptSchemeNode.getNodeType() == Node.ELEMENT_NODE) {
            Element conceptScheme = (Element) conceptSchemeNode;
            this.namespace = conceptScheme.getAttribute("rdf:about");
            // use the field here, a local of the same name would always be null
            this.identifier = this.namespace;
            NodeList schemeChildren = conceptScheme.getChildNodes();
            for (int j = 0; j < schemeChildren.getLength(); j++) {
                Node nd = schemeChildren.item(j);
                if (nd == null) {
                    continue;
                }
                String nodeName = nd.getNodeName();
                // the text is held by the child node, the node value of an
                // element itself is always null
                if (nodeName.equals("skos:definition")) {
                    this.description = firstChildValue(nd);
                }
                else if (this.description == null && nodeName.endsWith("description")) {
                    this.description = firstChildValue(nd);
                }
            }
        }

        NodeList listOfConcepts = doc.getElementsByTagName("skos:concept");
        if (listOfConcepts.getLength() == 0) {
            return false;
        }
        for (int i = 0; i < listOfConcepts.getLength(); i++) {
            Node conceptNode = listOfConcepts.item(i);
            if (conceptNode == null || conceptNode.getNodeType() != Node.ELEMENT_NODE) {
                continue;
            }
            Element concept = (Element) conceptNode;
            String conceptUri = concept.getAttribute("rdf:about");
            String name = null;
            String description = null;
            // once set, only a value in the requested language may replace
            // the current one
            boolean prefLanguageNameAvailable = false;
            boolean prefLanguageDefinitionAvailable = false;

            NodeList children = concept.getChildNodes();
            for (int j = 0; j < children.getLength(); j++) {
                Node nd = children.item(j);
                if (nd == null) {
                    continue;
                }
                String nodeName = nd.getNodeName();
                if (nodeName.equals("skos:prefLabel")) {
                    String value = firstChildValue(nd);
                    if (value == null) {
                        // empty label element, nothing to take over
                        continue;
                    }
                    boolean requested = hasLanguage(nd, this.language);
                    if (requested || !prefLanguageNameAvailable) {
                        name = value;
                        prefLanguageNameAvailable = requested
                                || hasLanguage(nd, DEFAULT_LANGUAGE);
                    }
                }
                else if (nodeName.equals("skos:definition")) {
                    String value = firstChildValue(nd);
                    if (value == null) {
                        // empty definition element, nothing to take over
                        continue;
                    }
                    boolean requested = hasLanguage(nd, this.language);
                    if (requested || !prefLanguageDefinitionAvailable) {
                        description = value;
                        prefLanguageDefinitionAvailable = requested
                                || hasLanguage(nd, DEFAULT_LANGUAGE);
                    }
                }
                else if (nodeName.equals("skos:topConceptOf")) {
                    if (nd.hasChildNodes()) {
                        this.namespace = firstChildValue(nd);
                    }
                }
                else if (description == null && nodeName.endsWith("description")) {
                    description = nd.hasChildNodes() ? firstChildValue(nd) : "";
                }
            }

            if (name != null) {
                CodeEntry entry = new CodeEntry(name, description, conceptUri, conceptUri);
                this.entriesByName.put(name, entry);
                this.entriesByIdentifier.put(conceptUri, entry);
            }
        } // end of for loop
    } catch (Exception e) {
        log.error("Error while reading skos code list as XML", e); //$NON-NLS-1$
        throw e;
    }
    return true;
}

/**
 * Returns the node value of the first child of the given node, or null if
 * the node has no children.
 */
private static String firstChildValue(Node node) {
    Node child = node.getFirstChild();
    return (child == null) ? null : child.getNodeValue();
}

/**
 * Tests whether the given node carries a language attribute equal to the
 * given language.
 */
private boolean hasLanguage(Node node, String lang) {
    return isLanguageAttributeAvailable(node) && getLanguageAttribute(node).equals(lang);
}
/**
 * Returns the value of the language attribute of the given node. The
 * attribute must exist, see {@link #isLanguageAttributeAvailable(Node)}.
 *
 * @param node the node carrying the attribute
 * @return the attribute value
 */
private String getLanguageAttribute(Node node) {
    Node languageAttribute = node.getAttributes().getNamedItem(ATTRIBUTE_LANGUAGE);
    return languageAttribute.getNodeValue();
}
/**
 * Tests whether the given node carries a language attribute.
 *
 * @param node the node to inspect
 * @return true if the attribute is present
 */
private boolean isLanguageAttributeAvailable(Node node) {
    if (!node.hasAttributes()) {
        return false;
    }
    NamedNodeMap attributes = node.getAttributes();
    return attributes != null && attributes.getNamedItem(ATTRIBUTE_LANGUAGE) != null;
}
}