/*
* #!
* Ontopia Engine
* #-
* Copyright (C) 2001 - 2013 The Ontopia Project
* #-
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
* !#
*/
package net.ontopia.topicmaps.xml;
import java.io.IOException;
import java.io.InputStream;
import java.net.MalformedURLException;
import java.util.Collection;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.Map;
import net.ontopia.infoset.core.LocatorIF;
import net.ontopia.infoset.impl.basic.URILocator;
import net.ontopia.topicmaps.core.AssociationIF;
import net.ontopia.topicmaps.core.AssociationRoleIF;
import net.ontopia.topicmaps.core.OccurrenceIF;
import net.ontopia.topicmaps.core.ReifiableIF;
import net.ontopia.topicmaps.core.ScopedIF;
import net.ontopia.topicmaps.core.TMObjectIF;
import net.ontopia.topicmaps.core.TopicIF;
import net.ontopia.topicmaps.core.TopicMapBuilderIF;
import net.ontopia.topicmaps.core.TopicMapIF;
import net.ontopia.topicmaps.core.TopicMapImporterIF;
import net.ontopia.topicmaps.core.TopicMapReaderIF;
import net.ontopia.topicmaps.core.TopicMapStoreIF;
import net.ontopia.topicmaps.core.TopicNameIF;
import net.ontopia.topicmaps.core.VariantNameIF;
import net.ontopia.topicmaps.impl.basic.InMemoryTopicMapStore;
import net.ontopia.topicmaps.utils.ClassInstanceUtils;
import net.ontopia.topicmaps.utils.MergeUtils;
import net.ontopia.utils.OntopiaRuntimeException;
import net.ontopia.utils.StringUtils;
import net.ontopia.utils.URIUtils;
import net.ontopia.xml.AbstractXMLFormatReader;
import net.ontopia.xml.DefaultXMLReaderFactory;
import net.ontopia.xml.ValidatingContentHandler;
import org.xml.sax.Attributes;
import org.xml.sax.ContentHandler;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;
import org.xml.sax.XMLReader;
/**
* PUBLIC: A reader importing topic maps (or fragments) from the
* TM/XML syntax.
*
* @since 3.1
*/
public class TMXMLReader extends AbstractXMLFormatReader
implements TopicMapReaderIF, TopicMapImporterIF {
/** Key looked up by {@link #setAdditionalProperties(Map)} to switch validation on or off. */
public static final String PROPERTY_VALIDATE = "validate";
// Base address: given to the in-memory store in read() and to the
// content handler created in importInto().
private LocatorIF base;
// When true, importInto() wraps the content handler in a
// ValidatingContentHandler. Every constructor sets this to true.
private boolean validate;
// --- Constructors
/**
 * PUBLIC: Creates a reader reading from the given file name. The
 * name is converted to a locator with {@code URIUtils.getURI}; that
 * locator is used both as the base address and as the address the
 * input is read from. Validation is enabled.
 */
public TMXMLReader(String filename) {
  this(URIUtils.getURI(filename));
}
/**
 * PUBLIC: Creates a reader reading from the given location. The
 * locator's address is used to build the input source, and the
 * locator itself becomes the base address. Validation is enabled.
 */
public TMXMLReader(LocatorIF base) {
  this(new InputSource(base.getAddress()), base);
}
/**
 * PUBLIC: Creates a reader reading from the given input source,
 * using a separately supplied base address. Validation is enabled.
 */
public TMXMLReader(InputSource source, LocatorIF base) {
  this.validate = true;
  this.source = source;
  this.base = base;
}
// --- Accessors
/**
 * PUBLIC: Returns whether the input is validated against the TM/XML
 * schema while it is being imported.
 */
public boolean getValidate() {
  return this.validate;
}
/**
 * PUBLIC: Switches validation of the input on or off. The flag is
 * consulted each time {@link #importInto(TopicMapIF)} runs.
 */
public void setValidate(boolean validate) {
  this.validate = validate;
}
// --- TopicMapReaderIF implementation
/**
 * Reads the source into a topic map held by a newly created
 * in-memory store whose base address is this reader's base. After
 * the import, both ClassInstanceUtils association-resolution passes
 * are run over the result.
 *
 * @return the populated topic map
 * @throws IOException if the import fails with an I/O problem
 */
public TopicMapIF read() throws IOException {
  InMemoryTopicMapStore memstore = new InMemoryTopicMapStore();
  memstore.setBaseAddress(base);

  TopicMapIF result = memstore.getTopicMap();
  importInto(result);

  ClassInstanceUtils.resolveAssociations1(result);
  ClassInstanceUtils.resolveAssociations2(result);
  return result;
}
/**
 * Parses the input source and adds its content to the given topic
 * map. When validation is enabled the content handler is wrapped in
 * a ValidatingContentHandler fed with the TM/XML schema.
 *
 * @param topicmap the topic map to import into; must be connected
 *        to a store
 * @throws IOException if the topic map has no store, if no SAX
 *         parser can be created, or if parsing fails because of an
 *         underlying IOException
 * @throws OntopiaRuntimeException if parsing fails with a
 *         SAXException that does not wrap an IOException
 */
public void importInto(TopicMapIF topicmap) throws IOException {
  // Check that store is ok
  TopicMapStoreIF store = topicmap.getStore();
  if (store == null)
    throw new IOException("Topic map not connected to a store.");

  XMLReader parser;
  try {
    parser = DefaultXMLReaderFactory.createXMLReader();
  } catch (SAXException e) {
    // pass the SAXException along as the cause so its stack trace
    // is not lost
    throw new IOException("Problems occurred when creating SAX2 XMLReader: " +
                          e.getMessage(), e);
  }

  // Register content handlers
  ContentHandler handler = new TMXMLContentHandler(topicmap, base);
  if (validate)
    handler = new ValidatingContentHandler(handler, getTMXMLSchema(), true);
  parser.setContentHandler(handler);

  // Parse input source
  try {
    parser.parse(source);
  } catch (SAXException e) {
    // an IOException tunnelled through SAX is rethrown as itself
    if (e.getException() instanceof IOException)
      throw (IOException) e.getException();
    throw new OntopiaRuntimeException(e);
  }
}
/**
 * Reads the source with {@link #read()} and returns the resulting
 * topic map wrapped in a singleton collection.
 *
 * @throws IOException if {@link #read()} fails
 */
public Collection readAll() throws IOException {
  TopicMapIF only = read();
  return Collections.singleton(only);
}
// --- ContentHandler
// constants for state: the values taken by TMXMLContentHandler.state as
// start and end tags move the handler through the document
private static final int START = 0; // before document element
private static final int TOP = 1; // inside doc elem, outside topic
private static final int TOPIC = 2; // inside topic
private static final int IDENTIFIER = 3; // inside tm:identifier or tm:locator
private static final int MAYBETOPICNAME = 4; // inside characteristic: occurrence unless a tm:value child shows up
private static final int BASENAME = 5; // inside tm:value of a topic name
private static final int TOPICNAME = 6; // inside a topic name element, outside tm:value and tm:variant
private static final int VARIANT = 7; // inside tm:variant
private static final int ASSOCIATION = 8; // inside an element carrying a role attribute (an association)
private static final int ROLE = 9; // inside a child (role) element of an association element
final class TMXMLContentHandler extends AbstractTopicMapContentHandler {
// namespace prefix -> namespace URI, maintained by startPrefixMapping
// and endPrefixMapping; read when resolving qname references
private Map nsprefixes;
// the topic map being populated
private TopicMapIF topicmap;
// current parser state; one of the int state constants on TMXMLReader
private int state;
// builder obtained from the topic map; used to create all new objects
private TopicMapBuilderIF builder;
// topic whose element is currently being processed
private TopicIF topic;
// character data collected by characters() for the current
// identifier, name value, variant or occurrence
private StringBuilder buffer;
// topic name created when tm:value ended; parent of following variants
private TopicNameIF basename;
// association created for the current association element
private AssociationIF association;
// true when the current variant's datatype equals TMXMLWriter.XSD_ANYURI
private boolean isuri;
// value of the reifier attribute, kept until the reified object exists
private String reifier;
private TopicIF type; // used for topic names only
// carryovers from start tag to object creation
private Collection scope;
private String datatype;
/**
 * Creates a handler that populates the given topic map. The handler
 * starts in the START state with an empty character buffer and no
 * namespace prefixes registered.
 *
 * @param topicmap the topic map to add the parsed content to
 * @param base the base address handed to the superclass
 */
public TMXMLContentHandler(TopicMapIF topicmap, LocatorIF base) {
  super(base);
  this.state = START;
  this.nsprefixes = new HashMap();
  this.buffer = new StringBuilder();
  this.topicmap = topicmap;
  this.builder = topicmap.getBuilder();
}
/**
 * Handles a start tag by dispatching on the current state:
 * <ul>
 * <li>START: the document element; only its reifier is used.</li>
 * <li>TOP: a topic element; the topic is looked up or created by
 *     its id and typed by the element name.</li>
 * <li>TOPIC: a tm:identifier/tm:locator, an association (element
 *     with a role attribute), or a characteristic that is either an
 *     occurrence or a topic name.</li>
 * <li>MAYBETOPICNAME: a tm:value child means the characteristic is
 *     a topic name.</li>
 * <li>TOPICNAME: a tm:variant child starts a variant.</li>
 * <li>ASSOCIATION: a child element is one more role.</li>
 * </ul>
 * Start tags seen in any other state are ignored.
 *
 * @throws SAXException if the element name cannot be turned into a
 *         valid type URI, or if an attribute needed to build an
 *         association role is missing
 */
public void startElement(String uri, String name, String qName,
                         Attributes atts)
  throws SAXException {
  switch (state) {
  case START: // before document element
    // this is the document element, of which we only want the
    // reifier, if given
    handleReifier(topicmap, atts);
    state = TOP;
    break;

  case TOP: { // inside doc elem, outside topic
    // this has to be the start element of a topic
    topic = getTopicById(atts.getValue("", "id"));
    TopicIF ttype = getType(uri, name);
    if (ttype != null)
      topic.addType(ttype);
    state = TOPIC;
    break;
  }

  case TOPIC:
    // this has to be some property of the topic
    if (TMXMLWriter.NS_TM.equals(uri) &&
        ("identifier".equals(name) || "locator".equals(name)))
      state = IDENTIFIER;
    else if (atts.getValue("", "role") != null) {
      // it's an association, of some kind
      // let's make association and role played by this topic
      association = builder.makeAssociation(getType(uri, name));
      TopicIF roletype = getTopicByAttRef(atts.getValue("", "role"));
      builder.makeAssociationRole(association, roletype, topic);
      scope = getScope(atts);
      addScope(association);
      handleReifier(association, atts);

      if (atts.getValue("", "topicref") != null) {
        // binary association: the other player needs a role type too
        TopicIF other = getTopicByAttRef(atts.getValue("", "topicref"));
        roletype = getTopicByAttRef(requireAttribute(atts, "otherrole", name));
        builder.makeAssociationRole(association, roletype, other);
      }

      // if unary or binary association: nothing more happens
      // if n-ary association: child elements for roles appear
      state = ASSOCIATION;
    } else {
      state = MAYBETOPICNAME;
      scope = getScope(atts);
      reifier = atts.getValue("", "reifier");
      checkDatatype(atts);
      type = getType(uri, name); // needed if this is a topic name
    }
    break;

  case MAYBETOPICNAME:
    // could be occurrence, could be topic name
    if (TMXMLWriter.NS_TM.equals(uri) && "value".equals(name)) {
      // ok, it was a topic name
      state = BASENAME;
      // we were collecting chars in case it was an occurrence; now dump
      buffer.setLength(0);
    }
    break;

  case TOPICNAME:
    if (TMXMLWriter.NS_TM.equals(uri) && "variant".equals(name)) {
      state = VARIANT;
      scope = getScope(atts);
      reifier = atts.getValue("", "reifier");
      datatype = atts.getValue("", "datatype");
      isuri = datatype != null && datatype.equals(TMXMLWriter.XSD_ANYURI);
    }
    break;

  case ASSOCIATION: {
    // this must be the role element inside an association element
    TopicIF roletype = getType(uri, name);
    TopicIF other = getTopicByAttRef(requireAttribute(atts, "topicref", name));
    AssociationRoleIF role = builder.makeAssociationRole(association,
                                                         roletype, other);
    handleReifier(role, atts);
    state = ROLE;
    break;
  }
  }
}

/**
 * Returns the value of an attribute in no namespace, failing with a
 * descriptive SAXException rather than a NullPointerException
 * further down when the attribute is absent.
 */
private String requireAttribute(Attributes atts, String attname,
                                String element) throws SAXException {
  String value = atts.getValue("", attname);
  if (value == null)
    throw new SAXException("Missing required attribute '" + attname +
                           "' on element '" + element + "'");
  return value;
}
/**
 * Collects character data into the buffer, but only in the states
 * whose text content is used later: identifiers/locators, topic
 * name values, variants, and characteristics that may turn out to
 * be occurrences. Text seen in any other state is dropped.
 */
public void characters(char ch[], int start, int length) {
  switch (state) {
  case IDENTIFIER:
  case BASENAME:
  case VARIANT:
  case MAYBETOPICNAME:
    buffer.append(ch, start, length);
    break;
  default:
    break;
  }
}
/**
 * Handles an end tag: moves the state machine back out one level
 * and, for identifiers, topic name values, variants and
 * occurrences, creates the corresponding object from the buffered
 * character data together with the scope and reifier carried over
 * from the start tag.
 *
 * @throws SAXException declared by the SAX interface; any exception
 *         raised while handling the end tag (SAXException included)
 *         is rethrown wrapped in an OntopiaRuntimeException
 */
public void endElement(String uri, String name, String qName)
  throws SAXException {
  try {
    switch (state) {
    case TOP:
      state = START;
      break;
    case TOPIC:
      state = TOP;
      break;
    case IDENTIFIER:
      state = TOPIC;
      LocatorIF loc = createLocator(buffer.toString());
      buffer.setLength(0);
      if ("identifier".equals(name))
        registerSubjectIndicator(topic, loc);
      else if ("locator".equals(name))
        registerSubjectLocator(topic, loc);
      break;
    case BASENAME:
      // end of tm:value: the name text is complete
      state = TOPICNAME;
      basename = builder.makeTopicName(topic, type, buffer.toString());
      addScope(basename);
      handleReifier(basename, reifier);
      reifier = null;
      buffer.setLength(0);
      break;
    case VARIANT:
      state = TOPICNAME;
      VariantNameIF vn;
      if (isuri) {
        try {
          vn = builder.makeVariantName(basename, new URILocator(buffer.toString()));
        } catch (MalformedURLException e) {
          throw new SAXException("Invalid URI for variant name", e);
        }
      } else {
        vn = builder.makeVariantName(basename, buffer.toString());
      }
      addScope(vn);
      handleReifier(vn, reifier);
      reifier = null;
      buffer.setLength(0);
      break;
    case TOPICNAME:
      state = TOPIC;
      break;
    case MAYBETOPICNAME:
      // it turned out to be an occurrence (because we're seeing the end of
      // the element and haven't seen <value>)
      state = TOPIC;
      if (datatype == null)
        datatype = TMXMLWriter.XSD_STRING;
      OccurrenceIF occ = builder.makeOccurrence(topic,
                                                getType(uri, name),
                                                buffer.toString(),
                                                createLocator(datatype));
      buffer.setLength(0);
      addScope(occ);
      handleReifier(occ, reifier);
      reifier = null;
      break;
    case ASSOCIATION:
      state = TOPIC;
      break;
    case ROLE:
      state = ASSOCIATION;
      break;
    }
  } catch (Exception e) {
    // carry the base address in the exception itself instead of
    // printing it to stdout, so the context reaches whoever handles
    // or logs the failure
    throw new OntopiaRuntimeException("" + base + ": " + e, e);
  }
}
/**
 * Records a namespace declaration so that qname-style attribute
 * references using the prefix can be resolved. A later declaration
 * of the same prefix replaces the earlier one.
 */
public void startPrefixMapping(String prefix, String uri) {
  this.nsprefixes.put(prefix, uri);
}
/**
 * Forgets the namespace declaration for the given prefix.
 * NOTE(review): only one URI is stored per prefix, so when a prefix
 * is redeclared on a nested element, ending the inner scope removes
 * the prefix altogether rather than restoring the outer binding.
 */
public void endPrefixMapping(String prefix) {
  this.nsprefixes.remove(prefix);
}
/**
 * Maps an element name to the topic it stands for. An element in no
 * namespace refers to the topic whose id is the local name; the
 * tm:topic element means "no type"; any other element refers to the
 * topic whose subject identifier is namespace URI + local name.
 *
 * @return the type topic (created if it did not exist), or null for
 *         tm:topic
 * @throws SAXException if namespace URI + local name is not a valid
 *         URI; the MalformedURLException is attached as the cause
 */
private TopicIF getType(String uri, String name) throws SAXException {
  if (uri == null || "".equals(uri))
    return getTopicById(name);

  if (TMXMLWriter.NS_TM.equals(uri) && "topic".equals(name))
    return null; // element for typeless construct

  try {
    return getTopicBySubjectIdentifier(new URILocator(uri + name));
  } catch (MalformedURLException e) {
    throw new SAXException("Invalid URI: " + uri + name, e);
  }
}
/**
 * Reads the scope attribute and resolves each of its tokens to a
 * topic.
 *
 * @return the set of theme topics, or the shared empty set when the
 *         element has no scope attribute
 */
private Collection getScope(Attributes atts) {
  String scopeAttr = atts.getValue("", "scope");
  if (scopeAttr == null)
    return Collections.EMPTY_SET;

  String[] refs = StringUtils.split(scopeAttr);
  Collection themes = new HashSet(refs.length);
  for (String ref : refs)
    themes.add(getTopicByAttRef(ref));
  return themes;
}
/**
 * Adds every theme collected from the start tag to the given object
 * and then clears the carried-over scope, so that it cannot be
 * applied a second time.
 */
private void addScope(ScopedIF scoped) {
  for (Object theme : scope)
    scoped.addTheme((TopicIF) theme);
  scope = null;
}
/**
 * Remembers the element's datatype attribute (null when absent)
 * until the end tag, where it is used if the element turns out to
 * be an occurrence.
 */
private void checkDatatype(Attributes atts) {
  this.datatype = atts.getValue("", "datatype");
}
/**
 * Resolves a topic reference taken from an attribute value. A value
 * containing a colon is treated as a qname, anything else as a
 * topic id.
 */
private TopicIF getTopicByAttRef(String attref) {
  boolean isQName = attref.indexOf(':') >= 0;
  return isQName ? getTopicByQName(attref) : getTopicById(attref);
}
/**
 * Returns the topic whose item identifier is '#' + id resolved
 * against the document address, creating and registering it if it
 * does not exist yet.
 *
 * @param id the topic id; may be null, in which case a new topic
 *        without item identifier is returned. (Concatenating a null
 *        id would produce the fragment "#null" and could wrongly
 *        pick up a topic whose id really is "null".)
 */
private TopicIF getTopicById(String id) {
  if (id == null)
    return builder.makeTopic();

  LocatorIF loc = createLocator('#' + id);
  TopicIF found = (TopicIF) topicmap.getObjectByItemIdentifier(loc);
  if (found == null) {
    found = builder.makeTopic();
    registerSourceLocator(found, id);
  }
  return found;
}
/**
 * Returns the topic with the given subject identifier, creating a
 * new topic carrying that identifier when none exists.
 */
private TopicIF getTopicBySubjectIdentifier(LocatorIF psi) {
  TopicIF existing = topicmap.getTopicBySubjectIdentifier(psi);
  if (existing != null)
    return existing;

  TopicIF created = builder.makeTopic();
  registerSubjectIndicator(created, psi);
  return created;
}
/**
 * Resolves a prefix:local reference to the topic whose subject
 * identifier is the prefix's namespace URI followed by the local
 * part, creating the topic if necessary.
 *
 * @throws OntopiaRuntimeException if the prefix has not been
 *         declared, or if the resulting address is not a valid URI
 */
private TopicIF getTopicByQName(String qname) {
  int pos = qname.indexOf(':');
  String prefix = qname.substring(0, pos);
  String local = qname.substring(pos + 1);

  if (!nsprefixes.containsKey(prefix))
    throw new OntopiaRuntimeException("Undeclared namespace prefix " +
                                      prefix + " in " + qname);

  String address = nsprefixes.get(prefix) + local;
  try {
    return getTopicBySubjectIdentifier(new URILocator(address));
  } catch (MalformedURLException e) {
    // report the address that was actually rejected (namespace URI
    // + local name, not prefix + local name) and keep the cause
    throw new OntopiaRuntimeException("Invalid namespace URI from qname " +
                                      qname + ": " + address, e);
  }
}
/**
 * Gives the topic the subject identifier. If a different topic
 * already has it, that topic is merged into the given one; if the
 * given topic already has it, nothing happens.
 */
private void registerSubjectIndicator(TopicIF topic, LocatorIF psi) {
  TopicIF owner = topicmap.getTopicBySubjectIdentifier(psi);
  if (owner == null)
    topic.addSubjectIdentifier(psi);
  else if (owner != topic)
    MergeUtils.mergeInto(topic, owner);
}
/**
 * Adds '#' + id, resolved against the document address, as an item
 * identifier of the object. Does nothing when id is null.
 * (Adapted from XTMContentHandler.)
 */
protected void registerSourceLocator(TMObjectIF tmobject, String id) {
  if (id != null)
    tmobject.addItemIdentifier(createLocator('#' + id));
}
/**
 * Gives the topic the subject locator. If a different topic already
 * has it, that topic is merged into the given one; if the given
 * topic already has it, nothing happens.
 */
protected void registerSubjectLocator(TopicIF topic, LocatorIF loc) {
  TopicIF owner = topicmap.getTopicBySubjectLocator(loc);
  if (owner == null)
    topic.addSubjectLocator(loc);
  else if (owner != topic)
    MergeUtils.mergeInto(topic, owner);
}
/**
 * Turns an address into a locator by resolving it against the
 * document address; the empty string yields the document address
 * itself. (Adapted from XTMContentHandler.)
 */
protected LocatorIF createLocator(String address) {
  return address.length() > 0
    ? doc_address.resolveAbsolute(address)
    : doc_address;
}
/**
 * Reads the reifier attribute of the current element and, if it is
 * present, makes the referenced topic the reifier of the object.
 */
private void handleReifier(ReifiableIF reifiable, Attributes atts) {
  String ref = atts.getValue("", "reifier");
  handleReifier(reifiable, ref);
}
/**
 * Makes the topic named by the reference the reifier of the given
 * object.
 *
 * @param reifiable the object to reify
 * @param ref a topic id or qname; when null nothing is done
 */
private void handleReifier(ReifiableIF reifiable, String ref) {
  if (ref == null) return;
  reify(reifiable, getTopicByAttRef(ref));
}
/**
 * Sets the given topic as the reifier of the given object.
 */
private void reify(ReifiableIF reifiable, TopicIF reifier) {
  reifiable.setReifier(reifier);
}
}
/**
 * Opens the TM/XML RELAX-NG schema bundled on the classpath.
 *
 * @return an input source over the schema resource
 * @throws IOException if the schema resource cannot be found, so
 *         that the problem is reported here rather than as an
 *         obscure failure on a null stream later on
 */
private InputSource getTMXMLSchema() throws IOException {
  String resource = "net/ontopia/topicmaps/xml/tmxml.rnc";

  // the context class loader is allowed to be null; fall back to the
  // loader that loaded this class
  ClassLoader cl = Thread.currentThread().getContextClassLoader();
  if (cl == null)
    cl = TMXMLReader.class.getClassLoader();

  InputStream stream = cl.getResourceAsStream(resource);
  if (stream == null)
    throw new IOException("Could not find TM/XML schema on the classpath: " +
                          resource);
  return new InputSource(stream);
}
/**
 * Sets additional properties for the TMXMLReader. The only property
 * looked at is {@link #PROPERTY_VALIDATE}, which corresponds to the
 * {@link #setValidate(boolean)} method; it is applied only when its
 * value is a {@link Boolean}, and ignored otherwise.
 *
 * @param properties the properties to apply
 */
public void setAdditionalProperties(Map<String, Object> properties) {
  Object requested = properties.get(PROPERTY_VALIDATE);
  // instanceof is false for null, so no separate null check is needed
  if (requested instanceof Boolean)
    setValidate(((Boolean) requested).booleanValue());
}
}