/* * #! * Ontopia Engine * #- * Copyright (C) 2001 - 2013 The Ontopia Project * #- * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. * !# */ package net.ontopia.topicmaps.xml; import java.io.File; import java.io.FileOutputStream; import java.io.IOException; import java.io.OutputStreamWriter; import java.io.Writer; import java.util.ArrayList; import java.util.Collection; import java.util.Collections; import java.util.HashMap; import java.util.Iterator; import java.util.Map; import java.util.List; import java.util.Set; import net.ontopia.infoset.core.LocatorIF; import net.ontopia.topicmaps.core.AssociationIF; import net.ontopia.topicmaps.core.AssociationRoleIF; import net.ontopia.topicmaps.core.TopicNameIF; import net.ontopia.topicmaps.core.DataTypes; import net.ontopia.topicmaps.core.OccurrenceIF; import net.ontopia.topicmaps.core.ReifiableIF; import net.ontopia.topicmaps.core.ScopedIF; import net.ontopia.topicmaps.core.TopicIF; import net.ontopia.topicmaps.core.TopicMapIF; import net.ontopia.topicmaps.core.TopicMapWriterIF; import net.ontopia.topicmaps.core.VariantNameIF; import net.ontopia.utils.CompactHashSet; import net.ontopia.utils.OntopiaRuntimeException; import net.ontopia.utils.StringUtils; import net.ontopia.utils.ObjectUtils; import net.ontopia.xml.PrettyPrinter; import org.xml.sax.ContentHandler; import org.xml.sax.SAXException; import org.xml.sax.helpers.AttributesImpl; /** * PUBLIC: A writer exporting topic maps (or fragments) to the TM/XML * syntax. * * @since 3.1 */ public class TMXMLWriter extends AbstractTopicMapExporter implements TopicMapWriterIF { public static final String PROPERTY_PREFIXES = "prefixes"; public static final String PROPERTY_DOCUMENT_ELEMENT = "documentElement"; protected static final AttributesImpl EMPTY_ATTR_LIST = new AttributesImpl(); protected static final String EMPTY_NAMESPACE = ""; protected static final String EMPTY_LOCALNAME = ""; private ContentHandler out; // If writer is instantiated, the void close() method closes it. private Writer writer = null; private String docelem = "topicmap"; private AttributesImpl atts = new AttributesImpl(); private Map nsuris; // nsuri -> prefix private Map prefixes; // inverse private Set exported; // contains IDs of exported associations private Set unassigned; // namespace URIs not yet given a prefix public static final String NS_ISO = "http://psi.topicmaps.org/iso13250/model/"; public static final String NS_XTM = "http://www.topicmaps.org/xtm/1.0/core.xtm#"; public static final String NS_TM = "http://psi.ontopia.net/xml/tm-xml/"; public static final String XSD_ANYURI = "http://www.w3.org/2001/XMLSchema#anyURI"; public static final String XSD_STRING = "http://www.w3.org/2001/XMLSchema#string"; // --- Constructors /** * PUBLIC: Creates a writer writing to the given file in the utf-8 * character encoding. */ public TMXMLWriter(String filename) throws IOException { this(filename, "utf-8"); } /** * PUBLIC: Creates a writer writing to the given file in the given * character encoding. */ public TMXMLWriter(String filename, String encoding) throws IOException { writer = new OutputStreamWriter(new FileOutputStream(filename), encoding); this.out = makePrinter(writer, encoding); init(); } /** * PUBLIC: Creates a writer writing to the given writer in the utf-8 * character encoding. */ public TMXMLWriter(Writer out) throws IOException { this(out, "utf-8"); } /** * PUBLIC: Creates a writer writing to the given writer in the given * character encoding. * @since 3.2 */ public TMXMLWriter(Writer out, String encoding) throws IOException { this.out = makePrinter(out, encoding); init(); } /** * PUBLIC: Creates a writer writing to the given file in UTF-8. * @since 3.2 */ public TMXMLWriter(File out) throws IOException { String encoding = "utf-8"; writer = new OutputStreamWriter(new FileOutputStream(out), encoding); this.out = makePrinter(writer, encoding); init(); } /** * INTERNAL: Creates a writer writing to the given ContentHandler. */ public TMXMLWriter(ContentHandler out) { this.out = out; init(); } private void init() { this.filter = null; this.exported = new CompactHashSet(); this.nsuris = new HashMap(); this.prefixes = new HashMap(); this.unassigned = new CompactHashSet(); addPrefix("iso", NS_ISO); addPrefix("xtm", NS_XTM); addPrefix("tm", NS_TM); } /** * PUBLIC: Closes the Writer created for internal use. * * Call this method when you have finished using the fragment * exporter interface on the topic map writer. Don't call this * method if using the TopicMapWriterIF interface. */ public void close() throws IOException { if (writer != null) writer.close(); } // --- Accessors /** * PUBLIC: Returns the element type name of the document element. */ public String getDocumentElement() { return docelem; } /** * PUBLIC: Sets the document element type name to use. */ public void setDocumentElement(String docelem) { this.docelem = docelem; } // --- TopicMapWriterIF implementation /** * PUBLIC: Writes the given topic map to the underlying writer. * @exception IOException Thrown if writing the topic map fails. * @param topicmap The topic map to be exported. */ public void write(TopicMapIF topicmap) throws IOException { try { gatherPrefixes(topicmap.getTopics()); startTopicMap(topicmap); writeTopics(filterCollection(topicmap.getTopics())); endTopicMap(); close(); } catch (SAXException e) { throw new OntopiaRuntimeException(e); } } // --- Fragment-exporter interface /** * PUBLIC: Writes the start tag of the document element (to be used * in fragment exporting only). */ public void startTopicMap(TopicMapIF topicmap) throws SAXException { assignRemainingNamespaces(); Iterator it = nsuris.keySet().iterator(); while (it.hasNext()) { String nsuri = (String) it.next(); String prefix = (String) nsuris.get(nsuri); atts.addAttribute(EMPTY_NAMESPACE, EMPTY_LOCALNAME, "xmlns:" + prefix, "CDATA", nsuri); } if (topicmap != null) // topic map can be null in some situations (particularly when using tmrap) addReifierAttribute(topicmap, atts); out.startDocument(); out.startElement(EMPTY_NAMESPACE, EMPTY_LOCALNAME, docelem, atts); atts.clear(); } /** * PUBLIC: Gets the namespace prefixes to be used (to be used in * fragment exporting mode only). Must be called before * startTopicMap. */ public void gatherPrefixes(Collection topics) { Iterator it = topics.iterator(); while (it.hasNext()) gatherPrefixes((TopicIF) it.next()); } /** * PUBLIC: Writes a set of topics (fragment exporting mode only). */ public void writeTopics(Collection topics) throws SAXException { Iterator it = topics.iterator(); while (it.hasNext()) writeTopic((TopicIF) it.next()); } /** * PUBLIC: Writes a single topic (fragment exporting mode only). */ public void writeTopic(TopicIF topic) throws SAXException { final String TOPIC = getElementTypeName(NS_TM + "topic"); final String BASENAME = getElementTypeName(NS_ISO + "topic-name"); final String OCCURRENCE = getElementTypeName(NS_TM + "occurrence"); final String ASSOCIATION = getElementTypeName(NS_TM + "association"); final String ROLE = "role"; String elem = TOPIC; // special name meaning: no topic type Iterator typeit = filterCollection(topic.getTypes()).iterator(); if (typeit.hasNext()) elem = getElementTypeName((TopicIF) typeit.next(), TOPIC); atts.addAttribute(EMPTY_NAMESPACE, EMPTY_LOCALNAME, "id", "CDATA", getTopicId(topic)); out.startElement(EMPTY_NAMESPACE, EMPTY_LOCALNAME, elem, atts); atts.clear(); // indicators Iterator it = filterCollection(topic.getSubjectIdentifiers()).iterator(); while (it.hasNext()) { LocatorIF loc = (LocatorIF) it.next(); out.startElement(EMPTY_NAMESPACE, EMPTY_LOCALNAME, "tm:identifier", EMPTY_ATTR_LIST); writeText(getSubjectIndicatorRef(topic, loc)); out.endElement(EMPTY_NAMESPACE, EMPTY_LOCALNAME, "tm:identifier"); } // subject locator it = filterCollection(topic.getSubjectLocators()).iterator(); while (it.hasNext()) { LocatorIF loc = (LocatorIF) it.next(); out.startElement(EMPTY_NAMESPACE, EMPTY_LOCALNAME, "tm:locator", EMPTY_ATTR_LIST); writeText(loc.getExternalForm()); out.endElement(EMPTY_NAMESPACE, EMPTY_LOCALNAME, "tm:locator"); } // topic name it = filterCollection(topic.getTopicNames()).iterator(); while (it.hasNext()) { TopicNameIF bn = (TopicNameIF) it.next(); String bnelem = getElementTypeName(bn.getType(), BASENAME); String scope = getScope(bn); if (scope != null) atts.addAttribute(EMPTY_NAMESPACE, EMPTY_LOCALNAME, "scope", "CDATA", scope); addReifierAttribute(bn, atts); out.startElement(EMPTY_NAMESPACE, EMPTY_LOCALNAME, bnelem, atts); out.startElement(EMPTY_NAMESPACE, EMPTY_LOCALNAME, "tm:value", EMPTY_ATTR_LIST); writeText(bn.getValue()); out.endElement(EMPTY_NAMESPACE, EMPTY_LOCALNAME, "tm:value"); // variant names Iterator it2 = filterCollection(bn.getVariants()).iterator(); while (it2.hasNext()) { VariantNameIF vn = (VariantNameIF) it2.next(); atts.clear(); scope = getScope(vn); if (scope != null) atts.addAttribute(EMPTY_NAMESPACE, EMPTY_LOCALNAME, "scope", "CDATA", scope); if (ObjectUtils.different(vn.getDataType(), DataTypes.TYPE_STRING)) atts.addAttribute(EMPTY_NAMESPACE, EMPTY_LOCALNAME, "datatype", "CDATA", vn.getDataType().getAddress()); addReifierAttribute(vn, atts); out.startElement(EMPTY_NAMESPACE, EMPTY_LOCALNAME, "tm:variant", atts); writeText(vn.getValue()); out.endElement(EMPTY_NAMESPACE, EMPTY_LOCALNAME, "tm:variant"); } out.endElement(EMPTY_NAMESPACE, EMPTY_LOCALNAME, bnelem); atts.clear(); } // occurrence it = filterCollection(topic.getOccurrences()).iterator(); while (it.hasNext()) { OccurrenceIF occ = (OccurrenceIF) it.next(); String occelem = getElementTypeName(occ.getType(), OCCURRENCE); String scope = getScope(occ); if (scope != null && filterOk(scope)) atts.addAttribute(EMPTY_NAMESPACE, EMPTY_LOCALNAME, "scope", "CDATA", scope); if (ObjectUtils.different(occ.getDataType(), DataTypes.TYPE_STRING)) atts.addAttribute(EMPTY_NAMESPACE, EMPTY_LOCALNAME, "datatype", "CDATA", occ.getDataType().getAddress()); addReifierAttribute(occ, atts); out.startElement(EMPTY_NAMESPACE, EMPTY_LOCALNAME, occelem, atts); writeText(occ.getValue()); out.endElement(EMPTY_NAMESPACE, EMPTY_LOCALNAME, occelem); atts.clear(); } // remaining types // (if getTypes() returned more types they are still in typeit; // if so, we can output them as associations here) while (typeit.hasNext()) { TopicIF type = (TopicIF) typeit.next(); atts.addAttribute(EMPTY_NAMESPACE, EMPTY_LOCALNAME, "role", "CDATA", "xtm:instance"); atts.addAttribute(EMPTY_NAMESPACE, EMPTY_LOCALNAME, "otherrole", "CDATA", "xtm:class"); atts.addAttribute(EMPTY_NAMESPACE, EMPTY_LOCALNAME, "topicref", "CDATA", getTopicId(type)); out.startElement(EMPTY_NAMESPACE, EMPTY_LOCALNAME, "xtm:class-instance", atts); out.endElement(EMPTY_NAMESPACE, EMPTY_LOCALNAME, "xtm:class-instance"); atts.clear(); } // association it = topic.getRoles().iterator(); while (it.hasNext()) { AssociationRoleIF role = (AssociationRoleIF) it.next(); AssociationIF assoc = role.getAssociation(); if (!filterOk(assoc)) continue; // Do not output filtered associations. if (exported.contains(assoc.getObjectId())) continue; // output each association only once exported.add(assoc.getObjectId()); String assocelem = getElementTypeName(assoc.getType(), ASSOCIATION); String scope = getScope(assoc); if (scope != null) atts.addAttribute(EMPTY_NAMESPACE, EMPTY_LOCALNAME, "scope", "CDATA", scope); atts.addAttribute(EMPTY_NAMESPACE, EMPTY_LOCALNAME, "role", "CDATA", getElementTypeName(role.getType(), ROLE)); addReifierAttribute(assoc, atts); int arity = assoc.getRoles().size(); if (arity == 1 || arity == 2) { // unary or binary AssociationRoleIF otherrole = null; Iterator it2 = assoc.getRoles().iterator(); while (it2.hasNext()) { AssociationRoleIF r = (AssociationRoleIF) it2.next(); if (r != role) { otherrole = r; break; } } if (otherrole != null && otherrole.getPlayer() != null) { // if unary we skip spec of the other role // also skip if player is null atts.addAttribute(EMPTY_NAMESPACE, EMPTY_LOCALNAME, "topicref", "CDATA", getTopicId(otherrole.getPlayer())); atts.addAttribute(EMPTY_NAMESPACE, EMPTY_LOCALNAME, "otherrole", "CDATA", getElementTypeName(otherrole.getType(), ROLE)); } out.startElement(EMPTY_NAMESPACE, EMPTY_LOCALNAME, assocelem, atts); out.endElement(EMPTY_NAMESPACE, EMPTY_LOCALNAME, assocelem); } else { // n-ary out.startElement(EMPTY_NAMESPACE, EMPTY_LOCALNAME, assocelem, atts); Iterator it2 = assoc.getRoles().iterator(); while (it2.hasNext()) { AssociationRoleIF r = (AssociationRoleIF) it2.next(); if (r == role) continue; // this is our role, which is already covered atts.clear(); atts.addAttribute(EMPTY_NAMESPACE, EMPTY_LOCALNAME, "topicref", "CDATA", getTopicId(r.getPlayer())); String roleelem = getElementTypeName(r.getType(), ROLE); out.startElement(EMPTY_NAMESPACE, EMPTY_LOCALNAME, roleelem, atts); out.endElement(EMPTY_NAMESPACE, EMPTY_LOCALNAME, roleelem); } out.endElement(EMPTY_NAMESPACE, EMPTY_LOCALNAME, assocelem); } atts.clear(); } out.endElement(EMPTY_NAMESPACE, EMPTY_LOCALNAME, elem); } /** * PUBLIC: Write the end tag of the document element (fragment mode * only). */ public void endTopicMap() throws SAXException { out.endElement(EMPTY_NAMESPACE, EMPTY_LOCALNAME, docelem); out.endDocument(); } /** * PUBLIC: Gets the namespace prefixes to be used (to be used in * fragment exporting mode only). Must be called before * startTopicMap. */ public void gatherPrefixes(TopicIF topic) { findPrefixFor(topic.getTypes()); // base names Iterator it = topic.getTopicNames().iterator(); while (it.hasNext()) { TopicNameIF bn = (TopicNameIF) it.next(); findPrefixFor(bn.getType()); findPrefixFor(bn.getScope()); Iterator it2 = bn.getVariants().iterator(); while (it2.hasNext()) { VariantNameIF vn = (VariantNameIF) it2.next(); findPrefixFor(vn.getScope()); } } // occurrences it = topic.getOccurrences().iterator(); while (it.hasNext()) { OccurrenceIF occ = (OccurrenceIF) it.next(); findPrefixFor(occ.getType()); findPrefixFor(occ.getScope()); } // associations it = topic.getRoles().iterator(); while (it.hasNext()) { AssociationRoleIF role = (AssociationRoleIF) it.next(); AssociationIF assoc = role.getAssociation(); findPrefixFor(assoc.getType()); findPrefixFor(assoc.getScope()); Iterator it2 = assoc.getRoles().iterator(); while (it2.hasNext()) { AssociationRoleIF role2 = (AssociationRoleIF) it2.next(); findPrefixFor(role2.getType()); } } } /** * PRIVATE: Returns the namespace URI to prefix mapping maintained * internally by the writer. Never, ever call this method. It exists * only for testing purposes. */ public Map getNamespaceURIMapping() { return nsuris; } // --- Internal methods private String getScope(ScopedIF scoped) { Iterator it = filterCollection(scoped.getScope()).iterator(); if (!it.hasNext()) return null; StringBuilder buf = new StringBuilder(); while (it.hasNext()) { TopicIF theme = (TopicIF) it.next(); buf.append(getElementTypeName(theme, null) + " "); } return buf.substring(0, buf.length() - 1); // lose last space } private void findPrefixFor(Collection topics) { Iterator it = topics.iterator(); while (it.hasNext()) findPrefixFor((TopicIF) it.next()); } private void findPrefixFor(TopicIF type) { getElementTypeName(type, ""); } private void writeText(String text) throws SAXException { char[] ch = text.toCharArray(); out.characters(ch, 0, ch.length); } private String getElementTypeName(TopicIF topic, String def) { if (topic == null) return def; Collection subjids = topic.getSubjectIdentifiers(); if (!subjids.isEmpty()) { LocatorIF subjid = (LocatorIF) subjids.iterator().next(); return getElementTypeName(subjid.getAddress()); } return getTopicId(topic); } private String getElementTypeName(String psi) { int slash = psi.lastIndexOf('/'); int hash = psi.lastIndexOf('#'); int pos = Math.max(slash, hash); String localname = psi.substring(pos + 1); // FIXME: could crash String prefix = getPrefix(psi.substring(0, pos + 1)); return prefix + ":" + localname; } private String getPrefix(String baseurl) { String prefix = (String) nsuris.get(baseurl); if (prefix == null && !unassigned.contains(baseurl)) { // try to make nice, friendly prefix out of URI int first = baseurl.lastIndexOf('/'); int second = baseurl.lastIndexOf('/', first - 1); // FIXME: could crash? if (first != -1 && second != -1) { String candidate = baseurl.substring(second + 1, first); candidate = StringUtils.normalizeId(candidate); if (candidate != null && candidate.length() <= 1) candidate = null; // could be too short after cutting if (candidate != null && !prefixes.containsKey(candidate)) prefix = candidate; } if (prefix == null) unassigned.add(baseurl); // will assign preXX prefix later else addPrefix(prefix, baseurl); } return prefix; } private void assignRemainingNamespaces() { // We fix bug #1933 by sorting the namespace URIs before assigning // prefixes to them. List uris = new ArrayList(unassigned); unassigned = null; Collections.sort(uris); for (int ix = 0; ix < uris.size(); ix++) addPrefix("pre" + nsuris.size(), (String) uris.get(ix)); } private String getTopicId(TopicIF topic) { LocatorIF baseaddr = topic.getTopicMap().getStore().getBaseAddress(); if (baseaddr != null) { String base = baseaddr.getAddress(); Iterator it = topic.getItemIdentifiers().iterator(); while (it.hasNext()) { String extractedId = extractRelativeId(base, (LocatorIF) it.next()); if (extractedId != null) return extractedId; } } return "id" + topic.getObjectId(); } private String extractRelativeId(String base, LocatorIF srcloc) { String addr = srcloc.getAddress(); if (addr.startsWith(base) && addr.length() > base.length()) { String id = addr.substring(base.length() + 1); if (isValidXMLId(id)) return id; } return null; } private PrettyPrinter makePrinter(Writer out, String encoding) throws IOException { return new PrettyPrinter(out, encoding); } public void addPrefix(String prefix, String nsuri) { prefixes.put(prefix, nsuri); nsuris.put(nsuri, prefix); } // see XTMTopicMapExporter protected String getSubjectIndicatorRef(TopicIF topic, LocatorIF indicator) { TopicMapIF topicmap = topic.getTopicMap(); LocatorIF baseloc = topicmap.getStore().getBaseAddress(); String address = indicator.getExternalForm(); if (baseloc != null) { String base = baseloc.getExternalForm(); if (base != null && address.startsWith(base) && address.indexOf('#') != -1) { String id = address.substring(address.indexOf('#')); if (isValidXMLId(id.substring(1))) return id; } } return address; } /** * Add reifier attribute if object has a reifier. */ private void addReifierAttribute(ReifiableIF tmobject, AttributesImpl atts) { TopicIF reifier = tmobject.getReifier(); if (reifier != null) { if (filter == null || filter.ok(reifier)) { String reifierAttribute = getTopicId(reifier); atts.addAttribute(EMPTY_NAMESPACE, EMPTY_LOCALNAME, "reifier", "CDATA", reifierAttribute); } } } /** * Sets additional properties for the TMXMLWriter. Accepted properties: * <ul><li>'documentElement' (String), corresponds to * {@link #setDocumentElement(java.lang.String)}</li> * <li>'prefixes' (Map), each key-value pair is passed to * {@link #addPrefix(java.lang.String, java.lang.String)} as Strings.</li> * </ul> * @param properties */ public void setAdditionalProperties(Map<String, Object> properties) { Object value = properties.get(PROPERTY_DOCUMENT_ELEMENT); if ((value != null) && (value instanceof String)) { setDocumentElement((String) value); } value = properties.get(PROPERTY_PREFIXES); if ((value != null) && (value instanceof Map)) { Map _prefixes = (Map) value; for (Object k : _prefixes.entrySet()) { addPrefix(k.toString(), _prefixes.get(k).toString()); } } } }