/* * #! * Ontopia Classify * #- * Copyright (C) 2001 - 2013 The Ontopia Project * #- * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. * !# */ package net.ontopia.topicmaps.classify; import java.io.BufferedInputStream; import java.io.ByteArrayInputStream; import java.io.IOException; import java.util.Arrays; import java.util.Collection; import java.util.HashSet; import net.ontopia.utils.OntopiaRuntimeException; import net.ontopia.xml.DefaultXMLReaderFactory; import org.xml.sax.Attributes; import org.xml.sax.ContentHandler; import org.xml.sax.InputSource; import org.xml.sax.SAXException; import org.xml.sax.SAXParseException; import org.xml.sax.XMLReader; import org.xml.sax.helpers.DefaultHandler; /** * INTERNAL: */ public class XMLFormatModule implements FormatModuleIF { protected Collection<String> skipElements; protected String[] extensions = new String[] {".xml"}; protected byte[] magicBytes = FormatModule.getBytes("<?xml"); public XMLFormatModule() { setSkipElements(Arrays.asList(new String[] {"sgml.block", "verbatim", "example", "sgml", "author", "bibliog", "web", "Authorinfo", "AuthorInfo", "AUTHORINFO", "code.block", "code.line", "Pre", "PRE", "programlisting", "acknowl", "code"})); } public void setSkipElements(Collection<String> skipElements) { this.skipElements = new HashSet<String>(skipElements); } protected XMLReader createXMLReader() throws SAXException { return new DefaultXMLReaderFactory().createXMLReader(); } protected ContentHandler getContentHandler(TextHandlerIF handler) { return new XMLHandler(handler); } public boolean matchesContent(ClassifiableContentIF cc) { return FormatModule.startsWith(cc.getContent(), magicBytes); } public boolean matchesIdentifier(ClassifiableContentIF cc) { return FormatModule.matchesExtension(cc.getIdentifier(), extensions); } public void readContent(ClassifiableContentIF cc, TextHandlerIF handler) { // create new parser object XMLReader parser; try { parser = createXMLReader(); // create content handler parser.setContentHandler(getContentHandler(handler)); // parse input source parser.parse(new InputSource(new BufferedInputStream(new ByteArrayInputStream(cc.getContent())))); } catch (SAXParseException e) { throw new OntopiaRuntimeException("XML parsing problem: " + e.toString() + " at: "+ e.getSystemId() + ":" + e.getLineNumber() + ":" + e.getColumnNumber(), e); } catch (SAXException e) { if (e.getException() instanceof IOException) throw new OntopiaRuntimeException((IOException) e.getException()); throw new OntopiaRuntimeException(e); } catch (Exception e) { throw new OntopiaRuntimeException(e); } } private class XMLHandler extends DefaultHandler { private TextHandlerIF thandler; private int skipLevel; private XMLHandler(TextHandlerIF thandler) { this.thandler = thandler; } public void startElement(String nsuri, String lname, String qname, Attributes attrs) throws SAXException { if (skipElements != null && skipElements.contains(lname)) { skipLevel++; } else if (skipLevel == 0) { thandler.startRegion(lname); } } public void characters (char[] ch, int start, int length) { if (skipLevel == 0) thandler.text(ch, start, length); } public void endElement(String nsuri, String lname, String qname) throws SAXException { if (skipElements != null && skipElements.contains(lname)) { skipLevel--; } else if (skipLevel == 0) { thandler.endRegion(); } } } }