package org.basex.build.xml;
import static org.basex.core.Text.*;
import java.io.BufferedInputStream;
import java.io.ByteArrayInputStream;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.Reader;
import javax.xml.parsers.SAXParserFactory;
import javax.xml.transform.sax.SAXSource;
import org.basex.build.SingleParser;
import org.basex.core.ProgressException;
import org.basex.core.Prop;
import org.basex.io.IO;
import org.basex.io.IOContent;
import org.basex.io.IOFile;
import org.basex.util.Util;
import org.xml.sax.InputSource;
import org.xml.sax.SAXParseException;
import org.xml.sax.XMLReader;
/**
* This class parses an XML document with Java's internal SAX parser. Note that
* not all files can be parsed with the default parser; for example, the
* DBLP documents contain too many entities and cause an out of memory error.
* The internal {@link XMLParser} can be used as an alternative.
*
* @author BaseX Team 2005-12, BSD License
* @author Christian Gruen
*/
public final class SAXWrapper extends SingleParser {
  /** Feature URI: external DTD parsing. */
  private static final String EXTDTD =
    "http://apache.org/xml/features/nonvalidating/load-external-dtd";
  /** Property URI: lexical handler. */
  private static final String LEXHANDLER =
    "http://xml.org/sax/properties/lexical-handler";

  /** Number of bytes read so far via the counting stream (see {@link #wrap}). */
  long counter;
  /** Current line, derived from the newlines seen by the counting stream. */
  int line = 1;

  /** SAX handler reference; assigned in {@link #parse()}. */
  private SAXHandler saxh;
  /** SAX source; may carry an optional XML reader. */
  private final SAXSource saxs;
  /** Number of bytes to be read ({@code 0} if unknown). */
  private long length;
  /** Properties. */
  private final Prop prop;

  /**
   * Constructor.
   * @param source sax source
   * @param pr Properties
   */
  public SAXWrapper(final SAXSource source, final Prop pr) {
    this(source, "", "", pr);
  }

  /**
   * Constructor.
   * @param source sax source
   * @param name name; if not empty, it is assigned as name of the input
   * @param path target path
   * @param pr Properties
   */
  public SAXWrapper(final SAXSource source, final String name,
      final String path, final Prop pr) {
    super(IO.get(source.getSystemId()), path);
    if(!name.isEmpty()) src.name(name);
    saxs = source;
    prop = pr;
  }

  /**
   * Constructor.
   * @param source input reference; its input source is wrapped in a SAX source
   * @param path target path
   * @param pr Properties
   */
  public SAXWrapper(final IO source, final String path, final Prop pr) {
    super(source, path);
    saxs = new SAXSource(source.inputSource());
    prop = pr;
  }

  /**
   * Parses the input with the XML reader of the SAX source, or with a newly
   * created reader if the source provides none. All events are passed on to
   * a {@link SAXHandler} instance.
   * @throws IOException if parsing fails; the message is prefixed with the
   *   system id of the input (and the error position, if available)
   */
  @Override
  public void parse() throws IOException {
    final InputSource is = wrap(saxs.getInputSource());
    final String in = saxs.getSystemId() == null ? DOTS : saxs.getSystemId();

    try {
      XMLReader r = saxs.getXMLReader();
      if(r == null) {
        // no reader supplied: create a namespace-aware, non-validating one
        final SAXParserFactory f = SAXParserFactory.newInstance();
        f.setFeature(EXTDTD, prop.is(Prop.DTD));
        f.setFeature("http://xml.org/sax/features/use-entity-resolver2", false);
        f.setNamespaceAware(true);
        f.setValidating(false);
        f.setXIncludeAware(true);
        r = f.newSAXParser().getXMLReader();
      }

      saxh = new SAXHandler(builder);
      final String cat = prop.get(Prop.CATFILE);
      if(!cat.isEmpty()) CatalogWrapper.set(r, cat);

      r.setDTDHandler(saxh);
      r.setContentHandler(saxh);
      r.setProperty(LEXHANDLER, saxh);
      r.setErrorHandler(saxh);

      // without an input source, the reader resolves the system id itself
      if(is != null) r.parse(is);
      else r.parse(saxs.getSystemId());

    } catch(final SAXParseException ex) {
      // prefix message with source id and error position
      final String msg = Util.info(SCANPOS_X_X, in, ex.getLineNumber(),
          ex.getColumnNumber()) + COLS + ex.getMessage();
      final IOException ioe = new IOException(msg);
      ioe.setStackTrace(ex.getStackTrace());
      throw ioe;
    } catch(final ProgressException ex) {
      throw ex;
    } catch(final Exception ex) {
      // occurs, e.g. if document encoding is invalid:
      // prefix message with source id
      String msg = ex.getMessage();
      if(in != null) msg = '"' + in + '"' + COLS + msg;
      // wrap and return original message
      final IOException ioe = new IOException(msg);
      ioe.setStackTrace(ex.getStackTrace());
      throw ioe;
    } finally {
      // no 'return' in this block: it would discard a pending exception,
      // and parse errors would be lost if no input source is available
      if(is != null) {
        close(is.getCharacterStream());
        close(is.getByteStream());
      }
    }
  }

  /**
   * Closes the specified resource. I/O exceptions are only passed on to the
   * debug output, so that a failing close neither hides a parse error nor
   * prevents other resources from being closed.
   * @param c resource to be closed (may be {@code null})
   */
  private static void close(final java.io.Closeable c) {
    if(c == null) return;
    try {
      c.close();
    } catch(final IOException ex) {
      Util.debug(ex);
    }
  }

  /**
   * Wraps the input source with a stream which counts the number of read bytes
   * and parsed lines. The input source is returned unchanged if it is
   * {@code null}, or if no byte stream can be chosen for it.
   * @param is input source
   * @return resulting stream
   * @throws IOException I/O exception
   */
  private InputSource wrap(final InputSource is) throws IOException {
    if(is == null) return is;

    // choose input stream
    final InputStream in;
    if(is.getByteStream() != null) {
      in = is.getByteStream();
    } else if(is.getSystemId() == null || is.getSystemId().isEmpty()) {
      return is;
    } else if(src instanceof IOFile) {
      in = new FileInputStream(src.path());
    } else if(src instanceof IOContent) {
      in = new ByteArrayInputStream(src.read());
    } else {
      return is;
    }

    // retrieve/estimate number of bytes to be read
    length = src.length();
    if(length <= 0) length = in.available();

    // create wrapper
    final InputSource tmp = new InputSource(new InputStream() {
      /** Underlying stream; byte array streams need no additional buffer. */
      final InputStream buffer = in instanceof ByteArrayInputStream ? in :
        new BufferedInputStream(in);

      @Override
      public int read() throws IOException {
        final int i = buffer.read();
        if(i == '\n') ++line;
        // do not count the end-of-stream marker as a byte
        if(i != -1) ++counter;
        return i;
      }

      @Override
      public int read(final byte[] b, final int off, final int len)
          throws IOException {
        // bulk read: avoids one call of read() per byte
        final int n = buffer.read(b, off, len);
        for(int i = 0; i < n; i++) {
          if(b[off + i] == '\n') ++line;
        }
        if(n > 0) counter += n;
        return n;
      }

      @Override
      public void close() throws IOException {
        buffer.close();
      }
    });
    // preserve the encoding and public id declared for the original source
    tmp.setEncoding(is.getEncoding());
    tmp.setPublicId(is.getPublicId());

    saxs.setInputSource(tmp);
    saxs.setSystemId(is.getSystemId());
    return tmp;
  }

  /**
   * Returns detailed progress information: the input name and the current
   * line if the input length is known, or the default information otherwise.
   * @return progress details
   */
  @Override
  public String det() {
    return length == 0 ? super.det() : Util.info(SCANPOS_X_X, src.name(), line);
  }

  /**
   * Returns the current progress. If the input length is known, the ratio of
   * read bytes is returned; otherwise, the value is derived from the number
   * of nodes counted by the SAX handler ({@code 0} before parsing started).
   * @return progress value
   */
  @Override
  public double prog() {
    if(length == 0) return saxh == null ? 0 : saxh.nodes / 3000000d % 1;
    // the length may only be an estimate: never report more than 100%
    return Math.min(1, (double) counter / length);
  }
}