package org.exist.fluent; import java.io.*; import java.net.*; import java.nio.*; import java.nio.charset.*; import java.util.*; import org.xml.sax.InputSource; /** * A source of data to be loaded into the database, distinguishing between XML documents * and other (binary) documents. While you can load XML documents as binary without * problem, but will not be able to query them or otherwise access their structure in the * database. * * @author <a href="mailto:piotr@ideanest.com">Piotr Kaminski</a> */ public abstract class Source { private final String oldName; protected String encoding; private Source(String oldName) { this.oldName = oldName; } /** * Set an encoding that indicates how the supplied bytes should be converted to characters * or vice-versa. Whether and how the encoding is used depends on the source. If the * encoding is needed but has not been set, the system will make a best guess or default * to UTF-8 as appropriate. * * @param characterEncoding the character encoding to use when dealing with this source * @return this source */ public Source encoding(String characterEncoding) { if (encoding != null) throw new IllegalStateException("encoding already set"); encoding = characterEncoding; return this; } final void applyOldName(Name name) { name.setOldName(oldName); } /** * A source of XML data to be loaded into the database. You should acquire instances by * using the static methods in the {@link Source} class. */ public static abstract class XML extends Source { private XML() {super(null);} private XML(String oldName) {super(oldName);} final InputSource toInputSource() throws IOException { InputSource source = createInputSource(); if (encoding != null) source.setEncoding(encoding); return source; } abstract InputSource createInputSource() throws IOException; @Override public String toString() {return "xml ";} } /** * A source of binary data to be loaded into the database. You should acquire instances by * using the static methods in the {@link Source} class. */ public static abstract class Blob extends Source { protected byte[] contents; protected int offset, length; private Blob() {super(null);} private Blob(String oldName) {super(oldName);} protected InputStream toInputStream() throws IOException { if (contents == null) createBytes(); assert contents != null; return new ByteArrayInputStream(contents, offset, length); } protected int getLength() { return length; } protected void createBytes() throws IOException {} @Override public String toString() {return "blob ";} protected void encode(CharBuffer buf) throws CharacterCodingException { Charset charset = Charset.forName(encoding == null ? "UTF-8" : encoding); ByteBuffer bb = charset.newEncoder().encode(buf); contents = bb.array(); offset = bb.arrayOffset(); length = bb.limit(); } } /** * Create a source that reads an XML document from an external file. * * @param file the XML file * @return a source that reads XML from the file */ public static Source.XML xml(final File file) { final String uri = file.toURI().toASCIIString(); return new XML(file.getName()) { @Override InputSource createInputSource() throws IOException { InputSource src = new InputSource(uri); if (encoding != null) src.setEncoding(encoding); return src; } @Override public String toString() { return super.toString() + "file '" + file.getPath() + "'"; } }; } /** * Create a source that reads a binary document from an external file. * * @param file the binary file * @return a source that reads binary from the file */ public static Source.Blob blob(final File file) { return new Blob(file.getName()) { @Override protected InputStream toInputStream() throws IOException { long fileLength = file.length(); if (fileLength > Integer.MAX_VALUE) throw new IOException("file too large"); length = (int) fileLength; return new BufferedInputStream(new FileInputStream(file)); } @Override public String toString() { return super.toString() + "file '" + file.getPath() + "'"; } }; } /** * Create a source of XML data that reads from the given input stream. Note that the * contents of the stream will need to be read <em>twice</em>, so if the stream is transient * (i.e., its contents are not available for random access, such as for a socket input stream) its * contents will be automatically saved in memory. This could be very inefficient if the * document being streamed is large. * * @param stream the input stream to read the XML from * @return a source that reads from the given input stream */ public static Source.XML xml(final InputStream stream) { return new Source.XML() { private InputStream markedStream; @Override InputSource createInputSource() throws IOException { if (markedStream == null) { if (stream.markSupported()) { markedStream = stream; } else { // TODO: if stream size exceeds some threshold, save contents to a temporary file instead markedStream = new ByteArrayInputStream(readInputStream(stream, null)); } markedStream.mark(Integer.MAX_VALUE); } markedStream.reset(); return new InputSource(markedStream); } @Override public String toString() { return super.toString() + "input stream" + (markedStream == stream ? "": " (cached)"); } }; } /** * Create a source of binary data that reads from the given input stream. At this time, * eXist doesn't support streaming into the database, so the contents of the stream will * be read entirely into memory before being persisted. * * @param stream the binary stream to read from * @return a source that reads from the given input stream */ public static Source.Blob blob(final InputStream stream) { return new Source.Blob() { @Override protected void createBytes() throws IOException { contents = readInputStream(stream, null); length = contents.length; } @Override public String toString() { return super.toString() + "input stream"; } }; } /** * Create a source of XML data that reads from the given reader. Note that the * contents of the reader will need to be read <em>twice</em>, so its * contents will be automatically saved in memory. This could be very inefficient if the * document being streamed is large. * * @param reader the reader to read the XML from * @return a source that reads from the given reader */ public static Source.XML xml(final Reader reader) { return new Source.XML() { private char[] contents; @Override InputSource createInputSource() throws IOException { if (contents == null) contents = readReader(reader, null); return new InputSource(new CharArrayReader(contents)); } @Override public String toString() { return super.toString() + "reader"; } }; } /** * Create a source of binary data that reads from the given reader. The characters * will be converted into bytes by using the encoding specified for this source, or UTF-8 * by default. At this time, eXist doesn't support streaming into the database, so the * contents of the reader will be read entirely into memory before being persisted. * * @param reader the reader to read from * @return a source that reads from the given reader */ public static Source.Blob blob(final Reader reader) { return new Source.Blob() { @Override protected void createBytes() throws IOException { encode(CharBuffer.wrap(readReader(reader, null))); } @Override public String toString() { return super.toString() + "reader"; } }; } /** * Create a source of XML data from the given byte array. If this source specifies an * encoding, it will be used to decode the characters, otherwise the encoding will be * guessed using standard XML parsing techniques. * * @param bytes the source bytes * @return a source that reads XML from the given byte array */ public static Source.XML xml(final byte[] bytes) { return new Source.XML() { @Override InputSource createInputSource() throws IOException { return new InputSource(new ByteArrayInputStream(bytes)); } @Override public String toString() { return super.toString() + "byte array [" + bytes.length + "]"; } }; } /** * Create a source of binary data from the given byte array. The bytes will be stored in * the database verbatim. * * @param bytes the source bytes * @return a source that reads binary data from the given byte array */ public static Source.Blob blob(final byte[] bytes) { return new Source.Blob() { { contents = bytes; length = bytes.length; } @Override public String toString() { return super.toString() + "byte array [" + bytes.length + "]"; } }; } /** * Create a source of XML data from the given string literal. The string should contain * an XML document, not a filename or URI. * * @param literal the contents of an XML document * @return a source that reads XML from the given literal string */ public static Source.XML xml(final String literal) { return new Source.XML() { @Override InputSource createInputSource() throws IOException { return new InputSource(new StringReader(literal)); } @Override public String toString() { return super.toString() + "literal string:\n" + literal; } }; } /** * Create a source of binary data from the given string literal. The string should contain * the actual data, not a filename or URI. The string's characters will be converted into * bytes by using the encoding specified for this source, or UTF-8 by default. * * @param literal the contents of the document * @return a source that reads binary data from the given literal string */ public static Source.Blob blob(final String literal) { return new Source.Blob() { @Override protected void createBytes() throws IOException { encode(CharBuffer.wrap(literal)); } @Override public String toString() { return super.toString() + "literal string"; } }; } /** * Create a source of XML that reads from the given URL. Note that the URL's contents * will have to be retrieved twice, so if the connection is slow or expensive it might be * worthwhile to cache them and use a different (local) source constructor. * * @param url the URL to read XML from * @return a source that reads XML from the given URL * @throws URISyntaxException if the URL syntax is not strictly spec-compliant */ public static Source.XML xml(URL url) throws URISyntaxException { final String uri = url.toURI().toASCIIString(); return new Source.XML(urlToFilename(url)) { @Override InputSource createInputSource() throws IOException { return new InputSource(uri); } @Override public String toString() { return super.toString() + "at URL '" + uri + "'"; } }; } /** * Create a source of binary data that reads from the given URL. * * @param url the URL to read binary data from * @return a source that reads binary data from the given URL */ public static Source.Blob blob(final URL url) { return new Source.Blob(urlToFilename(url)) { @Override protected InputStream toInputStream() throws IOException { URLConnection connection = url.openConnection(); connection.setAllowUserInteraction(false); connection.connect(); if (connection.getContentLength() == -1) { try { byte[] bytes = null; bytes = readInputStream(connection.getInputStream(), bytes); length = bytes.length; return new ByteArrayInputStream(bytes); } finally { connection.getInputStream().close(); } } else { length = connection.getContentLength(); return connection.getInputStream(); } } @Override public String toString() { return super.toString() + "at URL '" + url + "'"; } }; } private static String urlToFilename(URL url) { String path = url.getPath(); if (path == null || path.length() == 0) return null; int k = path.lastIndexOf('/'); if (k >= 0) path = path.substring(k+1, path.length()); return path; } private static final int CHUNK_SIZE = 16384; private static byte[] readInputStream(InputStream stream, byte[] chunk) throws IOException { PushbackInputStream in = new PushbackInputStream(stream); if (chunk == null) chunk = new byte[CHUNK_SIZE]; int totalSize = 0; List<byte[]> chunks = new LinkedList<byte[]>(); chunks.add(chunk); int k = 0; for(;;) { int n = in.read(chunk, k, chunk.length-k); if (n == -1) break; totalSize += n; k += n; if (k == chunk.length) { // probe next byte to avoid allocating another chunk if contents fit perfectly int b = in.read(); if (b == -1) break; in.unread(b); chunk = new byte[CHUNK_SIZE]; chunks.add(chunk); k = 0; } } if (chunks.size() == 1 && chunks.get(0).length == totalSize) return chunks.get(0); chunk = new byte[totalSize]; k = 0; for (byte[] a : chunks) { System.arraycopy(a, 0, chunk, k, Math.min(totalSize - k, a.length)); k += a.length; } return chunk; } private static char[] readReader(Reader reader, char[] chunk) throws IOException { PushbackReader in = new PushbackReader(reader); if (chunk == null) chunk = new char[CHUNK_SIZE]; int totalSize = 0; List<char[]> chunks = new LinkedList<char[]>(); chunks.add(chunk); int k = 0; for(;;) { int n = in.read(chunk, k, chunk.length-k); if (n == -1) break; totalSize += n; k += n; if (k == chunk.length) { // probe next byte to avoid allocating another chunk if contents fit perfectly int b = in.read(); if (b == -1) break; in.unread(b); chunk = new char[CHUNK_SIZE]; chunks.add(chunk); k = 0; } } if (chunks.size() == 1 && chunks.get(0).length == totalSize) return chunks.get(0); chunk = new char[totalSize]; k = 0; for (char[] a : chunks) { System.arraycopy(a, 0, chunk, k, Math.min(totalSize - k, a.length)); k += a.length; } return chunk; } }