package at.ac.tuwien.dsg.scaledom.io.impl;

import static com.google.common.base.Preconditions.checkArgument;
import static com.google.common.base.Preconditions.checkNotNull;

import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.Reader;
import java.net.HttpURLConnection;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import at.ac.tuwien.dsg.scaledom.io.NodeLocation;
import at.ac.tuwien.dsg.scaledom.io.ReaderFactory;
import at.ac.tuwien.dsg.scaledom.io.ReaderWithSystemID;
import at.ac.tuwien.dsg.scaledom.parser.XmlEventCharLocation;
import at.ac.tuwien.dsg.scaledom.parser.XmlEventLocation;
import at.ac.tuwien.dsg.scaledom.util.Utils;

/**
 * <code>ReaderFactory</code> implementation for documents served over HTTP.<br/>
 * This implementation provides a <code>Reader</code> based upon the URL <code>InputStream</code> for
 * full document reading and one based upon <code>HttpChannelRangeInputStream</code> for reading a
 * range of the document.
 *
 * @author Waldemar Hummer
 */
public class HttpReaderFactory extends ReaderFactory {

	/** Logger. */
	private static final Logger log = LoggerFactory.getLogger(HttpReaderFactory.class);

	/** Connection backing the most recently created range reader, or <code>null</code> if there is none. */
	private HttpURLConnection urlConnection;
	/** Number of bytes per character; -1 signals a variable-width encoding. */
	private final int numberOfBytesPerCharacter;
	/** Start offset of the currently obtained reader. */
	private long readerOffset;
	/** Content length of the HTTP document; <code>null</code> until it has been requested once. */
	private Long contentLength = null;

	/**
	 * Default constructor.
	 *
	 * @param source the underlying document source.
	 * @throws IOException If some I/O error occurs.
	 */
	public HttpReaderFactory(final HttpDocumentSource source) throws IOException {
		super(source);

		numberOfBytesPerCharacter = Utils.getNumberOfBytesPerCharacter(source.getEncoding());
		readerOffset = 0;

		log.debug("Encoding {} is used for the document. Number of bytes per character: {}",
				source.getEncoding(), numberOfBytesPerCharacter);
	}

	/**
	 * Opens a new connection to the document URL and returns a reader over the whole document.
	 * The reader offset is reset to 0.
	 *
	 * @return a buffered reader, wrapped together with the URL as system ID.
	 * @throws IOException If the connection or the reader cannot be opened.
	 */
	@Override
	public Reader newReader() throws IOException {
		final HttpDocumentSource source = (HttpDocumentSource) getDocumentSource();
		readerOffset = 0;

		final InputStream is = source.getUrl().openConnection().getInputStream();
		boolean success = false;
		try {
			final Reader reader = new BufferedReader(new InputStreamReader(is, source.getEncoding()));
			final Reader result = new ReaderWithSystemID(source.getUrl().toExternalForm(), reader);
			success = true;
			return result;
		} finally {
			if (!success) {
				// Wrapping failed (e.g. unsupported encoding): do not leak the open HTTP stream.
				closeQuietly(is);
			}
		}
	}

	/**
	 * Returns a reader over the part of the document described by the given location. Any connection
	 * left over from a previous call is disconnected and a fresh one is opened.
	 *
	 * @param location the location to read; must be a non-null <code>FileNodeLocation</code>.
	 * @return a buffered reader, wrapped together with the URL as system ID.
	 * @throws IOException If some I/O error occurs.
	 * @throws NullPointerException If <code>location</code> is <code>null</code>.
	 * @throws IllegalArgumentException If <code>location</code> is not a <code>FileNodeLocation</code>.
	 */
	@Override
	public Reader newReaderForLocation(final NodeLocation location) throws IOException {
		checkNotNull(location, "Argument location must not be null");
		checkArgument(location instanceof FileNodeLocation, "Argument location must be of type FileNodeLocation");

		final HttpDocumentSource source = (HttpDocumentSource) getDocumentSource();

		if (contentLength == null) {
			contentLength = fetchContentLength(source);
		}

		// Close and re-open URL connection
		disconnect();
		urlConnection = (HttpURLConnection) source.getUrl().openConnection();

		final FileNodeLocation fileLocation = (FileNodeLocation) location;
		readerOffset = fileLocation.getStartOffset();

		final InputStream rangeStream = new HttpChannelRangeInputStream(urlConnection, contentLength,
				fileLocation.getStartOffset(), fileLocation.getEndOffset());
		final Reader reader = new BufferedReader(new InputStreamReader(rangeStream, source.getEncoding()));
		return new ReaderWithSystemID(source.getUrl().toExternalForm(), reader);
	}

	/**
	 * Disconnects the connection of the most recently created range reader, if any.
	 *
	 * @throws IOException declared by the overridden method; not thrown by this implementation.
	 */
	@Override
	public void close() throws IOException {
		disconnect();
	}

	/**
	 * Converts a parser event location into a <code>FileNodeLocation</code>. The starting offset is
	 * <code>(location.getStartingOffset() + readerOffset - additionalOffset)</code>, multiplied by the
	 * number of bytes per character if the location is an <code>XmlEventCharLocation</code>. The end
	 * offset is left as <code>FileNodeLocation.OFFSET_UNKNOWN</code>.
	 *
	 * @param location the event location reported by the parser.
	 * @param additionalOffset offset to subtract from the event's starting offset.
	 * @return the corresponding node location.
	 * @throws IllegalArgumentException If a character location is given but the document is
	 *         variable-width encoded.
	 */
	@Override
	public NodeLocation eventLocationToNodeLocation(final XmlEventLocation location, final long additionalOffset) {
		final int sizefac = location instanceof XmlEventCharLocation ? numberOfBytesPerCharacter : 1;
		if (sizefac == -1) {
			// problem: got XmlEventCharLocation but file is variable-width encoded -> throw exception
			final HttpDocumentSource fds = (HttpDocumentSource) getDocumentSource();
			throw new IllegalArgumentException("File is variable-width encoded (" + fds.getEncoding()
					+ "), you must use an XmlParser implementation which is able to output byte locations.");
		}

		final long startingOffset = (location.getStartingOffset() + readerOffset - additionalOffset) * sizefac;
		return new FileNodeLocation(startingOffset, FileNodeLocation.OFFSET_UNKNOWN);
	}

	/** Opens a short-lived connection to read the document's content length, then disconnects it. */
	private static long fetchContentLength(final HttpDocumentSource source) throws IOException {
		final HttpURLConnection probe = (HttpURLConnection) source.getUrl().openConnection();
		try {
			return probe.getContentLengthLong();
		} finally {
			probe.disconnect();
		}
	}

	/** Disconnects the current range connection, if any, and drops the stale reference. */
	private void disconnect() {
		if (urlConnection != null) {
			urlConnection.disconnect();
			urlConnection = null;
		}
	}

	/** Closes the stream after an earlier failure; a close failure is only logged so it cannot mask the cause. */
	private static void closeQuietly(final InputStream is) {
		try {
			is.close();
		} catch (final IOException e) {
			log.debug("Failed to close HTTP input stream after an earlier error", e);
		}
	}

}