/* ==================================================================
* UrlDataCollector.java - Dec 9, 2009 9:46:41 AM
*
* Copyright 2007-2009 SolarNetwork.net Dev Team
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License as
* published by the Free Software Foundation; either version 2 of
* the License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
* 02111-1307 USA
* ==================================================================
* $Id$
* ==================================================================
*/
package net.solarnetwork.node.io.url;
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.UnsupportedEncodingException;
import java.net.MalformedURLException;
import java.net.URL;
import java.net.URLConnection;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import net.solarnetwork.node.DataCollector;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.beans.factory.ObjectFactory;
/**
* Implementation of {@link DataCollector} that reads lines of characters from
* a URL.
*
* <p>This class expects the configured URL to return a character data stream
* with newline characters separating records of data.</p>
*
* <p>The configurable properties of this class are:</p>
*
* <dl class="class-properties">
* <dt>url</dt>
* <dd>The URL to access the character data from. Either this or
* {@code urlFactory} must be configured.</dd>
*
* <dt>urlFactory</dt>
* <dd>A factory for creating URL values dynamically. This allows dynamic
* URLs to be used, such as those with the current date in the path. Either
* this or {@code url} must be configured. If this is configured it will
* take precedence over any configured {@code url} value.</dd>
*
* <dt>connectionTimeout</dt>
* <dd>A timeout, in milliseconds, to use for connecting to and reading from
* the configured {@code url}. Defaults to
* {@link #DEFAULT_CONNECTION_TIMEOUT}.</dd>
*
* <dt>matchExpression</dt>
* <dd>A regular expression to match against lines of data read from the URL.
* The collector will ignore all lines of data until this expression matches
* a line read from the configured URL. Once a match is found, it will stop
* reading any further data. Defaults to
* {@link #DEFAULT_MATCH_EXPRESSION}.</dd>
*
* <dt>encoding</dt>
* <dd>The character encoding to use for reading the URL data. If configured
* as <em>null</em> then this class will attempt to use the encoding
* specified by the URL connection itself. If the URL connection does not
* provide an encoding, {@link #DEFAULT_ENCODING} will be used. Defaults to
* <em>null</em>.</dd>
*
* <dt>skipToLastLine</dt>
* <dd>If <em>true</em> then read all available data from the URL and
* return the last line found, as long as it also matches the configured
* {@code matchExpression} property. This mode means that the entire URL
* data stream must be read each time {@link #collectData()} is called.
* Defaults to <em>false</em>.</dd>
* </dl>
*
* @author matt
* @version $Id$
*/
public class UrlDataCollector implements DataCollector {

    /**
     * The default {@code encoding} to use if {@code encoding} is not
     * configured and the URL connection does not specify an encoding.
     */
    public static final String DEFAULT_ENCODING = "UTF-8";

    /** The default value for the {@code connectionTimeout} property. */
    public static final int DEFAULT_CONNECTION_TIMEOUT = 15000;

    /** The default value for the {@code matchExpression} property. */
    public static final String DEFAULT_MATCH_EXPRESSION = "^A";

    private static final Logger log = LoggerFactory.getLogger(UrlDataCollector.class);

    private String url = null;
    private ObjectFactory<String> urlFactory = null;
    private int connectionTimeout = DEFAULT_CONNECTION_TIMEOUT;
    private String matchExpression = DEFAULT_MATCH_EXPRESSION;
    private String encoding = null;
    private boolean skipToLastLine = false;

    /** Data lines collected so far; {@code null} until the first match. */
    private StringBuilder buffer = null;

    /**
     * Character set announced by the most recent URL response, or
     * {@code null} if none was announced. Kept apart from the configured
     * {@code encoding} so a response never overwrites configuration.
     */
    private String detectedEncoding = null;

    /**
     * Get the number of bytes collected so far.
     *
     * @return the length of {@link #getCollectedData()}, {@code 0} if no data
     *         has been collected
     * @throws RuntimeException
     *         if the encoding in use is not supported
     */
    @Override
    public int bytesRead() {
        return getCollectedData().length;
    }

    /**
     * Connect to the URL, read lines until the desired line is found, and
     * append that line to the collected data.
     *
     * <p>The URL is taken from {@code urlFactory} when one is configured,
     * otherwise from {@code url}. When {@code skipToLastLine} is
     * <em>false</em> the first line matching {@code matchExpression} is
     * collected and reading stops. When it is <em>true</em> the entire stream
     * is read and the final line is collected if it matches. If no suitable
     * line is found nothing is collected and a message is logged.</p>
     *
     * @throws RuntimeException
     *         if the URL is malformed or an I/O error occurs
     */
    @Override
    public void collectData() {
        final String resolvedUrl = (urlFactory != null ? urlFactory.getObject() : url);
        final URL dataUrl;
        try {
            dataUrl = new URL(resolvedUrl);
        } catch ( MalformedURLException e ) {
            // keep the cause so the underlying parse problem is not lost
            throw new RuntimeException("Bad url configured: " + resolvedUrl, e);
        }
        log.debug("Connecting to URL [{}]", resolvedUrl);

        final Pattern pat = Pattern.compile(matchExpression);
        final String data;
        InputStream in = null;
        BufferedReader reader = null;
        try {
            URLConnection conn = dataUrl.openConnection();
            conn.setConnectTimeout(connectionTimeout);
            conn.setReadTimeout(connectionTimeout);
            conn.setUseCaches(false);
            in = conn.getInputStream();
            reader = new BufferedReader(new InputStreamReader(in, resolveEncoding(conn)));
            data = findDataLine(reader, pat);
        } catch ( IOException e ) {
            throw new RuntimeException(e);
        } finally {
            // closing the reader closes the stream; if the reader was never
            // created (e.g. unsupported encoding) close the raw stream instead
            try {
                if ( reader != null ) {
                    reader.close();
                } else if ( in != null ) {
                    in.close();
                }
            } catch ( IOException e ) {
                log.warn("IOException closing input stream: {}", e.toString());
            }
        }

        if ( data == null ) {
            log.info("Input stream finished without finding expected data");
            return;
        }
        if ( buffer == null ) {
            buffer = new StringBuilder(data);
        } else {
            buffer.append(data);
        }
    }

    /**
     * Read lines from a reader and pick the one to collect.
     *
     * @param reader
     *        the reader to read lines from
     * @param pat
     *        the compiled {@code matchExpression}
     * @return the selected line, or {@code null} if no suitable line exists
     * @throws IOException
     *         if reading fails
     */
    private String findDataLine(BufferedReader reader, Pattern pat) throws IOException {
        String lastLine = null;
        String line;
        while ( (line = reader.readLine()) != null ) {
            if ( skipToLastLine ) {
                // only the final line is of interest; keep reading to the end
                lastLine = line;
            } else if ( pat.matcher(line).find() ) {
                log.debug("Found matching data line [{}]", line);
                return line;
            }
        }
        // lastLine is only ever set in skipToLastLine mode
        if ( lastLine != null && pat.matcher(lastLine).find() ) {
            log.debug("Found matching data line [{}]", lastLine);
            return lastLine;
        }
        return null;
    }

    /**
     * Decide which character encoding to decode the response with.
     *
     * <p>A configured {@code encoding} always wins. Otherwise the
     * {@code charset} parameter of the response content type is used when it
     * names a charset supported by this JVM, falling back to
     * {@link #DEFAULT_ENCODING}.</p>
     *
     * @param conn
     *        the open connection
     * @return the encoding name to use, never {@code null}
     */
    private String resolveEncoding(URLConnection conn) {
        if ( encoding == null ) {
            // Content-Encoding describes compression (e.g. gzip), not the
            // character set; the charset lives in the Content-Type header
            String charset = charsetFromContentType(conn.getContentType());
            if ( charset != null && !isSupportedCharset(charset) ) {
                log.debug("Ignoring unsupported connection encoding [{}]", charset);
                charset = null;
            }
            if ( charset != null ) {
                log.trace("Using connection encoding [{}]", charset);
            }
            detectedEncoding = charset;
        }
        return getEncodingToUse();
    }

    /**
     * Extract the {@code charset} parameter from a content type value such
     * as {@code text/plain; charset=ISO-8859-1}.
     *
     * @param contentType
     *        the content type value, may be {@code null}
     * @return the charset name, or {@code null} if none is present
     */
    private static String charsetFromContentType(String contentType) {
        if ( contentType == null ) {
            return null;
        }
        String[] parts = contentType.split(";");
        // parts[0] is the media type itself; parameters follow
        for ( int i = 1; i < parts.length; i++ ) {
            String param = parts[i].trim();
            int eq = param.indexOf('=');
            if ( eq < 0 || !"charset".equalsIgnoreCase(param.substring(0, eq).trim()) ) {
                continue;
            }
            String value = param.substring(eq + 1).trim();
            if ( value.length() >= 2 && value.startsWith("\"") && value.endsWith("\"") ) {
                value = value.substring(1, value.length() - 1).trim();
            }
            return (value.length() == 0 ? null : value);
        }
        return null;
    }

    /**
     * Test if a charset name is both legal and supported by this JVM.
     *
     * @param name
     *        the charset name to test
     * @return {@code true} if the charset can be used
     */
    private static boolean isSupportedCharset(String name) {
        try {
            return java.nio.charset.Charset.isSupported(name);
        } catch ( IllegalArgumentException e ) {
            // thrown for syntactically illegal charset names
            return false;
        }
    }

    /**
     * Get the collected data as bytes, using the encoding in use.
     *
     * @return the collected bytes, or an empty array if no data has been
     *         collected
     * @throws RuntimeException
     *         if the encoding in use is not supported
     */
    @Override
    public byte[] getCollectedData() {
        if ( buffer == null ) {
            return new byte[0];
        }
        try {
            return buffer.toString().getBytes(getEncodingToUse());
        } catch ( UnsupportedEncodingException e ) {
            throw new RuntimeException(e);
        }
    }

    /**
     * Get the effective encoding: the configured one, else the one detected
     * from the last response, else {@link #DEFAULT_ENCODING}.
     *
     * @return the encoding name, never {@code null}
     */
    private String getEncodingToUse() {
        if ( encoding != null ) {
            return encoding;
        }
        return (detectedEncoding != null ? detectedEncoding : DEFAULT_ENCODING);
    }

    /**
     * Get the collected data as a string.
     *
     * @return the collected data, or {@code null} if no data has been
     *         collected
     */
    @Override
    public String getCollectedDataAsString() {
        return (buffer == null ? null : buffer.toString());
    }

    /**
     * Stop collecting data.
     *
     * <p>This implementation does nothing: {@link #collectData()} closes its
     * URL stream before returning, and the collected data is left intact.</p>
     */
    @Override
    public void stopCollecting() {
        // nothing to release
    }

    /**
     * @return the url
     */
    public String getUrl() {
        return url;
    }

    /**
     * @param url the url to set
     */
    public void setUrl(String url) {
        this.url = url;
    }

    /**
     * @return the connectionTimeout
     */
    public int getConnectionTimeout() {
        return connectionTimeout;
    }

    /**
     * @param connectionTimeout the connectionTimeout to set
     */
    public void setConnectionTimeout(int connectionTimeout) {
        this.connectionTimeout = connectionTimeout;
    }

    /**
     * @return the matchExpression
     */
    public String getMatchExpression() {
        return matchExpression;
    }

    /**
     * @param matchExpression the matchExpression to set
     */
    public void setMatchExpression(String matchExpression) {
        this.matchExpression = matchExpression;
    }

    /**
     * @return the configured encoding, or {@code null} if not configured
     */
    public String getEncoding() {
        return encoding;
    }

    /**
     * @param encoding the encoding to set
     */
    public void setEncoding(String encoding) {
        this.encoding = encoding;
    }

    /**
     * @return the skipToLastLine
     */
    public boolean isSkipToLastLine() {
        return skipToLastLine;
    }

    /**
     * @param skipToLastLine the skipToLastLine to set
     */
    public void setSkipToLastLine(boolean skipToLastLine) {
        this.skipToLastLine = skipToLastLine;
    }

    /**
     * @return the urlFactory
     */
    public ObjectFactory<String> getUrlFactory() {
        return urlFactory;
    }

    /**
     * @param urlFactory the urlFactory to set
     */
    public void setUrlFactory(ObjectFactory<String> urlFactory) {
        this.urlFactory = urlFactory;
    }

}