/**
 * This software is licensed to you under the Apache License, Version 2.0 (the
 * "Apache License").
 *
 * LinkedIn's contributions are made under the Apache License. If you contribute
 * to the Software, the contributions will be deemed to have been made under the
 * Apache License, unless you expressly indicate otherwise. Please do not make any
 * contributions that would be inconsistent with the Apache License.
 *
 * You may obtain a copy of the Apache License at http://www.apache.org/licenses/LICENSE-2.0
 * Unless required by applicable law or agreed to in writing, this software
 * distributed under the Apache License is distributed on an "AS IS" BASIS, WITHOUT
 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the Apache
 * License for the specific language governing permissions and limitations for the
 * software governed under the Apache License.
 *
 * © 2012 LinkedIn Corp. All Rights Reserved.
 */
package com.senseidb.dataprovider.http;

import java.io.IOException;
import java.io.InputStream;
import java.util.Comparator;
import java.util.Iterator;

import org.apache.commons.io.IOUtils;
import org.apache.http.Header;
import org.apache.http.HeaderElement;
import org.apache.http.HttpEntity;
import org.apache.http.HttpException;
import org.apache.http.HttpRequest;
import org.apache.http.HttpRequestInterceptor;
import org.apache.http.HttpResponse;
import org.apache.http.HttpResponseInterceptor;
import org.apache.http.HttpVersion;
import org.apache.http.StatusLine;
import org.apache.http.client.entity.GzipDecompressingEntity;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.conn.ClientConnectionManager;
import org.apache.http.conn.scheme.PlainSocketFactory;
import org.apache.http.conn.scheme.Scheme;
import org.apache.http.conn.scheme.SchemeRegistry;
import org.apache.http.conn.ssl.SSLSocketFactory;
import org.apache.http.impl.client.DefaultHttpClient;
import org.apache.http.impl.conn.SingleClientConnManager;
import org.apache.http.params.BasicHttpParams;
import org.apache.http.params.HttpConnectionParams;
import org.apache.http.params.HttpParams;
import org.apache.http.params.HttpProtocolParams;
import org.apache.http.protocol.HttpContext;
import org.apache.log4j.Logger;

import proj.zoie.api.DataConsumer.DataEvent;
import proj.zoie.impl.indexing.StreamDataProvider;

/**
 * A {@link StreamDataProvider} that pulls batches of events over HTTP GET.
 *
 * <p>Each time the current batch is exhausted, {@link #next()} issues a GET against the URL
 * produced by {@link #buildGetString(String)} for the current offset, hands the response body
 * to {@link #parse(InputStream)} and then serves the resulting events one at a time. The
 * offset is advanced to the version of every event handed out. When a fetch fails or yields
 * no events, the provider waits for the configured retry time before trying again.
 *
 * <p>Requests advertise {@code Accept-Encoding: gzip} and gzip-encoded responses are
 * transparently decompressed.
 *
 * @param <D> type of the data carried by each event
 */
public abstract class HttpStreamDataProvider<D> extends StreamDataProvider<D>
    implements HttpDataProviderAdminMBean {

  private static final Logger logger = Logger.getLogger(HttpStreamDataProvider.class);

  /** Not referenced by this class; the timeouts configured in the constructor are 5000 ms. */
  public static final int DEFAULT_TIMEOUT_MS = 10000;

  /** Retry time, in milliseconds, used until {@link #setRetryTime(int)} is called. */
  public static final int DEFAULT_RETRYTIME_MS = 5000;

  public static final String DEFAULT_OFFSET_PARAM = "offset";

  public static final String DEFAULT_DATA_PARAM = "data";

  /**
   * Misspelled predecessor of {@link #DEFAULT_DATA_PARAM}, retained so existing references
   * keep compiling.
   *
   * @deprecated use {@link #DEFAULT_DATA_PARAM}
   */
  @Deprecated
  public static final String DFEAULT_DATA_PARAM = DEFAULT_DATA_PARAM;

  protected final String _baseUrl;
  protected final int _fetchSize;
  /** Stored for subclasses; this class never reads it. */
  protected final String _password;
  /** Offset passed to {@link #buildGetString(String)} for the next fetch. */
  protected String _offset;
  /** Offset restored by {@link #reset()}; {@code null} means reset leaves the offset alone. */
  protected String _initialOffset;

  private final ClientConnectionManager _httpClientManager;
  private DefaultHttpClient _httpclient;
  private final boolean _disableHttps;

  /** Events of the most recently fetched batch that have not been handed out yet. */
  private Iterator<DataEvent<D>> _currentDataIter;
  private volatile boolean _stopped;
  private int _retryTime;

  private volatile long _httpGetLatency;
  private volatile long _responseParseLatency;

  /**
   * Creates a provider in the stopped state; call {@link #start()} before consuming.
   *
   * @param versionComparator comparator handed to the superclass
   * @param baseUrl           base URL, stored in {@link #_baseUrl} for subclasses
   * @param pw                password, stored in {@link #_password} for subclasses
   * @param fetchSize         batch size, stored in {@link #_fetchSize} for subclasses
   * @param startingOffset    offset used for the first fetch
   * @param disableHttps      when {@code false}, the client is passed through
   *                          {@code HttpsClientDecorator.decorate}
   */
  public HttpStreamDataProvider(Comparator<String> versionComparator, String baseUrl, String pw,
      int fetchSize, String startingOffset, boolean disableHttps) {
    super(versionComparator);
    _baseUrl = baseUrl;
    _password = pw;
    _fetchSize = fetchSize;
    _offset = startingOffset;
    _disableHttps = disableHttps;
    _initialOffset = null;
    _currentDataIter = null;
    _stopped = true;
    _httpGetLatency = 0L;
    _responseParseLatency = 0L;

    Scheme http = new Scheme("http", 80, PlainSocketFactory.getSocketFactory());
    SchemeRegistry sr = new SchemeRegistry();
    sr.register(http);

    HttpParams params = new BasicHttpParams();
    params.setParameter(HttpProtocolParams.PROTOCOL_VERSION, HttpVersion.HTTP_1_1);
    params.setParameter(HttpProtocolParams.HTTP_CONTENT_CHARSET, "UTF-8");
    params.setIntParameter(HttpConnectionParams.CONNECTION_TIMEOUT, 5000); // 5s conn timeout
    params.setIntParameter(HttpConnectionParams.SO_LINGER, 0); // no socket linger
    params.setBooleanParameter(HttpConnectionParams.TCP_NODELAY, true); // tcp no delay
    params.setIntParameter(HttpConnectionParams.SO_TIMEOUT, 5000); // 5s sock timeout
    params.setIntParameter(HttpConnectionParams.SOCKET_BUFFER_SIZE, 1024 * 1024); // 1mb socket buffer
    params.setBooleanParameter(HttpConnectionParams.SO_REUSEADDR, true); // allow address reuse

    _httpClientManager = new SingleClientConnManager(sr);
    _httpclient = new DefaultHttpClient(_httpClientManager, params);

    if (!_disableHttps) {
      _httpclient = HttpsClientDecorator.decorate(_httpclient);
    }

    // Ask the server for gzip unless the request already states its own preference.
    _httpclient.addRequestInterceptor(new HttpRequestInterceptor() {
      public void process(final HttpRequest request, final HttpContext context)
          throws HttpException, IOException {
        if (!request.containsHeader("Accept-Encoding")) {
          request.addHeader("Accept-Encoding", "gzip");
        }
      }
    });

    // Transparently unwrap gzip-encoded response bodies.
    _httpclient.addResponseInterceptor(new HttpResponseInterceptor() {
      public void process(final HttpResponse response, final HttpContext context)
          throws HttpException, IOException {
        HttpEntity entity = response.getEntity();
        if (entity == null) {
          // Body-less responses (e.g. 204/304) carry no entity; nothing to decode.
          return;
        }
        Header ceheader = entity.getContentEncoding();
        if (ceheader == null) {
          return;
        }
        HeaderElement[] codecs = ceheader.getElements();
        for (int i = 0; i < codecs.length; i++) {
          if (codecs[i].getName().equalsIgnoreCase("gzip")) {
            response.setEntity(new GzipDecompressingEntity(entity));
            return;
          }
        }
      }
    });

    _retryTime = DEFAULT_RETRYTIME_MS; // default retry after 5 seconds
  }

  /**
   * Sets how long {@link #next()} waits after an empty or failed fetch.
   *
   * <p>The value is passed unchanged to {@link Object#wait(long)}: a negative value makes
   * that call throw {@link IllegalArgumentException}, and {@code 0} waits until notified.
   *
   * @param retryTime wait time in milliseconds
   */
  public void setRetryTime(int retryTime) {
    _retryTime = retryTime;
  }

  /**
   * @return the wait time, in milliseconds, applied after an empty or failed fetch
   */
  public int getRetryTime() {
    return _retryTime;
  }

  /**
   * Records the offset that {@link #reset()} will restore. The current offset is not touched.
   *
   * @param initialOffset offset to go back to on reset; {@code null} disables the restore
   */
  @Override
  public void setStartingOffset(String initialOffset) {
    _initialOffset = initialOffset;
  }

  /**
   * Builds the request URI for one fetch.
   *
   * @param offset the current offset
   * @return the string handed to {@link HttpGet#HttpGet(String)}
   */
  protected abstract String buildGetString(String offset);

  /**
   * Turns a response body into events.
   *
   * <p>The stream is closed as soon as the fetch that called this method completes, so
   * implementations must not defer reading from it until the returned iterator is consumed.
   *
   * @param is response body (already gzip-decoded when the server compressed it)
   * @return the events of this batch; {@code null} or an empty iterator means "no data"
   * @throws Exception on any parse failure; the fetch is then aborted and retried
   */
  protected abstract Iterator<DataEvent<D>> parse(InputStream is) throws Exception;

  /**
   * Fetches and parses one batch for the current offset, updating both latency gauges.
   *
   * @return the parsed events, or {@code null} when the response carried no body
   * @throws HttpException if the server answered with status 400 or above, on I/O failure,
   *                       or when reading/parsing the body failed
   */
  private Iterator<DataEvent<D>> fetchBatch() throws HttpException {
    InputStream stream = null;
    try {
      HttpGet httpget = new HttpGet(buildGetString(_offset));

      long getStart = System.currentTimeMillis();
      HttpResponse response = _httpclient.execute(httpget);
      _httpGetLatency = System.currentTimeMillis() - getStart;

      HttpEntity entity = response.getEntity();
      StatusLine status = response.getStatusLine();
      int statusCode = status.getStatusCode();

      if (statusCode >= 400) {
        // Close the error body so the single managed connection is free for the retry.
        if (entity != null) {
          try {
            IOUtils.closeQuietly(entity.getContent());
          } catch (Exception e) {
            logger.error(e.getMessage(), e);
          }
        }
        throw new HttpException("HTTP " + statusCode + ": " + status.getReasonPhrase());
      }

      if (entity == null) {
        // Successful response without a body: report it as an empty batch.
        return null;
      }

      try {
        stream = entity.getContent();
        long parseStart = System.currentTimeMillis();
        Iterator<DataEvent<D>> iter = parse(stream);
        _responseParseLatency = System.currentTimeMillis() - parseStart;
        return iter;
      } catch (Exception e) {
        logger.error(e.getMessage(), e);
        httpget.abort();
        throw new HttpException(e.getMessage(), e);
      }
    } catch (IOException ioe) {
      throw new HttpException(ioe.getMessage(), ioe);
    } finally {
      IOUtils.closeQuietly(stream); // null-safe
    }
  }

  /**
   * Waits for the retry time, or until {@link #stop()} notifies this object.
   *
   * <p>The stopped flag is re-checked while holding the monitor that {@link #stop()} also
   * takes, so a stop request issued just before the wait is not missed.
   *
   * @return {@code false} if the waiting thread was interrupted, {@code true} otherwise
   */
  private boolean awaitRetry() {
    synchronized (this) {
      if (_stopped) {
        return true;
      }
      try {
        this.wait(_retryTime);
        return true;
      } catch (InterruptedException e) {
        // NOTE(review): the interrupt flag is intentionally left cleared, as before. The
        // caller is not visible from this file; if it keeps polling, a restored flag would
        // make every later wait return at once and turn the poll into a busy loop.
        return false;
      }
    }
  }

  /**
   * Returns the next event, fetching a new batch when the current one is exhausted.
   *
   * @return the next event, or {@code null} when the provider is stopped, the fetch yielded
   *         no data (after waiting for the retry time), or the wait was interrupted
   */
  @Override
  public DataEvent<D> next() {
    if (_stopped) {
      return null;
    }

    if (_currentDataIter == null || !_currentDataIter.hasNext()) {
      while (!_stopped) {
        try {
          Iterator<DataEvent<D>> data = fetchBatch();
          if (data == null || !data.hasNext()) {
            if (logger.isDebugEnabled()) {
              logger.debug("no more data");
            }
            awaitRetry();
            return null;
          }
          _currentDataIter = data;
          break;
        } catch (HttpException e) {
          logger.error(e.getMessage(), e);
          logger.error("retrying in " + _retryTime + "ms");
          if (!awaitRetry()) {
            return null;
          }
        }
      }
    }

    DataEvent<D> data = null;
    if (_currentDataIter != null && _currentDataIter.hasNext()) {
      data = _currentDataIter.next();
      if (data != null) {
        // Remember how far we got so the next fetch continues after this event.
        _offset = data.getVersion();
      }
    }
    return data;
  }

  /**
   * Rewinds the offset to the value given to {@link #setStartingOffset(String)}, if any.
   * Events of an already fetched batch are not discarded.
   */
  @Override
  public void reset() {
    if (_initialOffset != null) {
      _offset = _initialOffset;
    }
  }

  /**
   * @return wall-clock milliseconds spent in the most recent HTTP GET
   */
  @Override
  public long getHttpGetLatency() {
    return _httpGetLatency;
  }

  /**
   * @return wall-clock milliseconds spent parsing the most recent response
   */
  @Override
  public long getResponseParseLatency() {
    return _responseParseLatency;
  }

  /** Starts the superclass and then enables fetching. */
  @Override
  public void start() {
    super.start();
    _stopped = false;
  }

  /**
   * Stops fetching, wakes any thread waiting in {@link #next()}, stops the superclass and
   * finally shuts down the connection manager.
   */
  @Override
  public void stop() {
    synchronized (this) {
      _stopped = true;
      this.notifyAll();
    }
    try {
      super.stop();
    } finally {
      if (_httpClientManager != null) {
        _httpClientManager.shutdown();
      }
    }
  }
}