/*
 * Copyright 2013 Deutsche Nationalbibliothek
 *
 * Licensed under the Apache License, Version 2.0 the "License";
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.lobid.lodmill;

import java.io.IOException;
import java.io.InputStreamReader;
import java.io.Reader;
import java.net.HttpURLConnection;
import java.net.URL;
import java.net.URLConnection;
import java.nio.charset.Charset;

import org.culturegraph.mf.framework.DefaultObjectPipe;
import org.culturegraph.mf.framework.ObjectReceiver;
import org.culturegraph.mf.framework.annotations.Description;
import org.culturegraph.mf.framework.annotations.In;
import org.culturegraph.mf.framework.annotations.Out;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * Opens a {@link URLConnection} and passes a reader to the receiver. If the URL
 * connection returns an HTTP status code of 420, the program waits for some
 * seconds before going on.
 *
 * @author Christoph Böhme
 * @author Jan Schnasse
 * @author Pascal Christoph (dr0i)
 *
 */
@Description("Opens a http resource. Supports the setting of Accept and Accept-Charset as http header fields.")
@In(String.class)
@Out(java.io.Reader.class)
public final class HttpOpener
		extends DefaultObjectPipe<String, ObjectReceiver<Reader>>
		implements org.culturegraph.mf.stream.source.Opener {

	private static final Logger LOG = LoggerFactory.getLogger(HttpOpener.class);

	/** Pause applied after the server answered with HTTP status 420. */
	private static final long MILLISECONDS_TO_WAIT = 5000L;

	/** HTTP status code that makes this opener pause before going on. */
	private static final int HTTP_STATUS_WAIT = 420;

	private static final String CHARSET_PARAMETER = "charset=";

	private String encoding = "UTF-8";
	private String accept = "*/*";

	/**
	 * @param accept The accept header in the form type/subtype, e.g. text/plain.
	 */
	public void setAccept(final String accept) {
		this.accept = accept;
	}

	/**
	 * @param encoding The encoding is used to encode the output and is passed as
	 *          Accept-Charset to the http connection.
	 */
	public void setEncoding(final String encoding) {
		this.encoding = encoding;
	}

	/**
	 * Opens the given URL, sending the configured Accept and Accept-Charset
	 * headers, and hands a reader over the response body to the receiver.
	 * I/O problems are logged and not rethrown. If the server answered with
	 * HTTP status 420 the calling thread sleeps for a fixed time before this
	 * method returns.
	 *
	 * @param urlStr the URL to open
	 */
	@Override
	public void process(final String urlStr) {
		// Declared outside the try block so the catch block can ask the
		// connection for its HTTP status code.
		URLConnection con = null;
		try {
			con = new URL(urlStr).openConnection();
			con.addRequestProperty("Accept", accept);
			con.addRequestProperty("Accept-Charset", encoding);
			// The character set is announced in the Content-Type header. The
			// Content-Encoding header names a transfer coding such as gzip and
			// must not be used as a charset name.
			final String charset = resolveCharset(con.getContentType(), encoding);
			getReceiver()
					.process(new InputStreamReader(con.getInputStream(), charset));
		} catch (IOException e) {
			LOG.error("Problems with URL '{}'", urlStr, e);
			if (hasWaitStatus(con)) {
				pause();
			}
		}
	}

	/**
	 * Extracts the charset parameter from a Content-Type header value.
	 *
	 * @param contentType the raw header value, may be null
	 * @param fallback the charset name to use if the header names no usable
	 *          charset
	 * @return the charset name from the header if it is supported by this JVM,
	 *         otherwise the fallback
	 */
	private static String resolveCharset(final String contentType,
			final String fallback) {
		if (contentType == null) {
			return fallback;
		}
		for (final String parameter : contentType.split(";")) {
			final String trimmed = parameter.trim();
			if (!trimmed.regionMatches(true, 0, CHARSET_PARAMETER, 0,
					CHARSET_PARAMETER.length())) {
				continue;
			}
			String name = trimmed.substring(CHARSET_PARAMETER.length()).trim();
			// The parameter value may be given as a quoted string.
			if (name.length() >= 2 && name.startsWith("\"")
					&& name.endsWith("\"")) {
				name = name.substring(1, name.length() - 1).trim();
			}
			if (isSupportedCharset(name)) {
				return name;
			}
		}
		return fallback;
	}

	/**
	 * @param name a charset name as sent by the server
	 * @return true if the name is syntactically legal and known to this JVM
	 */
	private static boolean isSupportedCharset(final String name) {
		if (name.isEmpty()) {
			return false;
		}
		try {
			return Charset.isSupported(name);
		} catch (IllegalArgumentException e) {
			// Thrown for syntactically illegal charset names.
			return false;
		}
	}

	/**
	 * @param con the connection that was used, may be null if the URL could not
	 *          be opened at all
	 * @return true if the connection is an HTTP connection whose response
	 *         status is 420
	 */
	private static boolean hasWaitStatus(final URLConnection con) {
		if (!(con instanceof HttpURLConnection)) {
			return false;
		}
		try {
			return ((HttpURLConnection) con)
					.getResponseCode() == HTTP_STATUS_WAIT;
		} catch (IOException e) {
			// No status line available, e.g. the connection itself failed.
			return false;
		}
	}

	/**
	 * Sleeps for {@link #MILLISECONDS_TO_WAIT}. If the thread is interrupted
	 * while sleeping the interrupt flag is restored so callers can react to it.
	 */
	private static void pause() {
		try {
			LOG.info("Wait for {} sec.", MILLISECONDS_TO_WAIT / 1000);
			Thread.sleep(MILLISECONDS_TO_WAIT);
		} catch (InterruptedException e) {
			Thread.currentThread().interrupt();
		}
	}
}