/**
 * DataCleaner (community edition)
 * Copyright (C) 2014 Neopost - Customer Information Management
 *
 * This copyrighted material is made available to anyone wishing to use, modify,
 * copy, or redistribute it subject to the terms and conditions of the GNU
 * Lesser General Public License, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
 * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License
 * for more details.
 *
 * You should have received a copy of the GNU Lesser General Public License
 * along with this distribution; if not, write to:
 * Free Software Foundation, Inc.
 * 51 Franklin Street, Fifth Floor
 * Boston, MA 02110-1301 USA
 */
package org.datacleaner.components.http;

import java.nio.charset.Charset;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Set;

import javax.inject.Inject;
import javax.inject.Named;

import org.apache.http.HttpEntity;
import org.apache.http.HttpEntityEnclosingRequest;
import org.apache.http.HttpResponse;
import org.apache.http.client.methods.HttpUriRequest;
import org.apache.http.entity.StringEntity;
import org.apache.http.impl.client.CloseableHttpClient;
import org.apache.http.impl.client.HttpClients;
import org.apache.http.protocol.HTTP;
import org.apache.http.util.EntityUtils;
import org.apache.metamodel.util.FileHelper;
import org.datacleaner.api.Categorized;
import org.datacleaner.api.Close;
import org.datacleaner.api.Configured;
import org.datacleaner.api.Description;
import org.datacleaner.api.Initialize;
import org.datacleaner.api.InputColumn;
import org.datacleaner.api.InputRow;
import org.datacleaner.api.MappedProperty;
import org.datacleaner.api.NumberProperty;
import org.datacleaner.api.OutputColumns;
import org.datacleaner.api.StringProperty;
import org.datacleaner.api.Transformer;
import org.datacleaner.components.categories.ImproveSuperCategory;
import org.datacleaner.components.categories.ReferenceDataCategory;
import org.datacleaner.util.StringUtils;
import org.datacleaner.util.ws.PooledServiceSession;
import org.datacleaner.util.ws.ServiceResult;

import com.google.common.base.Strings;

/**
 * Transformer that fires one HTTP request per input row and emits the
 * response status code and response body as output columns.
 *
 * The URL and the request body are templates: every configured variable name
 * found in them is replaced with the value of the input column mapped to that
 * variable before the request is sent.
 */
@Named("HTTP request")
@Categorized(value = ReferenceDataCategory.class, superCategory = ImproveSuperCategory.class)
@Description("Sends a HTTP request for each record and retrieves the response as transformation output.\n"
        + "For each request you can have dynamic elements in the URL or in the request body that is sent. "
        + "Provide variable names that are unique to the URL and request body and reference them there. For instance:\n"
        + "<table><tr><td>URL:</td><td>http://www.google.com/?q=${term}</td></tr>"
        + "<tr><td>Input:</td><td>column1</td></tr>"
        + "<tr><td>Variable:</td><td>${term}</td></tr></table>")
public class HttpRequestTransformer implements Transformer {

    public static final String PROPERTY_INPUT_COLUMNS = "Input";
    public static final String PROPERTY_VARIABLE_NAMES = "Variable names";

    private static final String PROPERTY_URL = "URL";

    @Inject
    @Configured(value = PROPERTY_URL, order = 1)
    @Description("The URL to invoke. The URL will be pre-processed by replacing any variable names in it with "
            + "the corresponding dynamic values.")
    String url = "http://";

    @Inject
    @Configured(order = 2)
    HttpMethod method = HttpMethod.POST;

    @Inject
    @Configured(value = PROPERTY_INPUT_COLUMNS, order = 3)
    InputColumn<?>[] input;

    @Inject
    @Configured(value = PROPERTY_VARIABLE_NAMES, order = 4)
    @MappedProperty(PROPERTY_INPUT_COLUMNS)
    String[] variableNames;

    @Inject
    @Configured(order = 5)
    @StringProperty(multiline = true, emptyString = true)
    @Description(
            "The body of the request to invoke. The request body will be pre-processed by replacing any variable names "
                    + "in it with the corresponding dynamic values.")
    String requestBody = "";

    @Inject
    @Configured(required = false, order = 100)
    Map<String, String> headers;

    @Inject
    @Configured(required = false, order = 101)
    String charset = HTTP.DEF_CONTENT_CHARSET.name();

    @Inject
    @Configured(required = false, order = 150)
    @NumberProperty(negative = false, zero = false, positive = true)
    @Description("The maximum number of requests that may be fired at the same time.\n"
            + "Higher values may provide better throughput while it may also add load to the HTTP server.")
    int maxConcurrentRequests = 20;

    private CloseableHttpClient _httpClient;
    private PooledServiceSession<Object[]> _session;

    /**
     * Creates the HTTP client and the pooled session through which requests
     * are invoked. The session is sized by {@code maxConcurrentRequests}.
     */
    @Initialize
    public void init() {
        _httpClient = HttpClients.createSystem();
        _session = new PooledServiceSession<>(maxConcurrentRequests);
    }

    /**
     * Closes the HTTP client and the service session.
     */
    @Close
    public void close() {
        FileHelper.safeClose(_httpClient, _session);
    }

    /**
     * Declares the two output columns: the integer response status code and
     * the response body as a string.
     */
    @Override
    public OutputColumns getOutputColumns() {
        final String[] columnNames = { "Response status code", "Response body" };
        final Class<?>[] columnTypes = { Integer.class, String.class };
        return new OutputColumns(columnNames, columnTypes);
    }

    /**
     * Builds the request for the given row (URL and body with variables
     * substituted, plus any configured headers), executes it through the
     * service session and returns the outcome.
     *
     * @param inputRow
     *            the row providing the dynamic values
     * @return a two-element array: the response status code and the response
     *         body; the body is {@code null} when the response carries no
     *         entity
     * @throws RuntimeException
     *             if the invocation failed; a non-runtime cause is wrapped
     */
    @Override
    public Object[] transform(final InputRow inputRow) {
        final Charset usedCharset = resolveCharset();

        final String requestBody = applyVariablesToString(this.requestBody, inputRow);
        final String url = applyVariablesToString(this.url, inputRow);

        final HttpUriRequest request = method.createRequest(url);

        // only requests that can enclose an entity get a body
        if (!Strings.isNullOrEmpty(requestBody) && request instanceof HttpEntityEnclosingRequest) {
            final HttpEntity entity = new StringEntity(requestBody, usedCharset);
            ((HttpEntityEnclosingRequest) request).setEntity(entity);
        }

        if (headers != null) {
            final Set<Entry<String, String>> entries = headers.entrySet();
            for (final Entry<String, String> entry : entries) {
                request.setHeader(entry.getKey(), entry.getValue());
            }
        }

        final ServiceResult<Object[]> result = _session.invokeService(() -> {
            final HttpResponse response = _httpClient.execute(request);
            final int statusCode = response.getStatusLine().getStatusCode();

            // A response without a body (e.g. 204 No Content) has no entity
            // and EntityUtils.toString(...) rejects a null entity, so report
            // a null body instead of failing the whole row.
            final HttpEntity responseEntity = response.getEntity();
            final String body;
            if (responseEntity == null) {
                body = null;
            } else {
                body = EntityUtils.toString(responseEntity, usedCharset);
            }
            return new Object[] { statusCode, body };
        });

        if (!result.isSuccesfull()) {
            final Throwable error = result.getError();
            if (error instanceof RuntimeException) {
                throw (RuntimeException) error;
            }
            throw new RuntimeException(error);
        }

        return result.getResponse();
    }

    /**
     * Resolves the configured charset name. The charset property is optional,
     * so a missing (null or empty) name falls back to
     * {@link HTTP#DEF_CONTENT_CHARSET}, which is also the field's initial
     * value, instead of failing in {@link Charset#forName(String)}.
     *
     * @return the charset to use for the request entity and for decoding the
     *         response body
     */
    private Charset resolveCharset() {
        if (Strings.isNullOrEmpty(charset)) {
            return HTTP.DEF_CONTENT_CHARSET;
        }
        return Charset.forName(charset);
    }

    /**
     * Creates a string with all variable names replaced with dynamic values
     * coming from the {@link InputRow}'s values.
     *
     * @param str
     *            the string to prepare with variables
     * @param inputRow
     *            the input row containing the dynamic values to insert into the
     *            string
     * @return the string with every variable name replaced by the string form
     *         of the corresponding input value (a null value is inserted as an
     *         empty string), or {@code null} if {@code str} is null or empty
     */
    protected String applyVariablesToString(final String str, final InputRow inputRow) {
        if (Strings.isNullOrEmpty(str)) {
            return null;
        }

        String result = str;
        final List<Object> values = inputRow.getValues(input);
        for (int i = 0; i < input.length; i++) {
            final Object value = values.get(i);
            final String valueStr;
            if (value == null) {
                valueStr = "";
            } else {
                valueStr = value.toString();
            }
            result = StringUtils.replaceAll(result, variableNames[i], valueStr);
        }
        return result;
    }

    /**
     * Sets the name of the charset used for the request entity and for
     * decoding the response body.
     */
    public void setCharset(final String charset) {
        this.charset = charset;
    }

    /**
     * Sets the maximum number of requests that may be fired at the same time.
     */
    public void setMaxConcurrentRequests(final int maxConcurrentRequests) {
        this.maxConcurrentRequests = maxConcurrentRequests;
    }

    /**
     * Sets the HTTP method used to create each request.
     */
    public void setMethod(final HttpMethod method) {
        this.method = method;
    }

    /**
     * Sets the request body template.
     */
    public void setRequestBody(final String requestBody) {
        this.requestBody = requestBody;
    }

    /**
     * Sets the URL template.
     */
    public void setUrl(final String url) {
        this.url = url;
    }

    /**
     * Replaces the HTTP client used to execute requests.
     */
    public void setHttpClient(final CloseableHttpClient httpClient) {
        _httpClient = httpClient;
    }

    /**
     * Sets the input columns together with the variable names they map to;
     * the variable at index {@code i} is replaced with the value of the input
     * column at index {@code i}.
     */
    public void setInputAndVariables(final InputColumn<?>[] input, final String[] variableNames) {
        this.input = input;
        this.variableNames = variableNames;
    }
}