/**
 * DataCleaner (community edition)
 * Copyright (C) 2014 Neopost - Customer Information Management
 *
 * This copyrighted material is made available to anyone wishing to use, modify,
 * copy, or redistribute it subject to the terms and conditions of the GNU
 * Lesser General Public License, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
 * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License
 * for more details.
 *
 * You should have received a copy of the GNU Lesser General Public License
 * along with this distribution; if not, write to:
 * Free Software Foundation, Inc.
 * 51 Franklin Street, Fifth Floor
 * Boston, MA 02110-1301 USA
 */
package org.datacleaner.metamodel.datahub;

import static org.apache.http.HttpHeaders.ACCEPT;
import static org.datacleaner.metamodel.datahub.DataHubConnectionHelper.validateReponseStatusCode;

import java.io.InputStream;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;

import org.apache.http.HttpEntity;
import org.apache.http.HttpResponse;
import org.apache.http.NameValuePair;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.client.utils.URLEncodedUtils;
import org.apache.http.message.BasicNameValuePair;
import org.apache.metamodel.data.AbstractDataSet;
import org.apache.metamodel.data.DefaultRow;
import org.apache.metamodel.data.Row;
import org.apache.metamodel.query.Query;
import org.apache.metamodel.query.SelectItem;
import org.apache.metamodel.schema.Table;
import org.datacleaner.metamodel.datahub.utils.JsonQueryDatasetResponseParser;
import org.datacleaner.util.http.MonitorHttpClient;

/**
 * Data set that reads the rows of a query from a DataHub query URL over HTTP.
 *
 * <p>
 * When the query does not specify a maximum number of rows, the result is
 * fetched page by page ({@value #PAGE_SIZE} rows per request) as the data set
 * is iterated. When the query does specify a maximum, a single request for
 * that many rows is made.
 * </p>
 */
public class DataHubDataSet extends AbstractDataSet {

    private static final int PAGE_SIZE = 10000;
    private static final String JSON_CONTENT_TYPE = "application/json";
    private static final String QUERY_PARAM = "q";
    private static final String FIRST_ROW_PARAM = "f";
    private static final String MAX_ROW_PARAM = "m";

    private final DataHubRepoConnection _connection;
    private final Query _query;
    private final String _queryString;
    private final String _uri;
    /** True when the query has no row limit, so results are fetched page by page. */
    private final boolean _paging;

    /** 1-based index of the first row of the next page to request. */
    private int _nextPageFirstRow;
    private Iterator<Object[]> _resultSetIterator;
    private Row _row;
    /** Set once a page came back with fewer rows than were requested. */
    private boolean _endReached;

    /**
     * Creates the data set and immediately fetches the first page of results.
     *
     * @param tenantName
     *            name of the tenant, used to build the query URL
     * @param query
     *            the query to execute; its first FROM item determines the
     *            table (and thereby the datastore) that is queried
     * @param connection
     *            connection providing the query URL and the HTTP client
     * @throws IllegalStateException
     *             if the first page cannot be requested or parsed
     */
    public DataHubDataSet(final String tenantName, final Query query, final DataHubRepoConnection connection) {
        super(getSelectItems(query));
        final Table table = query.getFromClause().getItem(0).getTable();
        _queryString = getQueryString(query, table);
        _query = query;
        _connection = connection;
        // Must run after _connection is assigned: createEncodedUri reads it.
        _uri = createEncodedUri(tenantName, table);
        _paging = query.getMaxRows() == null;
        // Start at the offset requested by the query so that every following
        // page continues from there instead of re-reading the same rows.
        final Integer queryFirstRow = query.getFirstRow();
        _nextPageFirstRow = (queryFirstRow == null ? 1 : queryFirstRow);
        _endReached = false;
        _resultSetIterator = getNextPage();
    }

    private static List<SelectItem> getSelectItems(final Query query) {
        return query.getSelectClause().getItems();
    }

    /**
     * {@inheritDoc}
     */
    @Override
    public Row getRow() {
        return _row;
    }

    /**
     * {@inheritDoc}
     */
    @Override
    public boolean next() {
        // The current page is used up: fetch another one if there may be more.
        if (!_resultSetIterator.hasNext() && _paging && !_endReached) {
            _resultSetIterator = getNextPage();
        }
        if (!_resultSetIterator.hasNext()) {
            _row = null;
            return false;
        }
        _row = new DefaultRow(getHeader(), _resultSetIterator.next());
        return true;
    }

    /**
     * Requests the next chunk of rows and advances the paging cursor.
     *
     * @return an iterator over the rows of the fetched page
     */
    private Iterator<Object[]> getNextPage() {
        final int firstRow = _nextPageFirstRow;
        final Integer queryMaxRows = _query.getMaxRows();
        final int maxRows = (queryMaxRows == null ? PAGE_SIZE : queryMaxRows);
        if (_paging) {
            // Only a paged read issues further requests; skipping the update
            // otherwise also avoids overflowing on very large row limits.
            _nextPageFirstRow = firstRow + maxRows;
        }

        // NOTE(review): the parameters are appended directly to the query URL,
        // so the URL presumably already ends with the query separator - confirm.
        final String uri = _uri + createParams(firstRow, maxRows);
        final HttpGet request = new HttpGet(uri);
        request.addHeader(ACCEPT, JSON_CONTENT_TYPE);

        final HttpResponse response = executeRequest(request);
        final List<Object[]> resultSet = getResultSet(response.getEntity());
        // A short page means that there is nothing left to fetch.
        _endReached = (resultSet.size() < maxRows);
        return resultSet.iterator();
    }

    /**
     * Parses the JSON body of a query response into rows.
     *
     * @param entity
     *            the response entity holding the query result
     * @return the parsed rows
     * @throws IllegalStateException
     *             if the response has no body or the body cannot be read or
     *             parsed
     */
    private List<Object[]> getResultSet(final HttpEntity entity) {
        if (entity == null) {
            throw new IllegalStateException("DataHub query response did not contain a body");
        }
        final JsonQueryDatasetResponseParser parser = new JsonQueryDatasetResponseParser();
        // Closing the content stream lets the HTTP client release the
        // underlying connection, also when parsing fails halfway.
        try (InputStream content = entity.getContent()) {
            return parser.parseQueryResult(content);
        } catch (final Exception e) {
            throw new IllegalStateException(e);
        }
    }

    /**
     * Builds the URL-encoded request parameters (query, first row, max rows).
     */
    private String createParams(final int firstRow, final int maxRows) {
        final List<NameValuePair> params = new ArrayList<>();
        params.add(new BasicNameValuePair(QUERY_PARAM, _queryString));
        params.add(new BasicNameValuePair(FIRST_ROW_PARAM, Integer.toString(firstRow)));
        params.add(new BasicNameValuePair(MAX_ROW_PARAM, Integer.toString(maxRows)));
        return URLEncodedUtils.format(params, "utf-8");
    }

    /**
     * Resolves the query URL for the datastore that the given table belongs to.
     */
    private String createEncodedUri(final String tenantName, final Table table) {
        final String datastoreName = ((DataHubSchema) table.getSchema()).getDatastoreName();
        return _connection.getQueryUrl(tenantName, datastoreName);
    }

    /**
     * Changes all occurrences of {@code <table-name>.<column-name>} into just
     * {@code <column-name>}.
     *
     * <p>
     * This is a plain text replacement of {@code "<table-name>."} in the SQL
     * of the query, so the same character sequence is also removed when it
     * appears elsewhere in the SQL (for example inside a string literal).
     * </p>
     *
     * @param query
     *            The original query
     * @param table
     *            The table from the query.
     * @return The Query string with the qualified table names removed.
     */
    private String getQueryString(final Query query, final Table table) {
        final String queryString = query.toSql();
        return queryString.replace(table.getName() + ".", "");
    }

    /**
     * Executes the request and validates the status code of the response.
     *
     * @throws IllegalStateException
     *             if executing the request fails
     */
    private HttpResponse executeRequest(final HttpGet request) {
        final MonitorHttpClient httpClient = _connection.getHttpClient();
        final HttpResponse response;
        try {
            response = httpClient.execute(request);
        } catch (final Exception e) {
            throw new IllegalStateException(e);
        }
        validateReponseStatusCode(response);
        return response;
    }
}