/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package gobblin.source.extractor.extract.restapi;
import com.google.common.collect.ImmutableList;
import gobblin.source.extractor.exception.RestApiConnectionException;
import gobblin.source.extractor.exception.RestApiProcessingException;
import gobblin.source.extractor.utils.Utils;
import java.io.IOException;
import java.util.Iterator;
import java.util.List;
import com.google.common.base.Joiner;
import com.google.common.base.Strings;
import com.google.gson.Gson;
import com.google.gson.JsonArray;
import com.google.gson.JsonElement;
import com.google.gson.JsonObject;
import com.google.common.base.Splitter;
import gobblin.configuration.ConfigurationKeys;
import gobblin.configuration.WorkUnitState;
import gobblin.source.extractor.watermark.Predicate;
import gobblin.source.extractor.DataRecordException;
import gobblin.source.extractor.exception.HighWatermarkException;
import gobblin.source.extractor.exception.RecordCountException;
import gobblin.source.extractor.exception.SchemaException;
import gobblin.source.extractor.extract.QueryBasedExtractor;
import gobblin.source.extractor.extract.Command;
import gobblin.source.extractor.extract.CommandOutput;
import gobblin.source.extractor.extract.SourceSpecificLayer;
import gobblin.source.extractor.schema.Schema;
import gobblin.source.workunit.WorkUnit;
import lombok.extern.slf4j.Slf4j;
/**
 * Base extractor for sources that are queried through a REST API.
 *
 * <p>The schema is represented as a {@link JsonArray} and each data record as a {@link JsonElement}.
 * Every operation (schema, high watermark, record count, data) follows the same steps: make sure the
 * {@link RestApiConnector} is connected, ask the source-specific layer for the list of
 * {@link Command}s to run, execute them through the connector and hand the {@link CommandOutput}
 * back to the source-specific layer for parsing.
 */
@Slf4j
public abstract class RestApiExtractor extends QueryBasedExtractor<JsonArray, JsonElement>
    implements SourceSpecificLayer<JsonArray, JsonElement>, RestApiSpecificLayer {
  private static final Gson GSON = new Gson();

  /** Not read or written by this class; available to subclasses. */
  protected String instanceUrl;

  /** Data query produced by {@link #buildDataQuery(String, String)} during {@link #extractMetadata}. */
  protected String updatedQuery;

  /** Connector used for every call to the source; created once in the constructor. */
  protected final RestApiConnector connector;

  /**
   * Creates the extractor and its connector.
   *
   * <p>NOTE: {@link #getConnector(WorkUnitState)} is invoked from this constructor, i.e. before the
   * constructor body of any subclass has run, so implementations must not rely on subclass fields.
   *
   * @param state work unit state passed to the superclass and to {@link #getConnector(WorkUnitState)}
   */
  public RestApiExtractor(WorkUnitState state) {
    super(state);
    this.connector = getConnector(state);
  }

  /**
   * Supplies the connector this extractor talks to the source with.
   *
   * @param state work unit state the extractor was constructed with
   * @return the connector to use for all requests
   */
  protected abstract RestApiConnector getConnector(WorkUnitState state);

  /**
   * Builds the data query from the input query and the columns collected in {@code columnList}.
   *
   * <ul>
   *   <li>No input query and a non-empty column list: {@code SELECT <columns> FROM <entity>}.</li>
   *   <li>No input query and an empty column list: {@code null}.</li>
   *   <li>Input query present: the text between the first {@code "select "} and the first
   *       following {@code " from "} (both matched case-insensitively) is replaced by the
   *       comma-joined column list; if either keyword is missing the input query is returned
   *       unchanged.</li>
   * </ul>
   *
   * @param inputQuery query configured by the user, may be {@code null}
   * @param entity name of the entity to select from when no input query is given
   * @return the query to use for pulling data, or {@code null} if none can be built
   */
  protected String buildDataQuery(String inputQuery, String entity) {
    String dataQuery = null;
    if (inputQuery != null) {
      // Restrict the user's query to the columns that were kept from the metadata.
      dataQuery = replaceSelectedColumns(inputQuery);
    } else if (this.columnList.size() != 0) {
      // No user query: build one from the metadata columns.
      dataQuery = "SELECT " + Joiner.on(",").join(this.columnList) + " FROM " + entity;
    }
    log.info("Updated data query: {}", dataQuery);
    return dataQuery;
  }

  /**
   * Replaces the select list of {@code inputQuery} with the comma-joined {@code columnList}.
   *
   * <p>Only the span between the first {@code "select "} and the first {@code " from "} after it is
   * rewritten; the rest of the query (for example a WHERE clause that mentions the same column
   * names) is copied verbatim.
   *
   * @param inputQuery non-null query to rewrite
   * @return the rewritten query, or {@code inputQuery} itself when it has no recognisable select list
   */
  private String replaceSelectedColumns(String inputQuery) {
    final String selectKeyword = "select ";
    final String fromKeyword = " from ";

    // Indices found in the lower-cased copy are applied to the original string.
    String queryLowerCase = inputQuery.toLowerCase();
    int selectIndex = queryLowerCase.indexOf(selectKeyword);
    if (selectIndex < 0) {
      return inputQuery;
    }
    int columnsStartIndex = selectIndex + selectKeyword.length();
    // Search from the column start so the end index can never precede the start index.
    int columnsEndIndex = queryLowerCase.indexOf(fromKeyword, columnsStartIndex);
    if (columnsEndIndex < 0) {
      return inputQuery;
    }
    return inputQuery.substring(0, columnsStartIndex) + Joiner.on(",").join(this.columnList)
        + inputQuery.substring(columnsEndIndex);
  }

  /**
   * Fetches the source schema, filters it down to the requested columns and publishes it.
   *
   * <p>For each column returned by the source the watermark, nullable and primary key attributes are
   * filled in from the {@code extract.delta.fields} and {@code extract.primary.key.fields}
   * properties. A column is kept when there is no input query, when the query selects {@code *}, or
   * when {@code isMetadataColumn} reports it as part of the query's column list; columns named in
   * the excluded-columns property are dropped, but only when no input query is configured. Kept
   * columns are appended to {@code columnList}, the data query is stored in {@link #updatedQuery}
   * and the resulting schema is passed to {@code setOutputSchema}.
   *
   * @param schema schema name forwarded to {@code getSchemaMetadata}
   * @param entity entity name forwarded to {@code getSchemaMetadata} and used to build the data query
   * @param workUnit not used by this implementation
   * @throws SchemaException if the connection fails or the schema cannot be fetched or parsed
   */
  @Override
  public void extractMetadata(String schema, String entity, WorkUnit workUnit) throws SchemaException {
    log.info("Extract Metadata using Rest Api");
    JsonArray columnArray = new JsonArray();
    String inputQuery = workUnitState.getProp(ConfigurationKeys.SOURCE_QUERYBASED_QUERY);

    List<String> columnListInQuery = null;
    if (!Strings.isNullOrEmpty(inputQuery)) {
      columnListInQuery = Utils.getColumnListFromQuery(inputQuery);
    }

    // Exclusions are only honoured when the user did not supply a query of their own.
    String excludedColumns = workUnitState.getProp(ConfigurationKeys.SOURCE_QUERYBASED_EXCLUDED_COLUMNS);
    List<String> columnListExcluded = ImmutableList.<String> of();
    if (Strings.isNullOrEmpty(inputQuery) && !Strings.isNullOrEmpty(excludedColumns)) {
      Splitter splitter = Splitter.on(",").omitEmptyStrings().trimResults();
      columnListExcluded = splitter.splitToList(excludedColumns.toLowerCase());
    }

    try {
      if (!this.connector.connect()) {
        throw new SchemaException("Failed to connect.");
      }
      log.debug("Connected successfully.");

      List<Command> cmds = this.getSchemaMetadata(schema, entity);
      CommandOutput<?, ?> response = this.connector.getResponse(cmds);
      JsonArray array = this.getSchema(response);

      // These do not change per column, so look them up once.
      String deltaFields = workUnitState.getProp("extract.delta.fields");
      String primaryKeyFields = workUnitState.getProp("extract.primary.key.fields");

      for (JsonElement columnElement : array) {
        Schema obj = GSON.fromJson(columnElement, Schema.class);
        String columnName = obj.getColumnName();
        boolean watermarkColumn = this.isWatermarkColumn(deltaFields, columnName);
        int primaryKeyIndex = this.getPrimarykeyIndex(primaryKeyFields, columnName);

        obj.setWaterMark(watermarkColumn);
        if (watermarkColumn) {
          obj.setNullable(false);
        } else if (primaryKeyIndex == 0) {
          // set all columns as nullable except primary key and watermark columns
          obj.setNullable(true);
        }
        obj.setPrimaryKey(primaryKeyIndex);

        JsonObject jsonObject = GSON.fromJson(GSON.toJson(obj), JsonObject.class);

        // If input query is null or provided '*' in the query select all columns.
        // Else, consider only the columns mentioned in the column list
        boolean selected = inputQuery == null || columnListInQuery == null
            || (columnListInQuery.size() == 1 && columnListInQuery.get(0).equals("*"))
            || (columnListInQuery.size() >= 1 && this.isMetadataColumn(columnName, columnListInQuery));
        if (selected && !columnListExcluded.contains(columnName.trim().toLowerCase())) {
          this.columnList.add(columnName);
          columnArray.add(jsonObject);
        }
      }

      this.updatedQuery = buildDataQuery(inputQuery, entity);
      log.info("Schema:{}", columnArray);
      this.setOutputSchema(columnArray);
    } catch (RuntimeException | RestApiConnectionException | RestApiProcessingException | IOException
        | SchemaException e) {
      throw new SchemaException("Failed to get schema using rest api; error - " + e.getMessage(), e);
    }
  }

  /**
   * Connects to the source and retrieves the high watermark.
   *
   * @param schema schema name forwarded to {@code getHighWatermarkMetadata}
   * @param entity entity name forwarded to {@code getHighWatermarkMetadata}
   * @param watermarkColumn watermark column forwarded to the source-specific layer
   * @param predicateList predicates forwarded to {@code getHighWatermarkMetadata}
   * @param watermarkSourceFormat format forwarded to {@code getHighWatermark}
   * @return the value produced by {@code getHighWatermark} for the source response
   * @throws HighWatermarkException if the connection fails or any step throws
   */
  @Override
  public long getMaxWatermark(String schema, String entity, String watermarkColumn, List<Predicate> predicateList,
      String watermarkSourceFormat) throws HighWatermarkException {
    log.info("Get high watermark using Rest Api");
    try {
      if (!this.connector.connect()) {
        throw new HighWatermarkException("Failed to connect.");
      }
      log.debug("Connected successfully.");

      List<Command> cmds = this.getHighWatermarkMetadata(schema, entity, watermarkColumn, predicateList);
      CommandOutput<?, ?> response = this.connector.getResponse(cmds);
      long calculatedHighWatermark = this.getHighWatermark(response, watermarkColumn, watermarkSourceFormat);
      log.info("High watermark:{}", calculatedHighWatermark);
      return calculatedHighWatermark;
    } catch (Exception e) {
      throw new HighWatermarkException("Failed to get high watermark using rest api; error - " + e.getMessage(), e);
    }
  }

  /**
   * Connects to the source and retrieves the record count.
   *
   * @param schema schema name forwarded to {@code getCountMetadata}
   * @param entity entity name forwarded to {@code getCountMetadata}
   * @param workUnit work unit forwarded to {@code getCountMetadata}
   * @param predicateList predicates forwarded to {@code getCountMetadata}
   * @return the value produced by {@code getCount} for the source response
   * @throws RecordCountException if the connection fails or any step throws
   */
  @Override
  public long getSourceCount(String schema, String entity, WorkUnit workUnit, List<Predicate> predicateList)
      throws RecordCountException {
    log.info("Get source record count using Rest Api");
    try {
      if (!this.connector.connect()) {
        throw new RecordCountException("Failed to connect.");
      }
      log.debug("Connected successfully.");

      List<Command> cmds = this.getCountMetadata(schema, entity, workUnit, predicateList);
      CommandOutput<?, ?> response = this.connector.getResponse(cmds);
      long count = getCount(response);
      log.info("Source record count:{}", count);
      return count;
    } catch (Exception e) {
      throw new RecordCountException("Failed to get record count using rest api; error - " + e.getMessage(), e);
    }
  }

  /**
   * Retrieves the next set of records from the source.
   *
   * <p>The connector is only (re)connected when it reports its connection as closed. If
   * {@code getPullStatus()} is false no request is made and {@code null} is returned. Otherwise the
   * request is either a plain GET on {@code getNextUrl()} when that is non-null, or the commands
   * produced by {@code getDataMetadata}.
   *
   * @param schema schema name forwarded to {@code getDataMetadata}
   * @param entity entity name forwarded to {@code getDataMetadata}
   * @param workUnit work unit forwarded to {@code getDataMetadata}
   * @param predicateList predicates forwarded to {@code getDataMetadata}
   * @return the iterator produced by {@code getData}, or {@code null} when the pull status is false
   * @throws DataRecordException if the connection fails or any step throws
   */
  @Override
  public Iterator<JsonElement> getRecordSet(String schema, String entity, WorkUnit workUnit,
      List<Predicate> predicateList) throws DataRecordException {
    log.debug("Get data records using Rest Api");
    try {
      // Reuse an open connection; connect only when the connector says it is closed.
      if (this.connector.isConnectionClosed() && !this.connector.connect()) {
        throw new DataRecordException("Failed to connect.");
      }
      log.debug("Connected successfully.");

      if (!this.getPullStatus()) {
        return null;
      }

      String nextUrl = this.getNextUrl();
      List<Command> cmds = (nextUrl == null)
          ? this.getDataMetadata(schema, entity, workUnit, predicateList)
          : RestApiConnector.constructGetCommand(nextUrl);

      CommandOutput<?, ?> response = this.connector.getResponse(cmds);
      return this.getData(response);
    } catch (Exception e) {
      throw new DataRecordException("Failed to get records using rest api; error - " + e.getMessage(), e);
    }
  }

  /**
   * Forwards the timeout to the connector as its auth token timeout.
   *
   * @param timeOut value passed to {@code RestApiConnector.setAuthTokenTimeout}
   */
  @Override
  public void setTimeOut(int timeOut) {
    this.connector.setAuthTokenTimeout(timeOut);
  }
}