/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package gobblin.source.extractor.extract;
import gobblin.source.extractor.DataRecordException;
import gobblin.source.extractor.exception.HighWatermarkException;
import gobblin.source.extractor.watermark.Predicate;
import java.io.IOException;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import gobblin.source.extractor.exception.RecordCountException;
import gobblin.source.extractor.exception.SchemaException;
import gobblin.source.workunit.WorkUnit;
/**
 * An interface for source extractors.
 *
 * <p>Most operations are declared as a pair of methods: a {@code get*Metadata} method that
 * returns the list of {@link Command}s needed to query the source (like url, query), and a
 * companion method that derives the result from the {@link CommandOutput} of a source call.
 *
 * @param <S> type of schema
 * @param <D> type of data record
 */
public interface SourceSpecificLayer<S, D> {

  /**
   * Metadata to extract raw schema (like url, query).
   *
   * @param schema source schema name
   * @param entity source entity name
   * @return list of commands to get schema
   * @throws SchemaException if there is anything wrong in building metadata for schema extraction
   */
  List<Command> getSchemaMetadata(String schema, String entity)
      throws SchemaException;

  /**
   * Raw schema from the response.
   *
   * @param response is the output from a source call
   * @return S representation of the schema
   * @throws SchemaException if there is anything wrong in getting raw schema
   * @throws IOException if an I/O error occurs (exact conditions are implementation-specific)
   */
  S getSchema(CommandOutput<?, ?> response)
      throws SchemaException, IOException;

  /**
   * Metadata for high watermark (like url, query).
   *
   * @param schema source schema name
   * @param entity source entity name
   * @param watermarkColumn watermark column
   * @param predicateList list of all predicates that need to be applied
   * @return list of commands to get the high watermark
   * @throws HighWatermarkException if there is anything wrong in building metadata to get
   *     high watermark
   */
  List<Command> getHighWatermarkMetadata(String schema, String entity, String watermarkColumn,
      List<Predicate> predicateList)
      throws HighWatermarkException;

  /**
   * High watermark from the response.
   *
   * @param response is the output from a source call
   * @param watermarkColumn watermark column
   * @param predicateColumnFormat format of the predicate (watermark) column; how it is applied
   *     is implementation-specific (TODO: confirm against implementations)
   * @return high watermark from source
   * @throws HighWatermarkException if there is anything wrong in getting the high watermark
   */
  long getHighWatermark(CommandOutput<?, ?> response, String watermarkColumn,
      String predicateColumnFormat)
      throws HighWatermarkException;

  /**
   * Metadata for record count (like url, query).
   *
   * @param schema source schema name
   * @param entity source entity name
   * @param workUnit work unit: properties
   * @param predicateList list of all predicates that need to be applied
   * @return list of commands to get the count
   * @throws RecordCountException if there is anything wrong in building metadata for record
   *     counts
   */
  List<Command> getCountMetadata(String schema, String entity, WorkUnit workUnit,
      List<Predicate> predicateList)
      throws RecordCountException;

  /**
   * Record count from the response.
   *
   * @param response is the output from a source call
   * @return record count
   * @throws RecordCountException if there is anything wrong in getting record count
   */
  long getCount(CommandOutput<?, ?> response)
      throws RecordCountException;

  /**
   * Metadata for data records (like url, query).
   *
   * @param schema source schema name
   * @param entity source entity name
   * @param workUnit work unit: properties
   * @param predicateList list of all predicates that need to be applied
   * @return list of commands to get the data
   * @throws DataRecordException if there is anything wrong in building metadata for data
   *     records
   */
  List<Command> getDataMetadata(String schema, String entity, WorkUnit workUnit,
      List<Predicate> predicateList)
      throws DataRecordException;

  /**
   * Set of data records from the response.
   *
   * @param response is the output from a source call
   * @return Iterator over objects of type D
   * @throws DataRecordException if there is anything wrong in getting data records
   * @throws IOException if an I/O error occurs (exact conditions are implementation-specific)
   */
  Iterator<D> getData(CommandOutput<?, ?> response)
      throws DataRecordException, IOException;

  /**
   * Data type of source.
   *
   * @return Map of source and target data types
   */
  Map<String, String> getDataTypeMap();

  /**
   * Get records using source specific api (Example: bulk api in salesforce source).
   * Record set: data records with an iterator.
   *
   * @param schema source schema name
   * @param entity source entity name
   * @param workUnit work unit: properties
   * @param predicateList list of all predicates that need to be applied
   * @return iterator with set of records
   * @throws IOException if there is anything wrong in getting data records
   */
  Iterator<D> getRecordSetFromSourceApi(String schema, String entity, WorkUnit workUnit,
      List<Predicate> predicateList)
      throws IOException;
}