/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package gobblin.source.extractor.extract;
import gobblin.source.extractor.DataRecordException;
import gobblin.source.extractor.watermark.WatermarkType;
import java.io.IOException;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import gobblin.source.extractor.watermark.Predicate;
import gobblin.source.extractor.exception.HighWatermarkException;
import gobblin.source.extractor.exception.RecordCountException;
import gobblin.source.extractor.exception.SchemaException;
import gobblin.source.workunit.WorkUnit;
/**
* An interface for protocol extractors
*
* @param <D> type of data record
* @param <S> type of schema
*/
public interface ProtocolSpecificLayer<S, D> {
/**
* Extract metadata(schema) from the source
*
* @param source schema name
* @param source entity name
* @param work unit
* @throws SchemaException if there is anything wrong in extracting metadata
*/
public void extractMetadata(String schema, String entity, WorkUnit workUnit)
throws SchemaException, IOException;
/**
* High water mark for the snapshot pull
* @param watermarkSourceFormat
*
* @param source schema name
* @param source entity name
* @param watermark column
* @param watermark column format
* @param list of all predicates that needs to be applied
* @return high water mark
* @throws SchemaException if there is anything wrong in getting high water mark
*/
public long getMaxWatermark(String schema, String entity, String watermarkColumn,
List<Predicate> snapshotPredicateList, String watermarkSourceFormat)
throws HighWatermarkException;
/**
* Source record count
*
* @param source schema name
* @param source entity name
* @param work unit: properties
* @param list of all predicates that needs to be applied
* @return record count
* @throws RecordCountException if there is anything wrong in getting record count
*/
public long getSourceCount(String schema, String entity, WorkUnit workUnit, List<Predicate> predicateList)
throws RecordCountException;
/**
* record set: data records with an iterator
*
* @param source schema name
* @param source entity name
* @param work unit: properties
* @param list of all predicates that needs to be applied
* @return iterator with set of records
* @throws SchemaException if there is anything wrong in getting data records
*/
public Iterator<D> getRecordSet(String schema, String entity, WorkUnit workUnit, List<Predicate> predicateList)
throws DataRecordException, IOException;
/**
* water mark source format of water mark type
* @return water mark source format(yyyyMMddHHmmss, yyyyMMdd etc.)
*/
public String getWatermarkSourceFormat(WatermarkType watermarkType);
/**
* date predicate condition for types like timestamp and date
* @return predicate condition (LastModifiedHour >= 10 and LastModifiedHour <= 20)
*/
public String getHourPredicateCondition(String column, long value, String valueFormat, String operator);
/**
* date predicate condition for types like timestamp and date
* @return predicate condition (LastModifiedDate >= 2014-01-01 and LastModifiedDate <= 2014-01-01)
*/
public String getDatePredicateCondition(String column, long value, String valueFormat, String operator);
/**
* timestamp predicate condition for types like timestamp
* @return predicate condition (LastModifiedTimestamp >= 2014-01-01T00:00:00.000Z and LastModifiedTimestamp <= 2014-01-10T15:05:00.000Z)
*/
public String getTimestampPredicateCondition(String column, long value, String valueFormat, String operator);
/**
* set timeout for the source connection
*/
public void setTimeOut(int timeOut);
/**
* Data type of source
*
* @return Map of source and target data types
*/
public Map<String, String> getDataTypeMap();
/**
* Close connection after the completion of extract whether its success or failure
* @throws Exception
*/
public void closeConnection()
throws Exception;
/**
* Get records using source specific api (Example: bulk api in salesforce source)
* record set: data records with an iterator
*
* @param source schema name
* @param source entity name
* @param work unit: properties
* @param list of all predicates that needs to be applied
* @return iterator with set of records
* @throws SchemaException if there is anything wrong in getting data records
*/
public Iterator<D> getRecordSetFromSourceApi(String schema, String entity, WorkUnit workUnit,
List<Predicate> predicateList)
throws IOException;
}