package com.aliyun.odps.udf;
import com.aliyun.odps.NotImplementedException;
import com.aliyun.odps.data.Record;
import com.aliyun.odps.exec.InputSplit;
import com.aliyun.odps.io.InputStreamSet;
import java.io.IOException;
/**
* Base extractor class, user-defined extractors shall extend from this class
**/
public abstract class Extractor {

  /**
   * Converts an {@link InputSplit} into the {@link InputStreamSet} this extractor reads from.
   *
   * <p>This base implementation performs no conversion and always throws. At present the method is
   * meant to be overridden only when Hive-compatible interfaces are in use, i.e. when an
   * inputFormat/Serde pair describes the data extraction logic.
   *
   * <p>TODO: use this to describe general input split.
   *
   * @param split the input files, described as an InputSplit
   * @return the converted InputStreamSet (this base implementation never returns normally)
   * @throws NotImplementedException always, unless a subclass overrides this method
   */
  public InputStreamSet splitToInputStreamSet(InputSplit split) {
    final String message = "No default splitToInputStreamSet method implemented.";
    throw new NotImplementedException(message);
  }

  /**
   * Prepares the extractor before any record is extracted. Implementations may leave this as a
   * no-op.
   *
   * @param ctx the ExecutionContext carrying context information that may be useful when setting
   *     up the user code execution environment
   * @param inputs the set of input streams, each one corresponding to a single input file
   * @param attributes any attributes needed to describe the associated input data
   */
  public abstract void setup(ExecutionContext ctx, InputStreamSet inputs, DataAttributes attributes);

  /**
   * Extracts the next schematized record from the input.
   *
   * @return the extracted record, or {@code null} to signal that no more records are to be
   *     extracted
   * @throws IOException declared for implementations to report failures while extracting
   */
  public abstract Record extract() throws IOException;

  /**
   * Hook for any work to be done when the extractor exits. Implementations may leave this as a
   * no-op.
   */
  public abstract void close();
}