/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.pig.impl.streaming;
import java.io.IOException;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.util.LineReader;
import org.apache.pig.StreamToPig;
import org.apache.pig.data.Tuple;
import org.apache.pig.impl.io.BufferedPositionedInputStream;
/**
* {@link OutputHandler} is responsible for handling the output of the
* Pig-Streaming external command.
*
* The output of the managed executable could be fetched in a
* {@link OutputType#SYNCHRONOUS} manner via its <code>stdout</code> or in an
* {@link OutputType#ASYNCHRONOUS} manner via an external file to which the
* process wrote its output.
*/
public abstract class OutputHandler {
public enum OutputType {SYNCHRONOUS, ASYNCHRONOUS}
/*
* The deserializer to be used to send data to the managed process.
*
* It is the responsibility of the concrete sub-classes to setup and
* manage the deserializer.
*/
protected StreamToPig deserializer;
protected LineReader in = null;
private BufferedPositionedInputStream istream;
/**
* Get the handled <code>OutputType</code>.
* @return the handled <code>OutputType</code>
*/
public abstract OutputType getOutputType();
// flag to mark if close() has already been called
protected boolean alreadyClosed = false;
/**
* Bind the <code>OutputHandler</code> to the <code>InputStream</code>
* from which to read the output data of the managed process.
*
* @param is <code>InputStream</code> from which to read the output data
* of the managed process
* @throws IOException
*/
public void bindTo(String fileName, BufferedPositionedInputStream is,
long offset, long end) throws IOException {
this.istream = is;
this.in = new LineReader(istream);
}
/**
* Get the next output <code>Tuple</code> of the managed process.
*
* @return the next output <code>Tuple</code> of the managed process
* @throws IOException
*/
public Tuple getNext() throws IOException {
if (in == null) {
return null;
}
Text value = new Text();
int num = in.readLine(value);
if (num <= 0) {
return null;
}
byte[] newBytes = new byte[value.getLength()];
System.arraycopy(value.getBytes(), 0, newBytes, 0, value.getLength());
return deserializer.deserialize(newBytes);
}
/**
* Close the <code>OutputHandler</code>.
* @throws IOException
*/
public synchronized void close() throws IOException {
if(!alreadyClosed) {
istream.close();
istream = null;
alreadyClosed = true;
}
}
}