package org.embulk.spi.util;
import java.util.Iterator;
import java.util.NoSuchElementException;
import java.io.Reader;
import java.io.BufferedReader;
import java.io.InputStreamReader;
import java.io.IOException;
import java.nio.charset.Charset;
import java.nio.charset.CharsetDecoder;
import java.nio.charset.CodingErrorAction;
import org.embulk.config.Task;
import org.embulk.config.Config;
import org.embulk.config.ConfigDefault;
import org.embulk.spi.FileInput;
import static java.nio.charset.StandardCharsets.UTF_8;
/**
 * Decodes the bytes of a {@link FileInput} into lines of text.
 *
 * <p>Bytes are decoded with the charset returned by
 * {@link DecoderTask#getCharset()}; malformed and unmappable input is
 * replaced instead of raising an error. Lines are split by
 * {@link BufferedReader#readLine()}.
 *
 * <p>Lines can be pulled one at a time with {@link #poll()} or through
 * {@link #iterator()}. Every iterator shares the single look-ahead line held
 * by this decoder, so iterators obtained from the same instance are not
 * independent of each other.
 */
public class LineDecoder
        implements AutoCloseable, Iterable<String>
{
    // TODO optimize

    /**
     * Configuration read by {@link LineDecoder}.
     *
     * <p>NOTE(review): only {@code charset} is read by the code in this
     * class; {@code newline} is declared here but not consulted by
     * {@link LineDecoder} itself.
     */
    public static interface DecoderTask
            extends Task
    {
        /** Charset used to decode the input bytes; defaults to {@code utf-8}. */
        @Config("charset")
        @ConfigDefault("\"utf-8\"")
        public Charset getCharset();

        /** Configured newline kind; defaults to {@code CRLF}. */
        @Config("newline")
        @ConfigDefault("\"CRLF\"")
        public Newline getNewline();
    }

    // Presumably adapts the FileInput to an InputStream -- declared elsewhere, confirm there.
    private final FileInputInputStream inputStream;
    private final BufferedReader reader;
    private final Charset charset;

    // One-line look-ahead filled by Ite.hasNext() and consumed by Ite.next().
    private String nextLine;

    /**
     * Creates a decoder that reads from {@code in}.
     *
     * @param in   source of the bytes to decode
     * @param task supplies the charset used for decoding
     */
    public LineDecoder(FileInput in, DecoderTask task)
    {
        this.charset = task.getCharset();
        CharsetDecoder decoder = charset
            .newDecoder()
            .onMalformedInput(CodingErrorAction.REPLACE)  // TODO configurable?
            .onUnmappableCharacter(CodingErrorAction.REPLACE);  // TODO configurable?
        this.inputStream = new FileInputInputStream(in);
        this.reader = new BufferedReader(new InputStreamReader(inputStream, decoder));
    }

    /**
     * Advances the underlying stream to its next file and, when the charset
     * is UTF-8, drops a leading byte order mark.
     *
     * @return the value returned by the underlying stream's {@code nextFile()}
     * @throws RuntimeException if probing for the byte order mark fails with
     *         an {@link IOException}
     */
    public boolean nextFile()
    {
        boolean has = inputStream.nextFile();
        if (has) {
            skipBom();
        }
        return has;
    }

    /**
     * Consumes a leading U+FEFF when the charset is UTF-8; does nothing for
     * any other charset.
     *
     * <p>An {@link IOException} from any step is rethrown wrapped in a
     * {@link RuntimeException}, matching {@link #poll()} and {@link #close()},
     * instead of being silently dropped.
     */
    private void skipBom()
    {
        if (!charset.equals(UTF_8)) {
            return;
        }
        try {
            reader.mark(3);
            if (reader.read() != 0xFEFF) {
                // Not a BOM (or end of input): roll back to the marked
                // position so the character is delivered by the next read.
                reader.reset();
            }
            // Otherwise the BOM character has been consumed and stays skipped.
        }
        catch (IOException ex) {
            // unexpected
            throw new RuntimeException(ex);
        }
    }

    /**
     * Reads the next line.
     *
     * @return the result of {@link BufferedReader#readLine()}: the line
     *         without its terminator, or {@code null} once the reader
     *         reports end of stream
     * @throws RuntimeException if reading fails with an {@link IOException}
     */
    public String poll()
    {
        try {
            return reader.readLine();
        }
        catch (IOException ex) {
            // unexpected
            throw new RuntimeException(ex);
        }
    }

    /**
     * Closes the reader that wraps the input stream.
     *
     * @throws RuntimeException if closing fails with an {@link IOException}
     */
    @Override
    public void close()
    {
        try {
            reader.close();
        }
        catch (IOException ex) {
            // unexpected
            throw new RuntimeException(ex);
        }
    }

    /**
     * Returns an iterator over the remaining lines, backed by {@link #poll()}.
     *
     * @return an iterator that does not support {@code remove()}
     */
    @Override
    public Iterator<String> iterator()
    {
        return new Ite(this);
    }

    /** Iterator over the lines of a {@link LineDecoder}, with one line of look-ahead. */
    private static class Ite
            implements Iterator<String>
    {
        private final LineDecoder self;

        public Ite(LineDecoder self)
        {
            // TODO non-static inner class causes a problem with JRuby
            this.self = self;
        }

        @Override
        public boolean hasNext()
        {
            // Fetch a line only when none is buffered; poll() returns null
            // when there is nothing more to read.
            if (self.nextLine == null) {
                self.nextLine = self.poll();
            }
            return self.nextLine != null;
        }

        @Override
        public String next()
        {
            if (!hasNext()) {
                throw new NoSuchElementException();
            }
            String line = self.nextLine;
            self.nextLine = null;
            return line;
        }

        @Override
        public void remove()
        {
            throw new UnsupportedOperationException();
        }
    }
}