/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package gobblin.source.extractor.filebased;
import java.io.IOException;
import java.io.InputStream;
import java.util.Iterator;
import java.util.Scanner;
import com.google.common.annotations.VisibleForTesting;
import com.google.common.base.Preconditions;
import gobblin.configuration.ConfigurationKeys;
import lombok.Setter;
import lombok.extern.slf4j.Slf4j;
/**
 * A {@link FileDownloader} that exposes a downloaded file as an {@link Iterator} of {@link String} records,
 * produced by splitting the file's content on {@link #token} when {@link #downloadFile(String)} is called.
 *
 * <p>The token is handed to {@link Scanner#useDelimiter(String)}, which interprets it as a regular expression,
 * so regex metacharacters in the token are not matched literally.
 */
@Slf4j
public class TokenizedFileDownloader extends FileDownloader<String> {
  /** Delimiter used by the single-argument constructor: records are separated by a line feed. */
  public static final String DEFAULT_TOKEN = "\n";

  /** Delimiter (regular expression) separating records; must be non-null when a file is downloaded. */
  @Setter
  private String token;

  /** Name of the charset used to decode the file's bytes. */
  @Setter
  private String charset;

  /**
   * Creates a downloader that uses {@link #DEFAULT_TOKEN} and the default charset declared in
   * {@link ConfigurationKeys}.
   *
   * @param fileBasedExtractor extractor that supplies the file system helper and the closer
   */
  public TokenizedFileDownloader(FileBasedExtractor<?, ?> fileBasedExtractor) {
    this(fileBasedExtractor, DEFAULT_TOKEN, ConfigurationKeys.DEFAULT_CHARSET_ENCODING.name());
  }

  /**
   * Creates a downloader with an explicit record delimiter and charset.
   *
   * @param fileBasedExtractor extractor that supplies the file system helper and the closer
   * @param token delimiter (regular expression) separating records
   * @param charset name of the charset used to decode the file
   */
  public TokenizedFileDownloader(FileBasedExtractor<?, ?> fileBasedExtractor, String token, String charset) {
    super(fileBasedExtractor);
    this.token = token;
    this.charset = charset;
  }

  /**
   * Opens {@code filePath} through the extractor's file system helper and returns an iterator over its
   * records. The opened stream is registered with the extractor's closer; the returned iterator also closes
   * it once the last record has been consumed.
   *
   * @param filePath path of the file to download
   * @return iterator over the records of the file, split on the configured token
   * @throws IOException if the file stream cannot be obtained
   * @throws IllegalArgumentException if the token is {@code null}
   */
  @Override
  public Iterator<String> downloadFile(String filePath)
      throws IOException {
    Preconditions.checkArgument(this.token != null, "token must not be null");

    try {
      log.info("downloading file: {}", filePath);

      InputStream inputStream =
          this.fileBasedExtractor.getCloser().register(this.fileBasedExtractor.getFsHelper().getFileStream(filePath));
      return new RecordIterator(inputStream, this.token, this.charset);
    } catch (FileBasedHelperException e) {
      throw new IOException("Exception when trying to download file " + filePath, e);
    }
  }

  /**
   * Iterator over the delimiter-separated records of an {@link InputStream}, backed by a {@link Scanner}.
   *
   * <p>The scanner (and with it the underlying stream) is closed as soon as exhaustion is detected. After
   * that point {@link #hasNext()} keeps returning {@code false} and {@link #next()} keeps returning
   * {@code null}.
   */
  @VisibleForTesting
  protected static class RecordIterator implements Iterator<String> {
    final Scanner scanner;

    // Set once the scanner has been closed; Scanner#hasNext() throws IllegalStateException on a closed
    // scanner, so it must not be consulted again after that.
    private boolean closed;

    /**
     * @param inputStream stream to read records from
     * @param delimiter delimiter pattern (regular expression) separating records
     * @param charSet name of the charset used to decode the stream
     */
    public RecordIterator(InputStream inputStream, String delimiter, String charSet) {
      this.scanner = new Scanner(inputStream, charSet).useDelimiter(delimiter);
    }

    /**
     * Reports whether another record is available, closing the scanner the first time the input is found
     * to be exhausted. Safe to call repeatedly after exhaustion.
     *
     * @return {@code true} if {@link #next()} would return a record
     */
    @Override
    public boolean hasNext() {
      if (this.closed) {
        return false;
      }
      if (this.scanner.hasNext()) {
        return true;
      }

      // Scanner reports a failed read as end of input; surface the cause so truncation is not silent.
      IOException readFailure = this.scanner.ioException();
      if (readFailure != null) {
        log.warn("Stopped reading records because the underlying stream failed", readFailure);
      }

      this.scanner.close();
      this.closed = true;
      return false;
    }

    /**
     * Returns the next record.
     *
     * <p>Note: when no record is left this returns {@code null} instead of throwing
     * {@link java.util.NoSuchElementException}; this pre-existing behavior is kept for compatibility.
     *
     * @return the next record, or {@code null} if the input is exhausted
     */
    @Override
    public String next() {
      return this.hasNext() ? this.scanner.next() : null;
    }

    /**
     * Not supported.
     *
     * @throws UnsupportedOperationException always
     */
    @Override
    public void remove() {
      throw new UnsupportedOperationException("Remove is not supported.");
    }
  }
}