package com.github.zangxiaoqiang.common.hadoop;
import java.io.IOException;
import java.util.Queue;
import java.util.concurrent.ConcurrentLinkedQueue;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.Path;
/**
 * Lazily iterates over all files under a base HDFS path, optionally filtering
 * one Hive-style partition level ({@code name=value} directory names) by a
 * lexicographic value range. Directories are expanded breadth-first and only
 * when {@link #next()} runs out of queued files, so huge trees are never
 * listed up front.
 *
 * <p>Not an {@link java.util.Iterator}: {@code next()} throws a checked
 * {@link IOException} and signals exhaustion by returning {@code null}.
 */
public class HadoopFileIterator {
    /** Partition key to filter on (e.g. "dt"); {@code null} disables filtering. */
    private final String partitionName;
    /** Inclusive lexicographic lower bound on the partition value; {@code null} = unbounded. */
    private final String lowerLimit;
    /** Inclusive lexicographic upper bound on the partition value; {@code null} = unbounded. */
    private final String upperLimit;
    /** Directories discovered but not yet listed. */
    private final Queue<Path> folderQueue = new ConcurrentLinkedQueue<>();
    /** Files ready to be handed out by {@link #next()}. */
    private final Queue<Path> fileQueue = new ConcurrentLinkedQueue<>();

    /**
     * Creates an iterator over the files under {@code basePath}, keeping only
     * directories whose partition value for {@code partitionName} lies within
     * {@code [lowerLimit, upperLimit]} (compared with {@link String#compareTo}).
     *
     * @param basePath      root directory to walk
     * @param partitionName partition key to filter on, or {@code null} for no filtering
     * @param lowerLimit    inclusive lower bound on the partition value, or {@code null}
     * @param upperLimit    inclusive upper bound on the partition value, or {@code null}
     * @throws IOException if listing {@code basePath} fails
     */
    public HadoopFileIterator(Path basePath, String partitionName, String lowerLimit,
            String upperLimit) throws IOException {
        this.partitionName = partitionName;
        // The limits are only consulted when partitionName matches a directory's
        // key, so storing them unconditionally (even when partitionName is null)
        // is behavior-identical and lets all fields be final.
        this.lowerLimit = lowerLimit;
        this.upperLimit = upperLimit;
        fetchFolder(basePath);
    }

    /**
     * Creates an iterator over every file under {@code basePath}, with no
     * partition filtering.
     *
     * @throws IOException if listing {@code basePath} fails
     */
    public HadoopFileIterator(Path basePath) throws IOException {
        this(basePath, null, null, null);
    }

    /**
     * Lists {@code path} and routes each entry: plain files go straight to the
     * file queue; directories are queued for later expansion unless they name a
     * partition value outside the configured range.
     *
     * @throws IOException if the listing fails
     */
    private void fetchFolder(Path path) throws IOException {
        FileStatus[] entries = HadoopUtil.listAll(path);
        if (entries == null) {
            // Defensive: treat a null listing as an empty directory.
            return;
        }
        for (FileStatus status : entries) {
            if (!status.isDirectory()) {
                fileQueue.add(status.getPath());
                continue;
            }
            Path subDirectory = status.getPath();
            if (acceptPartition(subDirectory.getName())) {
                folderQueue.add(subDirectory);
            }
        }
    }

    /**
     * Decides whether a directory survives partition filtering. Returns
     * {@code true} unless {@code dirName} is a {@code name=value} segment whose
     * key equals {@link #partitionName} and whose value falls outside
     * {@code [lowerLimit, upperLimit]}. Directories that are not partition
     * segments, or belong to a different partition key, always pass.
     */
    private boolean acceptPartition(String dirName) {
        int eq = dirName.indexOf('=');
        if (eq < 0 || partitionName == null
                || !partitionName.equals(dirName.substring(0, eq))) {
            return true;
        }
        String value = dirName.substring(eq + 1);
        if (lowerLimit != null && value.compareTo(lowerLimit) < 0) {
            return false;
        }
        return upperLimit == null || value.compareTo(upperLimit) <= 0;
    }

    /**
     * Returns the next file path, expanding queued directories breadth-first
     * until a file is found.
     *
     * @return the next file path, or {@code null} when the walk is exhausted
     * @throws IOException if listing a directory fails
     */
    public Path next() throws IOException {
        while (fileQueue.isEmpty()) {
            if (folderQueue.isEmpty()) {
                return null;
            }
            fetchFolder(folderQueue.remove());
        }
        return fileQueue.remove();
    }
}