package com.yahoo.glimmer.util; /* * Copyright (c) 2012 Yahoo! Inc. All rights reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. * You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 * Unless required by applicable law or agreed to in writing, software distributed under the License is * distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and limitations under the License. * See accompanying LICENSE file. */ import java.io.IOException; import java.io.InputStream; import java.util.Enumeration; import org.apache.hadoop.fs.FileStatus; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.apache.hadoop.io.compress.CompressionCodec; import org.apache.hadoop.io.compress.CompressionCodecFactory; /** * For use with {@link java.io.SequenceInputStream} to read hadoop * dir/part-?-?????.. files as a single {@link java.io.InputStream}. * * @author tep * */ public class MapReducePartInputStreamEnumeration implements Enumeration<InputStream> { private final FileSystem fileSystem; private final FileStatus[] partFileStatuses; private int partFileStatusesIndex; private final CompressionCodec codecIfAny; public MapReducePartInputStreamEnumeration(FileSystem fileSystem, Path srcPath) throws IOException { this.fileSystem = fileSystem; CompressionCodecFactory factory = new CompressionCodecFactory(fileSystem.getConf()); codecIfAny = factory.getCodec(srcPath); FileStatus srcFileStatus = fileSystem.getFileStatus(srcPath); if (srcFileStatus.isDirectory()) { // returns FileStatus objects sorted by filename. String partFilenamePattern = "part-?-?????"; if (codecIfAny != null) { partFilenamePattern += codecIfAny.getDefaultExtension(); } Path partPathGlob = new Path(srcPath, partFilenamePattern); partFileStatuses = fileSystem.globStatus(partPathGlob); } else { partFileStatuses = new FileStatus[] { srcFileStatus }; } } @Override public boolean hasMoreElements() { return partFileStatusesIndex < partFileStatuses.length; } @Override public InputStream nextElement() { FileStatus partStatus = partFileStatuses[partFileStatusesIndex++]; try { // SequenceInputStream calls InputStream.close() for us.. InputStream is = fileSystem.open(partStatus.getPath()); if (codecIfAny != null) { is = codecIfAny.createInputStream(is); } return is; } catch (IOException e) { throw new RuntimeException("Failed to open part file " + partStatus.getPath(), e); } } public void reset() { partFileStatusesIndex = 0; } public String removeCompressionSuffixIfAny(String filename) { if (codecIfAny != null) { return CompressionCodecFactory.removeSuffix(filename, codecIfAny.getDefaultExtension()); } else { return filename; } } }