package org.hipi.tools.downloader;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.compress.CompressionCodec;
import org.apache.hadoop.io.compress.CompressionCodecFactory;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapreduce.InputSplit;
import org.apache.hadoop.mapreduce.lib.input.FileSplit;
import org.apache.hadoop.mapreduce.RecordReader;
import org.apache.hadoop.mapreduce.TaskAttemptContext;

import java.io.BufferedReader;
import java.io.InputStreamReader;
import java.io.IOException;

public class DownloaderRecordReader extends RecordReader<LongWritable, Text> {

  private long startLine;      // first line of this split's range within the URL list
  private long linesRead;      // total lines read so far by this record reader instance
  private long numLines;       // number of lines assigned to this split
  private long linesPerRecord; // lines batched into each key/value pair
  private long currentKey;     // line number of the first URL in the current value
  private String urls;         // newline-separated batch of URLs for the current value
  private BufferedReader reader;

  @Override
  public void initialize(InputSplit split, TaskAttemptContext context) throws IOException {

    // Obtain path to the input list of image URLs and open an input stream
    FileSplit fileSplit = (FileSplit) split;
    Path path = fileSplit.getPath();
    FileSystem fileSystem = path.getFileSystem(context.getConfiguration());
    FSDataInputStream fileIn = fileSystem.open(path);

    // Note that the start and length fields in the FileSplit object are used to
    // convey a range of *lines* in the input list of image URLs, not byte offsets
    startLine = fileSplit.getStart();
    numLines = fileSplit.getLength();
    linesRead = 0;
    linesPerRecord = 100; // can be tuned to change key/value pair size (may improve efficiency)

    // If one exists, obtain the compression codec for this file
    CompressionCodecFactory codecFactory = new CompressionCodecFactory(context.getConfiguration());
    CompressionCodec codec = codecFactory.getCodec(path);

    // If a codec was found, use it to create a decompressed input stream;
    // otherwise, assume the input stream is uncompressed
    if (codec != null) {
      reader = new BufferedReader(new InputStreamReader(codec.createInputStream(fileIn)));
    } else {
      reader = new BufferedReader(new InputStreamReader(fileIn));
    }

    // Skip ahead to the first line of this split's range; without this, every
    // record reader over a shared input file would re-read the same leading lines
    for (long i = 0; i < startLine; i++) {
      if (reader.readLine() == null) {
        throw new IOException("Unexpected EOF while skipping to start of input split.");
      }
    }
  }

  // Report progress through this split's range of lines
  @Override
  public float getProgress() {
    return (numLines == 0 ? 0.0f : ((float) linesRead) / ((float) numLines));
  }

  @Override
  public void close() throws IOException {
    if (reader != null) {
      reader.close();
    }
  }

  @Override
  public LongWritable getCurrentKey() throws IOException, InterruptedException {
    return new LongWritable(currentKey);
  }

  @Override
  public Text getCurrentValue() throws IOException, InterruptedException {
    return new Text(urls);
  }

  @Override
  public boolean nextKeyValue() throws IOException, InterruptedException {

    // If the record reader has reached the end of its range, stop now
    if (linesRead >= numLines) {
      return false;
    }

    // The key is the line number of the first URL in this batch
    currentKey = startLine + linesRead;

    // Collect up to linesPerRecord lines (set in initialize() above) into a
    // single newline-separated value; a StringBuilder avoids the quadratic
    // cost of repeated string concatenation
    StringBuilder builder = new StringBuilder();
    for (int i = 0; (i < linesPerRecord) && (linesRead < numLines); i++) {
      String line = reader.readLine();
      if (line == null) {
        throw new IOException("Unexpected EOF while retrieving next line from input split.");
      }
      builder.append(line).append("\n");
      linesRead++;
    }
    urls = builder.toString();

    // At least one line was successfully read, so a valid record is available
    return true;
  }
}
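
// ---------------------------------------------------------------------------
// The sketch below is illustrative only and is NOT part of HIPI: a minimal
// InputFormat showing how the line-range FileSplit convention consumed by
// DownloaderRecordReader above might be produced. The class name, the
// LINES_PER_SPLIT value, and the countLines() helper are all hypothetical;
// fully qualified names are used for types not imported above, and the line
// counter ignores compression for brevity.
// ---------------------------------------------------------------------------
class LineRangeInputFormatSketch
    extends org.apache.hadoop.mapreduce.lib.input.FileInputFormat<LongWritable, Text> {

  private static final long LINES_PER_SPLIT = 1000; // hypothetical tuning knob

  @Override
  public java.util.List<InputSplit> getSplits(org.apache.hadoop.mapreduce.JobContext context)
      throws IOException {
    java.util.List<InputSplit> splits = new java.util.ArrayList<InputSplit>();
    for (org.apache.hadoop.fs.FileStatus status : listStatus(context)) {
      Path path = status.getPath();
      long totalLines = countLines(path, context.getConfiguration());
      for (long start = 0; start < totalLines; start += LINES_PER_SPLIT) {
        long length = Math.min(LINES_PER_SPLIT, totalLines - start);
        // start and length carry LINE numbers, not byte offsets: the same
        // convention that DownloaderRecordReader.initialize() decodes
        splits.add(new FileSplit(path, start, length, new String[0]));
      }
    }
    return splits;
  }

  @Override
  public RecordReader<LongWritable, Text> createRecordReader(InputSplit split,
      TaskAttemptContext context) {
    return new DownloaderRecordReader();
  }

  // Count the lines in the URL list with one pass over the file
  private static long countLines(Path path, Configuration conf) throws IOException {
    BufferedReader lineReader =
        new BufferedReader(new InputStreamReader(path.getFileSystem(conf).open(path)));
    try {
      long count = 0;
      while (lineReader.readLine() != null) {
        count++;
      }
      return count;
    } finally {
      lineReader.close();
    }
  }
}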