package hip.ch7.bloom;

import hip.ch3.avro.AvroBytesRecord;
import org.apache.avro.file.DataFileStream;
import org.apache.avro.generic.GenericDatumReader;
import org.apache.avro.generic.GenericRecord;
import org.apache.commons.io.FileUtils;
import org.apache.commons.io.IOUtils;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;
import org.apache.hadoop.util.bloom.BloomFilter;

import java.io.File;
import java.io.IOException;
import java.io.InputStream;

/**
 * Command-line tool and helper methods that read a {@link BloomFilter}
 * stored as the first record of an Avro data file and print it to
 * standard output.
 */
public class BloomFilterDumper extends Configured implements Tool {

  /**
   * Main entry point for the example. Runs this tool through
   * {@link ToolRunner} and exits the JVM with the tool's return code.
   *
   * @param args arguments; the first one is the path of the Avro file
   *             holding the bloom filter
   * @throws Exception when something goes wrong
   */
  public static void main(final String[] args) throws Exception {
    int res = ToolRunner.run(new Configuration(), new BloomFilterDumper(), args);
    System.exit(res);
  }

  /**
   * Dump the bloom filter found at the path given in {@code args[0]}
   * to standard output.
   *
   * @param args the command-line arguments; {@code args[0]} is the path
   *             of the Avro file to read
   * @return the process exit code: 0 on success, 1 when the path
   *         argument is missing
   * @throws Exception if something goes wrong
   */
  @Override
  public int run(final String[] args) throws Exception {
    if (args == null || args.length < 1) {
      System.err.println("Usage: BloomFilterDumper <bloom-filter-avro-file>");
      return 1;
    }

    Path sourceFile = new Path(args[0]);

    // Delegate to fromPath so the filesystem is resolved from the path
    // itself rather than always using the default filesystem.
    System.out.println(fromPath(getConf(), sourceFile));

    return 0;
  }

  /**
   * Reads a bloom filter from the first record of an Avro data stream.
   * The supplied stream is always closed before this method returns,
   * whether it completes normally or throws.
   *
   * @param is the stream positioned at the start of an Avro data file
   * @return the bloom filter populated by
   *         {@link AvroBytesRecord#fromGenericRecord} from the first record
   * @throws IOException if the stream cannot be read or contains no records
   */
  public static BloomFilter readFromAvro(InputStream is) throws IOException {
    DataFileStream<GenericRecord> reader = null;
    try {
      reader = new DataFileStream<GenericRecord>(
          is, new GenericDatumReader<GenericRecord>());

      if (!reader.hasNext()) {
        throw new IOException(
            "Avro stream contains no records; expected a serialized BloomFilter");
      }

      BloomFilter filter = new BloomFilter();
      AvroBytesRecord.fromGenericRecord(reader.next(), filter);
      return filter;
    } finally {
      // closeQuietly tolerates null, which covers the case where the
      // DataFileStream constructor itself failed.
      IOUtils.closeQuietly(reader);
      IOUtils.closeQuietly(is);
    }
  }

  /**
   * Reads a bloom filter from an Avro data file on the local filesystem.
   *
   * @param f the local file to read
   * @return the bloom filter stored in the file
   * @throws IOException if the file cannot be opened or read
   */
  public static BloomFilter fromFile(File f) throws IOException {
    return readFromAvro(FileUtils.openInputStream(f));
  }

  /**
   * Reads a bloom filter from an Avro data file on the filesystem that
   * owns the supplied path.
   *
   * @param config the Hadoop configuration used to resolve the filesystem
   * @param path   the path of the file to read
   * @return the bloom filter stored in the file
   * @throws IOException if the file cannot be opened or read
   */
  public static BloomFilter fromPath(Configuration config, Path path)
      throws IOException {
    FileSystem hdfs = path.getFileSystem(config);
    return readFromAvro(hdfs.open(path));
  }
}