/** * Copyright 2008 - CommonCrawl Foundation * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program. If not, see <http://www.gnu.org/licenses/>. * **/ package org.commoncrawl.util; import java.io.IOException; import java.io.OutputStreamWriter; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.apache.hadoop.io.NullWritable; import org.apache.hadoop.io.SequenceFile; import org.apache.hadoop.io.Writable; import com.google.gson.JsonElement; import com.google.gson.JsonParser; /** * * @author rana * */ public class SequenceFileUtils { @SuppressWarnings({ "unchecked", "deprecation" }) public static Class sniffValueTypeFromSequenceFile(FileSystem fs,Configuration conf,Path path)throws IOException { if (fs.exists(path) && fs.getFileStatus(path).isDir()) { path = new Path(path,"part-00000"); } SequenceFile.Reader reader = new SequenceFile.Reader(fs, path, conf); try { return reader.getValueClass(); } finally { reader.close(); } } public static void printContents(FileSystem fs,Configuration conf,Path path)throws IOException { SequenceFile.Reader reader = new SequenceFile.Reader(fs, path, conf); JsonParser parser = new JsonParser(); try { Writable key = (Writable) ((reader.getKeyClass() == NullWritable.class) ? NullWritable.get() : reader.getKeyClass().newInstance()); Writable value = (Writable) ((reader.getValueClass() == NullWritable.class) ? NullWritable.get() : reader.getValueClass().newInstance()); boolean more = true; do { more = reader.next(key,value); if (more) { System.out.println("Key:" + key.toString()); JsonElement jsonElement = null; try { jsonElement = parser.parse(value.toString()); } catch (Exception e) { } if (jsonElement == null) { OutputStreamWriter writer = new OutputStreamWriter(System.out, "UTF-8"); writer.write(value.toString()); writer.flush(); System.out.println(); } else { System.out.println(JSONUtils.prettyPrintJSON(jsonElement)); } } } while (more); } catch (Exception e) { e.printStackTrace(); } finally { reader.close(); } } public static void main(String[] args)throws IOException { Configuration conf = new Configuration(); Path inputPath = new Path(args[0]); FileSystem fs = FileSystem.get(inputPath.toUri(),conf); printContents(fs, conf, inputPath); } }