/**
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with this
 * work for additional information regarding copyright ownership. The ASF
 * licenses this file to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
 * License for the specific language governing permissions and limitations
 * under the License.
 */
package org.apache.hadoop.io.file.tfile;

import java.io.IOException;
import java.io.PrintStream;
import java.util.Collection;
import java.util.Iterator;
import java.util.LinkedHashMap;
import java.util.Map;
import java.util.Set;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IOUtils;
import org.apache.hadoop.io.file.tfile.BCFile.BlockRegion;
import org.apache.hadoop.io.file.tfile.BCFile.MetaIndexEntry;
import org.apache.hadoop.io.file.tfile.TFile.TFileIndexEntry;
import org.apache.hadoop.io.file.tfile.Utils.Version;

/**
 * Dumping the information of a TFile.
 */
class TFileDumper {
  static final Log LOG = LogFactory.getLog(TFileDumper.class);

  private TFileDumper() {
    // namespace object not constructable.
  }

  /**
   * Alignment helpers for rendering fixed-width table cells.
   */
  private enum Align {
    LEFT, CENTER, RIGHT, ZERO_PADDED;

    static String format(String s, int width, Align align) {
      if (s.length() >= width) return s;
      int room = width - s.length();
      Align alignAdjusted = align;
      if (room == 1) {
        alignAdjusted = LEFT;
      }
      if (alignAdjusted == LEFT) {
        return s + String.format("%" + room + "s", "");
      }
      if (alignAdjusted == RIGHT) {
        return String.format("%" + room + "s", "") + s;
      }
      if (alignAdjusted == CENTER) {
        int half = room / 2;
        return String.format("%" + half + "s", "") + s
            + String.format("%" + (room - half) + "s", "");
      }
      throw new IllegalArgumentException("Unsupported alignment");
    }

    static String format(long l, int width, Align align) {
      if (align == ZERO_PADDED) {
        return String.format("%0" + width + "d", l);
      }
      return format(Long.toString(l), width, align);
    }

    static int calculateWidth(String caption, long max) {
      return Math.max(caption.length(), Long.toString(max).length());
    }
  }
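  // Behavior of Align.format, for reference:
  //   Align.format("ab", 6, Align.LEFT)      -> "ab    "
  //   Align.format("ab", 6, Align.RIGHT)     -> "    ab"
  //   Align.format("ab", 6, Align.CENTER)    -> "  ab  "
  //   Align.format(7L, 4, Align.ZERO_PADDED) -> "0007"
  // Strings already at or over the target width are returned unchanged.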

  /**
   * Dump information about TFile.
   *
   * @param file
   *          Path string of the TFile
   * @param out
   *          PrintStream to output the information.
   * @param conf
   *          The configuration object.
   * @throws IOException
   */
  static public void dumpInfo(String file, PrintStream out, Configuration conf)
      throws IOException {
    final int maxKeySampleLen = 16;
    Path path = new Path(file);
    FileSystem fs = path.getFileSystem(conf);
    long length = fs.getFileStatus(path).getLen();
    FSDataInputStream fsdis = fs.open(path);
    TFile.Reader reader = new TFile.Reader(fsdis, length, conf);
    try {
      LinkedHashMap<String, String> properties =
          new LinkedHashMap<String, String>();
      int blockCnt = reader.readerBCF.getBlockCount();
      int metaBlkCnt = reader.readerBCF.metaIndex.index.size();
      properties.put("BCFile Version", reader.readerBCF.version.toString());
      properties.put("TFile Version", reader.tfileMeta.version.toString());
      properties.put("File Length", Long.toString(length));
      properties.put("Data Compression", reader.readerBCF
          .getDefaultCompressionName());
      properties.put("Record Count", Long.toString(reader.getEntryCount()));
      properties.put("Sorted", Boolean.toString(reader.isSorted()));
      if (reader.isSorted()) {
        properties.put("Comparator", reader.getComparatorName());
      }
      properties.put("Data Block Count", Integer.toString(blockCnt));
      long dataSize = 0, dataSizeUncompressed = 0;
      if (blockCnt > 0) {
        for (int i = 0; i < blockCnt; ++i) {
          BlockRegion region =
              reader.readerBCF.dataIndex.getBlockRegionList().get(i);
          dataSize += region.getCompressedSize();
          dataSizeUncompressed += region.getRawSize();
        }
        properties.put("Data Block Bytes", Long.toString(dataSize));
        // Compare with equals(), not ==: the codec name is not guaranteed to
        // be an interned string.
        if (!"none".equals(reader.readerBCF.getDefaultCompressionName())) {
          properties.put("Data Block Uncompressed Bytes", Long
              .toString(dataSizeUncompressed));
          properties.put("Data Block Compression Ratio", String.format(
              "1:%.1f", (double) dataSizeUncompressed / dataSize));
        }
      }
      properties.put("Meta Block Count", Integer.toString(metaBlkCnt));
      long metaSize = 0, metaSizeUncompressed = 0;
      if (metaBlkCnt > 0) {
        Collection<MetaIndexEntry> metaBlks =
            reader.readerBCF.metaIndex.index.values();
        boolean calculateCompression = false;
        for (Iterator<MetaIndexEntry> it = metaBlks.iterator(); it.hasNext();) {
          MetaIndexEntry e = it.next();
          metaSize += e.getRegion().getCompressedSize();
          metaSizeUncompressed += e.getRegion().getRawSize();
          if (e.getCompressionAlgorithm() != Compression.Algorithm.NONE) {
            calculateCompression = true;
          }
        }
        properties.put("Meta Block Bytes", Long.toString(metaSize));
        if (calculateCompression) {
          properties.put("Meta Block Uncompressed Bytes", Long
              .toString(metaSizeUncompressed));
          properties.put("Meta Block Compression Ratio", String.format(
              "1:%.1f", (double) metaSizeUncompressed / metaSize));
        }
      }
      properties.put("Meta-Data Size Ratio", String.format("1:%.1f",
          (double) dataSize / metaSize));
      long leftOverBytes = length - dataSize - metaSize;
      // Fixed overhead outside the blocks and indexes: two BCFile magics,
      // one 8-byte long, and the version record.
      long miscSize =
          BCFile.Magic.size() * 2 + Long.SIZE / Byte.SIZE + Version.size();
      long metaIndexSize = leftOverBytes - miscSize;
      properties.put("Meta Block Index Bytes", Long.toString(metaIndexSize));
      properties.put("Headers Etc Bytes", Long.toString(miscSize));

      // Now output the properties table.
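      // Each property prints on its own line as "<key> : <value>", with keys
      // left-aligned to a common width; for example (values illustrative):
      //   BCFile Version   : 1.0
      //   TFile Version    : 1.0
      //   File Length      : 104523
      //   Data Compression : gz
      //   ...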
      int maxKeyLength = 0;
      Set<Map.Entry<String, String>> entrySet = properties.entrySet();
      for (Iterator<Map.Entry<String, String>> it = entrySet.iterator(); it
          .hasNext();) {
        Map.Entry<String, String> e = it.next();
        if (e.getKey().length() > maxKeyLength) {
          maxKeyLength = e.getKey().length();
        }
      }
      for (Iterator<Map.Entry<String, String>> it = entrySet.iterator(); it
          .hasNext();) {
        Map.Entry<String, String> e = it.next();
        out.printf("%s : %s\n", Align.format(e.getKey(), maxKeyLength,
            Align.LEFT), e.getValue());
      }
      out.println();
      reader.checkTFileDataIndex();
      if (blockCnt > 0) {
        String blkID = "Data-Block";
        int blkIDWidth = Align.calculateWidth(blkID, blockCnt);
        int blkIDWidth2 = Align.calculateWidth("", blockCnt);
        String offset = "Offset";
        int offsetWidth = Align.calculateWidth(offset, length);
        String blkLen = "Length";
        int blkLenWidth =
            Align.calculateWidth(blkLen, dataSize / blockCnt * 10);
        String rawSize = "Raw-Size";
        int rawSizeWidth =
            Align.calculateWidth(rawSize, dataSizeUncompressed / blockCnt * 10);
        String records = "Records";
        int recordsWidth =
            Align.calculateWidth(records, reader.getEntryCount() / blockCnt
                * 10);
        String endKey = "End-Key";
        int endKeyWidth = Math.max(endKey.length(), maxKeySampleLen * 2 + 5);

        out.printf("%s %s %s %s %s %s\n", Align.format(blkID, blkIDWidth,
            Align.CENTER), Align.format(offset, offsetWidth, Align.CENTER),
            Align.format(blkLen, blkLenWidth, Align.CENTER), Align.format(
                rawSize, rawSizeWidth, Align.CENTER), Align.format(records,
                recordsWidth, Align.CENTER), Align.format(endKey, endKeyWidth,
                Align.LEFT));

        for (int i = 0; i < blockCnt; ++i) {
          BlockRegion region =
              reader.readerBCF.dataIndex.getBlockRegionList().get(i);
          TFileIndexEntry indexEntry = reader.tfileIndex.getEntry(i);
          out.printf("%s %s %s %s %s ", Align.format(Align.format(i,
              blkIDWidth2, Align.ZERO_PADDED), blkIDWidth, Align.LEFT), Align
              .format(region.getOffset(), offsetWidth, Align.LEFT), Align
              .format(region.getCompressedSize(), blkLenWidth, Align.LEFT),
              Align.format(region.getRawSize(), rawSizeWidth, Align.LEFT),
              Align.format(indexEntry.kvEntries, recordsWidth, Align.LEFT));
          byte[] key = indexEntry.key;
          boolean asAscii = true;
          int sampleLen = Math.min(maxKeySampleLen, key.length);
          // Sample the first bytes of the end-key; fall back to hex if any
          // sampled byte is not printable ASCII (tab is allowed).
          for (int j = 0; j < sampleLen; ++j) {
            byte b = key[j];
            if ((b < 32 && b != 9) || (b == 127)) {
              asAscii = false;
            }
          }
          if (!asAscii) {
            out.print("0X");
            for (int j = 0; j < sampleLen; ++j) {
              // Print each sampled byte as exactly two hex digits.
              byte b = key[j];
              out.printf("%02X", b);
            }
          } else {
            out.print(new String(key, 0, sampleLen));
          }
          if (sampleLen < key.length) {
            out.print("...");
          }
          out.println();
        }
      }
      out.println();
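      // The meta-block table below mirrors the data-block table, except that
      // rows are keyed by block name rather than ordinal, and each row also
      // reports its own compression codec.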
      if (metaBlkCnt > 0) {
        String name = "Meta-Block";
        int maxNameLen = 0;
        Set<Map.Entry<String, MetaIndexEntry>> metaBlkEntrySet =
            reader.readerBCF.metaIndex.index.entrySet();
        for (Iterator<Map.Entry<String, MetaIndexEntry>> it =
            metaBlkEntrySet.iterator(); it.hasNext();) {
          Map.Entry<String, MetaIndexEntry> e = it.next();
          if (e.getKey().length() > maxNameLen) {
            maxNameLen = e.getKey().length();
          }
        }
        int nameWidth = Math.max(name.length(), maxNameLen);
        String offset = "Offset";
        int offsetWidth = Align.calculateWidth(offset, length);
        String blkLen = "Length";
        int blkLenWidth =
            Align.calculateWidth(blkLen, metaSize / metaBlkCnt * 10);
        String rawSize = "Raw-Size";
        int rawSizeWidth =
            Align.calculateWidth(rawSize, metaSizeUncompressed / metaBlkCnt
                * 10);
        String compression = "Compression";
        int compressionWidth = compression.length();

        out.printf("%s %s %s %s %s\n", Align.format(name, nameWidth,
            Align.CENTER), Align.format(offset, offsetWidth, Align.CENTER),
            Align.format(blkLen, blkLenWidth, Align.CENTER), Align.format(
                rawSize, rawSizeWidth, Align.CENTER), Align.format(
                compression, compressionWidth, Align.LEFT));

        for (Iterator<Map.Entry<String, MetaIndexEntry>> it =
            metaBlkEntrySet.iterator(); it.hasNext();) {
          Map.Entry<String, MetaIndexEntry> e = it.next();
          String blkName = e.getValue().getMetaName();
          BlockRegion region = e.getValue().getRegion();
          String blkCompression =
              e.getValue().getCompressionAlgorithm().getName();
          out.printf("%s %s %s %s %s\n", Align.format(blkName, nameWidth,
              Align.LEFT), Align.format(region.getOffset(), offsetWidth,
              Align.LEFT), Align.format(region.getCompressedSize(),
              blkLenWidth, Align.LEFT), Align.format(region.getRawSize(),
              rawSizeWidth, Align.LEFT), Align.format(blkCompression,
              compressionWidth, Align.LEFT));
        }
      }
    } finally {
      IOUtils.cleanup(LOG, reader, fsdis);
    }
  }
}
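
/**
 * A minimal usage sketch, not part of the original class: dumpInfo is
 * package-private, so callers must live in this package (within Hadoop it is
 * normally driven through TFile's command-line entry point). The file path
 * below is hypothetical.
 */
class TFileDumperExample {
  public static void main(String[] args) throws IOException {
    Configuration conf = new Configuration();
    // Dump the structure of an existing TFile to stdout.
    TFileDumper.dumpInfo("/tmp/example.tfile", System.out, conf);
  }
}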