/** * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information * regarding copyright ownership. The ASF licenses this file * to you under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance * with the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.apache.hadoop.hdfs.tools.offlineImageViewer; import java.io.IOException; import java.util.LinkedList; /** * File size distribution visitor. * * <h3>Description.</h3> * This is the tool for analyzing file sizes in the namespace image. * In order to run the tool one should define a range of integers * <tt>[0, maxSize]</tt> by specifying <tt>maxSize</tt> and a <tt>step</tt>. * The range of integers is divided into segments of size <tt>step</tt>: * <tt>[0, s<sub>1</sub>, ..., s<sub>n-1</sub>, maxSize]</tt>, * and the visitor calculates how many files in the system fall into * each segment <tt>[s<sub>i-1</sub>, s<sub>i</sub>)</tt>. * Note that files larger than <tt>maxSize</tt> always fall into * the very last segment. * * <h3>Input.</h3> * <ul> * <li><tt>filename</tt> specifies the location of the image file;</li> * <li><tt>maxSize</tt> determines the range <tt>[0, maxSize]</tt> of files * sizes considered by the visitor;</li> * <li><tt>step</tt> the range is divided into segments of size step.</li> * </ul> * * <h3>Output.</h3> * The output file is formatted as a tab separated two column table: * Size and NumFiles. Where Size represents the start of the segment, * and numFiles is the number of files form the image which size falls in * this segment. */ class FileDistributionVisitor extends TextWriterImageVisitor { final private LinkedList<ImageElement> elemS = new LinkedList<ImageElement>(); private final static long MAX_SIZE_DEFAULT = 0x2000000000L; // 1/8 TB = 2^37 private final static int INTERVAL_DEFAULT = 0x200000; // 2 MB = 2^21 private int[] distribution; private long maxSize; private int step; private int totalFiles; private int totalDirectories; private int totalBlocks; private long totalSpace; private long maxFileSize; private FileContext current; private boolean inInode = false; /** * File or directory information. */ private static class FileContext { String path; long fileSize; int numBlocks; int replication; } public FileDistributionVisitor(String filename, long maxSize, int step) throws IOException { super(filename, false); this.maxSize = (maxSize == 0 ? MAX_SIZE_DEFAULT : maxSize); this.step = (step == 0 ? INTERVAL_DEFAULT : step); long numIntervals = this.maxSize / this.step; if(numIntervals >= Integer.MAX_VALUE) throw new IOException("Too many distribution intervals " + numIntervals); this.distribution = new int[1 + (int)(numIntervals)]; this.totalFiles = 0; this.totalDirectories = 0; this.totalBlocks = 0; this.totalSpace = 0; this.maxFileSize = 0; } @Override void start() throws IOException {} @Override void finish() throws IOException { // write the distribution into the output file write("Size\tNumFiles\n"); for(int i = 0; i < distribution.length; i++) write(((long)i * step) + "\t" + distribution[i] + "\n"); System.out.println("totalFiles = " + totalFiles); System.out.println("totalDirectories = " + totalDirectories); System.out.println("totalBlocks = " + totalBlocks); System.out.println("totalSpace = " + totalSpace); System.out.println("maxFileSize = " + maxFileSize); super.finish(); } @Override void leaveEnclosingElement() throws IOException { ImageElement elem = elemS.pop(); if(elem != ImageElement.INODE && elem != ImageElement.INODE_UNDER_CONSTRUCTION) return; inInode = false; if(current.numBlocks < 0) { totalDirectories ++; return; } totalFiles++; totalBlocks += current.numBlocks; totalSpace += current.fileSize * current.replication; if(maxFileSize < current.fileSize) maxFileSize = current.fileSize; int high; if(current.fileSize > maxSize) high = distribution.length-1; else high = (int)Math.ceil((double)current.fileSize / step); distribution[high]++; if(totalFiles % 1000000 == 1) System.out.println("Files processed: " + totalFiles + " Current: " + current.path); } @Override void visit(ImageElement element, String value) throws IOException { if(inInode) { switch(element) { case INODE_PATH: current.path = (value.equals("") ? "/" : value); break; case REPLICATION: current.replication = Integer.valueOf(value); break; case NUM_BYTES: current.fileSize += Long.valueOf(value); break; default: break; } } } @Override void visitEnclosingElement(ImageElement element) throws IOException { elemS.push(element); if(element == ImageElement.INODE || element == ImageElement.INODE_UNDER_CONSTRUCTION) { current = new FileContext(); inInode = true; } } @Override void visitEnclosingElement(ImageElement element, ImageElement key, String value) throws IOException { elemS.push(element); if(element == ImageElement.INODE || element == ImageElement.INODE_UNDER_CONSTRUCTION) inInode = true; else if(element == ImageElement.BLOCKS) current.numBlocks = Integer.parseInt(value); } }