/*
* This file or a portion of this file is licensed under the terms of
* the Globus Toolkit Public License, found in file ../GTPL, or at
* http://www.globus.org/toolkit/download/license.html. This notice must
* appear in redistributions of this file, with or without modification.
*
* Redistributions of this Software, with or without modification, must
* reproduce the GTPL in: (1) the Software, or (2) the Documentation or
* some other similar material which is provided with the Software (if
* any).
*
* Copyright 1999-2004 University of Chicago and The University of
* Southern California. All rights reserved.
*/
package org.griphyn.vdl.euryale;
import java.io.*;
import java.util.*;
/**
* This file factory generates a stream of submit files in a dynamically
* determinable directory structure. By default, a 2-level subdirectory
* structure is assumed, which should be able to accomodate about 500k files.
*
* <pre>
* mult=16, offset=30, fpd=254: nr=15 => l=1
* mult=16, offset=30, fpd=254: nr=4047 => l=2
* mult=16, offset=30, fpd=254: nr=1028222 => l=3
* </pre>
*
* With the given multiplicator, offset and files per directory, nr is smallest
* number of jobs at which a level change to l occurs.
*
* @author Kavitha Ranganathan
* @author Jens-S. Vöckler
* @author Yong Zhao
* @version $Revision$
*
* @see DAX2DAG
*/
public class HashedFileFactory extends FlatFileFactory {

    /**
     * Determines dynamically the number of directory levels required to
     * accommodate a certain number of files.
     *
     * <p>The estimated total <code>totalFiles * multiplicator + offset</code>
     * is repeatedly integer-divided by <code>filesPerDirectory</code> for as
     * long as it exceeds <code>filesPerDirectory</code>; the number of
     * divisions is the result. This is roughly
     *
     * <pre>
     * levels = floor( log_fpd( tf * m + offset ) )
     * </pre>
     *
     * @param totalFiles is the total number of files estimated to generate
     * @param multiplicator is a corrective factor to account for files that are
     * created by the run-time system on the fly. For Euryale and Pegasus it is
     * safe to assume a multiplicator of at least 8.
     * @param filesPerDirectory is the optimal maximum number of directory
     * entries in any directory. The value of 254 for Linux ext2, and thus ext3,
     * is a safe bet.
     * @param offset is the number of (expected) files in the top level.
     * @return the number of directory levels necessary to accommodate the given
     * number of files; 0 if the estimate fits into a single directory.
     * @throws IllegalArgumentException if the estimate does not fit into one
     * directory and <code>filesPerDirectory</code> is less than 2, because
     * no number of levels could then ever accommodate the files.
     */
    public static int calculateLevels(int totalFiles,
                                      int multiplicator,
                                      int filesPerDirectory,
                                      int offset) {
        // total files to accommodate, including ones cropping up later.
        // Widen BEFORE multiplying, otherwise the int product may overflow
        // and only the already-wrapped value would be stored in the long.
        long total = (long) totalFiles * multiplicator + offset;

        // everything fits into one directory: no hashing levels required
        if (total <= filesPerDirectory) {
            return 0;
        }

        // dividing by 1 never shrinks the total (endless loop), and dividing
        // by 0 or a negative number is meaningless for a directory fan-out
        if (filesPerDirectory < 2) {
            throw new IllegalArgumentException(
                "filesPerDirectory must be at least 2 to accommodate "
                + total + " files, but is " + filesPerDirectory);
        }

        // "count" the levels
        int levels = 0;
        while (total > filesPerDirectory) {
            ++levels;
            total /= filesPerDirectory;
        }
        return levels;
    }

    /**
     * Counts the number of times the structured virtual constructor was called.
     *
     * @see #getCount()
     */
    protected int m_count;

    /**
     * Contains the total number of directory levels. Defaults to a reasonable
     * level for hashing.
     */
    protected int m_levels = 2;

    /**
     * Number of entries per level. The number 254 is optimized for the Linux
     * VFS ext2, and consequently ext3, which works fastest, if the number of
     * entries per directory, including dot and dotdot, don't exceed 256.
     */
    protected int m_filesPerDirectory = 254;

    /**
     * Multiplicative factor to estimate the number of result leaf filenames for
     * each virtual constructor invocation. We assume that Euryale produces ~12
     * files per submit file. It is better to err on the larger side than
     * making the multiplicator too small.
     */
    protected int m_multiplicator = 16;

    /**
     * Offset of files expected to reside at the top level directory. This is
     * counted in addition to the directories being created.
     */
    protected int m_offset = 30;

    /**
     * Helping structure to avoid repeated memory requests. Stores the directory
     * number for each level.
     *
     * @see #createFile( String )
     */
    protected int[] mh_level;

    /**
     * Helping structure to avoid repeated memory requests. Stores the digits
     * necessary to create one level's directory name.
     *
     * @see #format( int )
     */
    protected StringBuffer mh_buffer;

    /**
     * Helping structure to avoid repeated memory requests. Stores the number of
     * digits for hexadecimal formatting.
     *
     * @see #createFile( String )
     */
    protected int mh_digits;

    /**
     * Resets the helper structures after changing layout parameters. You will
     * also need to call this function after you invoked the virtual
     * constructors, but want to change parameter pertaining to the directory
     * structure. The structured file count will also be reset!
     */
    public void reset() {
        super.reset();
        m_count = 0;
        mh_level = new int[m_levels];
        // number of hexadecimal digits: ceil( log16( filesPerDirectory ) )
        mh_digits = (int) Math.ceil(Math.log(m_filesPerDirectory) / Math.log(16));
        mh_buffer = new StringBuffer(mh_digits);
    }

    /**
     * Constructor: Creates the base directory and employs sanity checks.
     *
     * @param baseDirectory is the place where the other dirs are created, and
     * where the DAG file resides.
     * @throws IOException if the location is not a writable directory, or
     * cannot be created as such.
     */
    public HashedFileFactory(File baseDirectory)
        throws IOException {
        super(baseDirectory);
        reset();
    }

    /**
     * Constructor: Creates the directory and employs sanity checks.
     *
     * @param baseDirectory is the place where the other dirs are created, and
     * where the DAG file resides.
     * @throws IOException if the location is not a writable directory, or
     * cannot be created as such.
     */
    public HashedFileFactory(String baseDirectory)
        throws IOException {
        super(baseDirectory);
        reset();
    }

    /**
     * Constructor: Creates the base directory and employs sanity checks.
     * The number of levels is derived from the expected number of files
     * using the default multiplicator, files per directory and offset.
     *
     * @param baseDirectory is the place where the other dirs are created, and
     * where the DAG file resides.
     * @param totalFiles is the number of files to support, and the number of
     * times, the virtual constructor is expected to be called.
     * @throws IOException if the location is not a writable directory, or
     * cannot be created as such.
     */
    public HashedFileFactory(File baseDirectory, int totalFiles)
        throws IOException {
        super(baseDirectory);
        m_levels = calculateLevels(totalFiles,
                                   m_multiplicator,
                                   m_filesPerDirectory,
                                   m_offset);
        reset();
    }

    /**
     * Constructor: Creates the directory and employs sanity checks.
     * The number of levels is derived from the expected number of files
     * using the default multiplicator, files per directory and offset.
     *
     * @param baseDirectory is the place where the other dirs are created, and
     * where the DAG file resides.
     * @param totalFiles is the number of files to support, and the number of
     * times, the virtual constructor is expected to be called.
     * @throws IOException if the location is not a writable directory, or
     * cannot be created as such.
     */
    public HashedFileFactory(String baseDirectory, int totalFiles)
        throws IOException {
        super(baseDirectory);
        m_levels = calculateLevels(totalFiles,
                                   m_multiplicator,
                                   m_filesPerDirectory,
                                   m_offset);
        reset();
    }

    /**
     * Converts the given integer into upper-case hexadecimal notation, using
     * the number of digits determined by {@link #reset()}, prefixing with
     * zeros as necessary. Numbers that need more digits are not truncated.
     *
     * @param number is the number to format.
     * @return a string of appropriate length, filled with leading zeros,
     * representing the number hexadecimally.
     */
    public String format(int number) {
        // Clear the WHOLE scratch buffer. Deleting only the first mh_digits
        // characters would leave residue behind whenever a previous result
        // was wider than mh_digits.
        mh_buffer.setLength(0);
        mh_buffer.append(Integer.toHexString(number).toUpperCase());
        while (mh_buffer.length() < mh_digits) {
            mh_buffer.insert(0, '0');
        }
        return mh_buffer.toString();
    }

    /**
     * Virtual constructor: Creates the next file with the given basename.
     * The file is obtained from {@link #createFile(String)} and then
     * expressed relative to the base directory.
     *
     * @param basename is the filename to create. Don't specify dirs here.
     *
     * @return a relative File structure (relative to the base directory)
     * which points to the new file. If the base directory is not an
     * ancestor of the created file, a File with an empty path is returned.
     * @throws java.io.IOException
     *
     * @see #getCount()
     */
    @Override
    public File createRelativeFile(String basename)
        throws IOException {
        File current = this.createFile(basename);
        File base = this.getBaseDirectory();

        // Walk from the file towards the root, remembering each path
        // component, until the base directory shows up as parent.
        Deque<String> components = new ArrayDeque<String>();
        StringBuilder relative = new StringBuilder();
        File parent;
        while ((parent = current.getParentFile()) != null) {
            components.push(current.getName());
            if (parent.equals(base)) {
                // popping yields the components in top-down order
                while (!components.isEmpty()) {
                    relative.append(components.pop());
                    if (!components.isEmpty()) {
                        relative.append(File.separator);
                    }
                }
                break;
            }
            current = parent;
        }
        return new File(relative.toString());
    }

    /**
     * Creates the next file with the given basename. This is the factory
     * standard virtual constructor. Once invoked, the directory structure can
     * not be changed any more.
     *
     * @param basename is the filename to create. Don't specify dirs here.
     * @return a File structure which points to the new file. The hashed
     * directories are created as necessary, but the file itself is not
     * created through this method, and its creation may still fail.
     * @throws IOException if a directory of the hashed structure cannot be
     * created, or exists as something other than a directory.
     * @throws RuntimeException if the value remaining after all levels were
     * assigned exceeds the number of files per directory (wrap-around).
     * @see #getCount()
     */
    public File createFile(String basename)
        throws IOException {
        // calculate the directory which this goes into. Use long arithmetic
        // so that a large count cannot overflow into negative directory
        // numbers; such a case is caught by the wrap-around check below.
        long estimate = ((long) m_count++ * m_multiplicator) + m_offset;
        for (int i = m_levels - 1; i >= 0; --i) {
            estimate /= m_filesPerDirectory;
            mh_level[i] = (int) (estimate % m_filesPerDirectory);
        }
        if (estimate > m_filesPerDirectory) {
            throw new RuntimeException("ERROR! Wrap-around of generator.");
        }

        // create the base directory if required
        File d = createDirectory();

        // return position in new (or old) directory
        return new File(d, basename);
    }

    /**
     * Creates a directory for the hashed file directory structure on the submit
     * host. One nested directory is created per level below the base
     * directory, named after the formatted entry in <code>mh_level</code>.
     *
     * @return the File structure to the created directory
     *
     * @throws IOException if a path component exists but is not a directory,
     * or if a directory could not be created.
     */
    protected File createDirectory() throws IOException {
        // create directory, as necessary
        File d = getBaseDirectory();
        for (int i = 0; i < m_levels; ++i) {
            d = new File(d, format(mh_level[i]));
            if (d.exists()) {
                if (!d.isDirectory()) {
                    throw new IOException(d.getPath() + " is not a directory");
                }
            } else {
                if (!d.mkdir()) {
                    throw new IOException("unable to create directory " + d.getPath());
                }
            }
        }
        return d;
    }

    /**
     * Returns the number of times the regular virtual constructor for
     * structured entries was called.
     *
     * @return the count for createFile invocations.
     * @see #createFile( String )
     */
    public int getCount() {
        return m_count;
    }

    /**
     * Accessor: Obtains the total number of directory levels.
     *
     * @return the total number of directory levels chosen.
     */
    public int getLevels() {
        return m_levels;
    }

    /**
     * Accessor: Sets the number of directory levels. Note that this modificator
     * can only be called before the virtual constructor is called the first
     * time.
     *
     * @param levels is the number of directory levels to use
     * @throws VTorInUseException if the virtual constructor is already in use.
     * @throws IllegalArgumentException if the argument is less than zero.
     * @see #getLevels()
     */
    public void setLevels(int levels) {
        if (m_count != 0) {
            throw new VTorInUseException();
        }
        if (levels < 0) {
            throw new IllegalArgumentException("levels must not be negative: " + levels);
        }
        m_levels = levels;
        reset();
    }

    /**
     * Accessor: Sets the number of directory levels. Note that this modificator
     * can only be called before the virtual constructor is called the first
     * time. It takes as argument the total number of expected files instead of
     * the level.
     *
     * @param totalFiles is the total number of files to accommodate.
     * @throws VTorInUseException if the virtual constructor is already in use.
     * @throws IllegalArgumentException if the argument is less than zero.
     * @see #getLevels()
     */
    public void setLevelsFromTotals(int totalFiles) {
        if (m_count != 0) {
            throw new VTorInUseException();
        }
        if (totalFiles < 0) {
            throw new IllegalArgumentException("totalFiles must not be negative: " + totalFiles);
        }
        m_levels = calculateLevels(totalFiles,
                                   m_multiplicator,
                                   m_filesPerDirectory,
                                   m_offset);
        reset();
    }

    /**
     * Accessor: Obtains the number of entries per directory.
     *
     * @return the chosen number of entries per directory excluding the dot and
     * dotdot files.
     */
    public int getFilesPerDirectory() {
        return m_filesPerDirectory;
    }

    /**
     * Accessor: Sets the optimal maximum number of files per directory
     * excluding dot and dotdot. For a Linux ext2 and thus ext3 system, the
     * optimal maximum number is 254.
     *
     * @param entries is the number of optimal maximum entries per dir.
     * @throws VTorInUseException if the virtual constructor is already in use.
     * @throws IllegalArgumentException if the argument is less than one.
     * @see #getFilesPerDirectory()
     */
    public void setFilesPerDirectory(int entries) {
        if (m_count != 0) {
            throw new VTorInUseException();
        }
        if (entries <= 0) {
            throw new IllegalArgumentException("entries must be positive: " + entries);
        }
        m_filesPerDirectory = entries;
        reset();
    }

    /**
     * Accessor: Obtains the multiplicative factor for an estimation of total
     * files from calls to the virtual constructor.
     *
     * @return the multiplicator.
     * @see #setMultiplicator(int)
     */
    public int getMultiplicator() {
        return m_multiplicator;
    }

    /**
     * Accessor: Sets the multiplicative factor to account for files which may
     * be created without calling the virtual constructor.
     *
     * @param multiplicator is the new multiplicator.
     * @throws VTorInUseException if the virtual constructor is already in use.
     * @throws IllegalArgumentException if the argument is less than one.
     * @see #getMultiplicator()
     */
    public void setMultiplicator(int multiplicator) {
        if (m_count != 0) {
            throw new VTorInUseException();
        }
        if (multiplicator < 1) {
            throw new IllegalArgumentException("multiplicator must be at least 1: " + multiplicator);
        }
        m_multiplicator = multiplicator;
        reset();
    }

    /**
     * Accessor: Obtains the offset for an estimation of total files from calls
     * to the virtual constructor.
     *
     * @return the offset
     * @see #setOffset(int)
     */
    public int getOffset() {
        return m_offset;
    }

    /**
     * Accessor: Sets the offset for files which may be created without calling
     * the virtual constructor.
     *
     * @param offset is the new offset
     * @throws VTorInUseException if the virtual constructor is already in use.
     * @throws IllegalArgumentException if the argument is less than zero.
     * @see #getOffset()
     */
    public void setOffset(int offset) {
        if (m_count != 0) {
            throw new VTorInUseException();
        }
        if (offset < 0) {
            throw new IllegalArgumentException("offset must not be negative: " + offset);
        }
        m_offset = offset;
        reset();
    }

    /**
     * Test function. Without arguments, prints for the default parameters
     * the smallest number of files at which each of the first three level
     * changes occurs. With arguments, each argument is parsed as a total
     * number of files, and the resulting layout plus an example file
     * location below "/tmp" is printed.
     *
     * @param arg are the command-line arguments, assumed to be numeric.
     * @throws Exception if anything goes wrong.
     */
    public static void main(String[] arg)
        throws Exception {
        if (arg.length == 0) {
            // no arguments, spit out at which point levels change
            HashedFileFactory def = new HashedFileFactory("/tmp");
            int level = 0;
            for (int i = 1; i < 4; ++i) {
                def.reset();

                // start searching slightly below the analytic threshold
                // ( fpd^i - offset ) / multiplicator
                int nr = 1;
                for (int n = i; n > 0; n--) {
                    nr *= def.getFilesPerDirectory();
                }
                nr -= def.getOffset();
                nr /= def.getMultiplicator();

                for (int j = -2; j < Integer.MAX_VALUE; ++j) {
                    int n = nr + j;
                    def.reset();
                    def.setLevelsFromTotals(n);
                    if (level < def.getLevels()) {
                        ++level;
                        System.out.println("mult=" + def.getMultiplicator()
                            + ", offset=" + def.getOffset()
                            + ", fpd=" + def.getFilesPerDirectory()
                            + ": nr=" + n + " => l=" + level);
                        break;
                    }
                }
            }
        } else {
            // arguments, assume numeric strings
            for (int i = 0; i < arg.length; ++i) {
                int nr = Integer.parseInt(arg[i]);
                HashedFileFactory hff = new HashedFileFactory("/tmp");
                hff.setLevelsFromTotals(nr);
                System.out.println();
                System.out.println("filesPerDirectory = " + hff.getFilesPerDirectory());
                System.out.println("multiplicator = " + hff.getMultiplicator());
                System.out.println("offset = " + hff.getOffset());
                System.out.println("totalFiles = " + nr);
                System.out.println("levels = " + hff.getLevels());
                File f = hff.createFile("ID000001");
                System.out.println("example = \"" + f.getAbsolutePath() + "\"");
            }
            System.out.println();
        }
    }
}