/*******************************************************************************
* Copyright (c) 2011 EclipseSource and others.
* All rights reserved. This program and the accompanying materials
* are made available under the terms of the Eclipse Public License v1.0
* which accompanies this distribution, and is available at
* http://www.eclipse.org/legal/epl-v10.html
*
* Contributors:
* EclipseSource - initial API and implementation
******************************************************************************/
package com.eclipsesource.tabris.demos.enron;
import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileReader;
import java.io.FileWriter;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.List;
/**
 * Command-line tool that walks a directory tree and writes a <code>.index</code> file into every
 * directory it visits.
 * <p>
 * Each line of an index file describes one child of the directory and consists of five
 * tab-separated fields: type (<code>d</code> for directories, <code>f</code> for files), name,
 * subject, sender and child count. For directories the subject and sender fields repeat the
 * directory name; for files they are taken from the <code>Subject: </code> and
 * <code>From: </code> header lines of the file and the child count is <code>0</code>.
 * </p>
 * <p>
 * A directory that already contains an index file is not visited again; its child count is
 * taken from the number of non-blank lines in the existing index file.
 * </p>
 */
class EnronDatasetIndexer {

  private static final String INDEX_FILE_NAME = ".index";
  private static final String SUBJECT_HEADER = "Subject: ";
  private static final String FROM_HEADER = "From: ";
  private static final String NO_SUBJECT = "[No Subject]";
  private static final String UNKNOWN_SENDER = "[Unknown]";

  private final File root;
  private int fileCount;
  private int dirCount;

  /**
   * Indexes the directory given as first argument and prints the number of processed
   * directories and files. Exits with status 42 if no argument is given.
   *
   * @param args the command line arguments; <code>args[ 0 ]</code> is the root directory
   */
  public static void main( String[] args ) {
    if( args.length == 0 ) {
      System.err.println( "Missing root directory" );
      System.exit( 42 );
    }
    File root = new File( args[ 0 ] );
    System.out.println( "Indexing " + root.getAbsolutePath() + " ..." );
    EnronDatasetIndexer indexer = new EnronDatasetIndexer( root );
    try {
      indexer.index();
    } catch( IOException exception ) {
      throw new RuntimeException( exception );
    }
    System.out.println( "Done." );
    System.out.println( "Processed directories: " + indexer.dirCount );
    System.out.println( "Processed files: " + indexer.fileCount );
    System.out.println( "Total: " + ( indexer.dirCount + indexer.fileCount ) );
  }

  /**
   * Creates an indexer for the given root directory. Nothing is read or written until
   * {@link #index()} is called.
   *
   * @param root the directory to start indexing at
   */
  public EnronDatasetIndexer( File root ) {
    this.root = root;
    fileCount = 0;
    dirCount = 0;
  }

  /**
   * Indexes the root directory and, recursively, all directories below it that do not have an
   * index file yet.
   *
   * @throws IOException if an index file cannot be read or written
   * @throws RuntimeException if the children of a visited file cannot be listed, e.g. because
   *           the root is not a directory
   */
  public void index() throws IOException {
    index( root );
  }

  /**
   * Indexes a single directory, descending into sub-directories first so that their child
   * counts are known when the entry for them is written.
   *
   * @return the number of children of the directory, not counting the index file itself
   */
  private int index( File file ) throws IOException {
    File indexFile = getIndexFile( file );
    if( indexFile.exists() ) {
      // already indexed: reuse the existing result instead of walking the sub-tree again
      return readChildCountFromIndexFile( indexFile );
    }
    File[] children = file.listFiles();
    if( children == null ) {
      // listFiles() returns null for non-directories and on I/O errors
      throw new RuntimeException( "no child count available for " + file.getAbsolutePath() );
    }
    List<FileEntry> entries = new ArrayList<FileEntry>( children.length );
    for( File child : children ) {
      String name = child.getName();
      if( !INDEX_FILE_NAME.equals( name ) ) {
        count( child );
        if( child.isDirectory() ) {
          int childCount = index( child );
          entries.add( new FileEntry( name, name, name, 'd', childCount ) );
        } else {
          String[] displayTexts = getDisplayTexts( child );
          entries.add( new FileEntry( name, displayTexts[ 0 ], displayTexts[ 1 ], 'f', 0 ) );
        }
      }
    }
    createIndexFile( indexFile, entries );
    return entries.size();
  }

  /**
   * Extracts the display texts of a message file from its header section.
   * <p>
   * Only lines that <em>start</em> with <code>Subject: </code> or <code>From: </code> are
   * considered, so that other headers such as <code>X-From: </code> are not mistaken for them.
   * The first occurrence of each header wins and scanning stops at the first empty line, which
   * terminates the header section, so that quoted headers in the message body are ignored.
   * </p>
   *
   * @return a two-element array containing the subject and the sender, in this order; missing or
   *         empty values are replaced by <code>[No Subject]</code> and <code>[Unknown]</code>
   */
  private String[] getDisplayTexts( File child ) {
    String subject = null;
    String from = null;
    BufferedReader bufferedReader = null;
    try {
      bufferedReader = new BufferedReader( new FileReader( child ) );
      String line = bufferedReader.readLine();
      while( line != null && line.length() > 0 && ( subject == null || from == null ) ) {
        if( subject == null && line.startsWith( SUBJECT_HEADER ) ) {
          subject = line.substring( SUBJECT_HEADER.length() );
        } else if( from == null && line.startsWith( FROM_HEADER ) ) {
          from = line.substring( FROM_HEADER.length() );
        }
        line = bufferedReader.readLine();
      }
    } catch( IOException ignored ) {
      // best effort: an unreadable file is still listed, using whatever was found so far
    } finally {
      if( bufferedReader != null ) {
        try {
          bufferedReader.close();
        } catch( IOException e ) {
          throw new IllegalStateException( e );
        }
      }
    }
    return new String[] {
      toDisplayText( subject, NO_SUBJECT ),
      toDisplayText( from, UNKNOWN_SENDER )
    };
  }

  /**
   * Returns the fallback for missing or empty header values. Tabs in the value are replaced by
   * spaces because the tab character is the field separator of the index file.
   */
  private static String toDisplayText( String headerValue, String fallback ) {
    if( headerValue == null || headerValue.length() == 0 ) {
      return fallback;
    }
    return headerValue.replace( '\t', ' ' );
  }

  /** Updates the statistics that are printed by {@link #main(String[])}. */
  private void count( File file ) {
    if( file.isDirectory() ) {
      dirCount++;
    } else {
      fileCount++;
    }
  }

  /** Sorts the given entries in place and writes them to the given index file. */
  private static void createIndexFile( File indexFile, List<FileEntry> list ) throws IOException {
    sortFileList( list );
    String string = createString( list );
    writeToFile( indexFile, string );
  }

  /** Returns the location of the index file for the given directory. */
  private static File getIndexFile( File file ) {
    return new File( file, INDEX_FILE_NAME );
  }

  /**
   * Sorts the entries by type (directories before files). Within a type, names of the form
   * <code>&lt;number&gt;.</code> come first, ordered by their numeric value, followed by all
   * other names in lexicographic order.
   * <p>
   * Keeping numbered and other names in separate groups makes this a consistent ordering; names
   * that end with a dot but are not numbers are simply treated as other names instead of making
   * the sort fail.
   * </p>
   */
  private static void sortFileList( List<FileEntry> list ) {
    Collections.sort( list, new Comparator<FileEntry>() {
      public int compare( FileEntry file1, FileEntry file2 ) {
        if( file1.type != file2.type ) {
          return file1.type < file2.type ? -1 : 1;
        }
        Integer number1 = parseNumberedName( file1.name );
        Integer number2 = parseNumberedName( file2.name );
        if( number1 != null && number2 != null ) {
          return number1.compareTo( number2 );
        }
        if( number1 != null ) {
          return -1;
        }
        if( number2 != null ) {
          return 1;
        }
        return file1.name.compareTo( file2.name );
      }
    } );
  }

  /**
   * Returns the number in a name of the form <code>&lt;number&gt;.</code>, or <code>null</code>
   * if the name does not have this form or the number does not fit into an <code>int</code>.
   */
  private static Integer parseNumberedName( String name ) {
    if( !name.endsWith( "." ) ) {
      return null;
    }
    try {
      return Integer.valueOf( Integer.parseInt( name.substring( 0, name.length() - 1 ) ) );
    } catch( NumberFormatException notANumber ) {
      return null;
    }
  }

  /** Renders the entries as index file content, one tab-separated line per entry. */
  private static String createString( List<FileEntry> list ) {
    StringBuilder builder = new StringBuilder();
    for( FileEntry file : list ) {
      builder.append( file.type );
      builder.append( '\t' );
      builder.append( file.name );
      builder.append( '\t' );
      builder.append( file.subject );
      builder.append( '\t' );
      builder.append( file.from );
      builder.append( '\t' );
      builder.append( file.count );
      builder.append( '\n' );
    }
    return builder.toString();
  }

  /** Writes the given string to the given file, replacing any previous content. */
  private static void writeToFile( File file, String string ) throws IOException {
    BufferedWriter writer = new BufferedWriter( new FileWriter( file ) );
    try {
      writer.write( string );
    } finally {
      writer.close();
    }
  }

  /** Returns the number of non-blank lines in the given index file. */
  private static int readChildCountFromIndexFile( File indexFile ) throws IOException {
    int count = 0;
    BufferedReader reader = new BufferedReader( new FileReader( indexFile ) );
    try {
      String line = reader.readLine();
      while( line != null ) {
        if( line.trim().length() > 0 ) {
          count++;
        }
        line = reader.readLine();
      }
    } finally {
      reader.close();
    }
    return count;
  }

  /** One line of an index file. */
  private static class FileEntry {

    final String name;
    final int count;
    final char type;
    final String subject;
    final String from;

    FileEntry( String name, String subject, String from, char type, int count ) {
      this.name = name;
      this.subject = subject;
      this.from = from;
      this.count = count;
      this.type = type;
    }
  }
}