/* * Copyright (c) 2007-2010 Concurrent, Inc. All Rights Reserved. * * Project and contact information: http://www.cascading.org/ * * This file is part of the Cascading project. * * Cascading is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * Cascading is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with Cascading. If not, see <http://www.gnu.org/licenses/>. */ package cascading.util; import java.io.ByteArrayInputStream; import java.io.ByteArrayOutputStream; import java.io.FileWriter; import java.io.IOException; import java.io.ObjectInputStream; import java.io.ObjectOutputStream; import java.io.ObjectStreamClass; import java.io.PrintStream; import java.io.PrintWriter; import java.io.StringWriter; import java.io.Writer; import java.lang.reflect.Constructor; import java.lang.reflect.Field; import java.lang.reflect.Method; import java.util.ArrayList; import java.util.Collection; import java.util.List; import java.util.Map; import java.util.zip.GZIPInputStream; import java.util.zip.GZIPOutputStream; import cascading.flow.FlowElement; import cascading.flow.FlowException; import cascading.flow.Scope; import cascading.operation.BaseOperation; import cascading.operation.Operation; import cascading.pipe.Pipe; import cascading.scheme.Scheme; import cascading.tap.Tap; import org.apache.commons.codec.binary.Base64; import org.apache.commons.codec.digest.DigestUtils; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.mapred.JobConf; import org.apache.log4j.Logger; import org.jgrapht.ext.DOTExporter; import org.jgrapht.ext.EdgeNameProvider; import org.jgrapht.ext.IntegerNameProvider; import org.jgrapht.ext.MatrixExporter; import org.jgrapht.ext.VertexNameProvider; import org.jgrapht.graph.SimpleDirectedGraph; /** Class Util provides reusable operations. */ public class Util { /** Field LOG */ private static final Logger LOG = Logger.getLogger( Util.class ); /** * This method serializes the given Object instance and retunrs a String Base64 representation. * * @param object to be serialized * @return String */ public static String serializeBase64( Object object ) throws IOException { return serializeBase64( object, true ); } public static String serializeBase64( Object object, boolean compress ) throws IOException { ByteArrayOutputStream bytes = new ByteArrayOutputStream(); ObjectOutputStream out = new ObjectOutputStream( compress ? new GZIPOutputStream( bytes ) : bytes ); try { out.writeObject( object ); } finally { out.close(); } return new String( Base64.encodeBase64( bytes.toByteArray() ) ); } /** * This method deserializes the Base64 encoded String into an Object instance. * * @param string * @return an Object */ public static Object deserializeBase64( String string ) throws IOException { return deserializeBase64( string, true ); } public static Object deserializeBase64( String string, boolean decompress ) throws IOException { if( string == null || string.length() == 0 ) return null; ObjectInputStream in = null; try { ByteArrayInputStream bytes = new ByteArrayInputStream( Base64.decodeBase64( string.getBytes() ) ); in = new ObjectInputStream( decompress ? new GZIPInputStream( bytes ) : bytes ) { @Override protected Class<?> resolveClass( ObjectStreamClass desc ) throws IOException, ClassNotFoundException { try { return Class.forName( desc.getName(), false, Thread.currentThread().getContextClassLoader() ); } catch( ClassNotFoundException exception ) { return super.resolveClass( desc ); } } }; return in.readObject(); } catch( ClassNotFoundException exception ) { throw new FlowException( "unable to deserialize data", exception ); } finally { if( in != null ) in.close(); } } /** * This method creates a globally unique HEX value seeded by the given string. * * @param seed * @return a String */ public static String createUniqueID( String seed ) { String base = String.format( "%s%d%.10f", seed, System.currentTimeMillis(), Math.random() ); return DigestUtils.md5Hex( base ); } /** * This method joins the values in the given list with the delim String value. * * @param list * @param delim * @return String */ public static String join( int[] list, String delim ) { return join( list, delim, false ); } public static String join( int[] list, String delim, boolean printNull ) { StringBuffer buffer = new StringBuffer(); int count = 0; for( Object s : list ) { if( count != 0 ) buffer.append( delim ); if( printNull || s != null ) buffer.append( s ); count++; } return buffer.toString(); } public static String join( String delim, String... strings ) { return join( delim, false, strings ); } public static String join( String delim, boolean printNull, String... strings ) { return join( strings, delim, printNull ); } /** * This method joins the values in the given list with the delim String value. * * @param list * @param delim * @return a String */ public static String join( Object[] list, String delim ) { return join( list, delim, false ); } public static String join( Object[] list, String delim, boolean printNull ) { StringBuffer buffer = new StringBuffer(); int count = 0; for( Object s : list ) { if( count != 0 ) buffer.append( delim ); if( printNull || s != null ) buffer.append( s ); count++; } return buffer.toString(); } /** * This method joins each value in the collection with a tab character as the delimiter. * * @param collection * @return a String */ public static String join( Collection collection ) { return join( collection, "\t" ); } /** * This method joins each valuein the collection with the given delimiter. * * @param collection * @param delim * @return a String */ public static String join( Collection collection, String delim ) { return join( collection, delim, false ); } public static String join( Collection collection, String delim, boolean printNull ) { StringBuffer buffer = new StringBuffer(); join( buffer, collection, delim, printNull ); return buffer.toString(); } /** * This method joins each value in the collection with the given delimiter. All results are appended to the * given {@link StringBuffer} instance. * * @param buffer * @param collection * @param delim */ public static void join( StringBuffer buffer, Collection collection, String delim ) { join( buffer, collection, delim, false ); } public static void join( StringBuffer buffer, Collection collection, String delim, boolean printNull ) { int count = 0; for( Object s : collection ) { if( count != 0 ) buffer.append( delim ); if( printNull || s != null ) buffer.append( s ); count++; } } public static String[] removeNulls( String... strings ) { List<String> list = new ArrayList<String>(); for( String string : strings ) { if( string != null ) list.add( string ); } return list.toArray( new String[ list.size() ] ); } public static Collection<String> quote( Collection<String> collection, String quote ) { List<String> list = new ArrayList<String>(); for( String string : collection ) list.add( quote + string + quote ); return list; } public static String print( Collection collection, String delim ) { StringBuffer buffer = new StringBuffer(); print( buffer, collection, delim ); return buffer.toString(); } public static void print( StringBuffer buffer, Collection collection, String delim ) { int count = 0; for( Object s : collection ) { if( count != 0 ) buffer.append( delim ); buffer.append( "[" ); buffer.append( s ); buffer.append( "]" ); count++; } } /** * This method attempts to remove any username and password from the given url String. * * @param url * @return a String */ public static String sanitizeUrl( String url ) { if( url == null ) return null; return url.replaceAll( "(?<=//).*:.*@", "" ) + "\"]"; } /** * This methdo attempts to remove duplicate consecutive forward slashes from the given url. * * @param url * @return a String */ public static String normalizeUrl( String url ) { if( url == null ) return null; return url.replaceAll( "([^:]/)/{2,}", "$1/" ); } /** * This method returns the {@link Object#toString()} of the given object, or an empty String if the object * is null. * * @param object * @return a String */ public static String toNull( Object object ) { if( object == null ) return ""; return object.toString(); } /** * This method truncates the given String value to the given size, but appends an ellipse ("...") if the * String is larger than maxSize. * * @param string * @param maxSize * @return a String */ public static String truncate( String string, int maxSize ) { string = toNull( string ); if( string.length() <= maxSize ) return string; return String.format( "%s...", string.subSequence( 0, maxSize - 3 ) ); } public static <A> A getProperty( Map<Object, Object> properties, String key, A defaultValue ) { if( properties == null ) return defaultValue; A value = (A) properties.get( key ); return value == null ? defaultValue : value; } public static String printGraph( SimpleDirectedGraph graph ) { StringWriter writer = new StringWriter(); printGraph( writer, graph ); return writer.toString(); } public static void printGraph( PrintStream out, SimpleDirectedGraph graph ) { PrintWriter printWriter = new PrintWriter( out ); printGraph( printWriter, graph ); } public static void printGraph( String filename, SimpleDirectedGraph graph ) { try { Writer writer = new FileWriter( filename ); printGraph( writer, graph ); writer.close(); } catch( IOException exception ) { exception.printStackTrace(); } } @SuppressWarnings({"unchecked"}) private static void printGraph( Writer writer, SimpleDirectedGraph graph ) { DOTExporter dot = new DOTExporter( new IntegerNameProvider(), new VertexNameProvider() { public String getVertexName( Object object ) { return object.toString().replaceAll( "\"", "\'" ); } }, new EdgeNameProvider<Object>() { public String getEdgeName( Object object ) { return object.toString().replaceAll( "\"", "\'" ); } } ); dot.export( writer, graph ); } public static void printMatrix( PrintStream out, SimpleDirectedGraph<FlowElement, Scope> graph ) { new MatrixExporter().exportAdjacencyMatrix( new PrintWriter( out ), graph ); } /** * This method removes all nulls from the given List. * * @param list */ @SuppressWarnings({"StatementWithEmptyBody"}) public static void removeAllNulls( List list ) { while( list.remove( null ) ) ; } public static String formatTrace( Scheme scheme, String message ) { if( scheme == null ) return message; String trace = scheme.getTrace(); if( trace == null ) return message; return "[" + truncate( scheme.toString(), 25 ) + "][" + trace + "] " + message; } /** * Method formatRawTrace does not include the pipe name * * @param pipe of type Pipe * @param message of type String * @return String */ public static String formatRawTrace( Pipe pipe, String message ) { if( pipe == null ) return message; String trace = pipe.getTrace(); if( trace == null ) return message; return "[" + trace + "] " + message; } public static String formatTrace( Pipe pipe, String message ) { if( pipe == null ) return message; String trace = pipe.getTrace(); if( trace == null ) return message; return "[" + truncate( pipe.getName(), 25 ) + "][" + trace + "] " + message; } public static String formatTrace( Tap tap, String message ) { if( tap == null ) return message; String trace = tap.getTrace(); if( trace == null ) return message; return "[" + truncate( tap.toString(), 25 ) + "][" + trace + "] " + message; } public static String formatTrace( Operation operation, String message ) { if( !( operation instanceof BaseOperation ) ) return message; String trace = ( (BaseOperation) operation ).getTrace(); if( trace == null ) return message; return "[" + trace + "] " + message; } public static String captureDebugTrace( Class type ) { StackTraceElement[] stackTrace = Thread.currentThread().getStackTrace(); for( int i = 3; i < stackTrace.length; i++ ) { StackTraceElement stackTraceElement = stackTrace[ i ]; Package aPackage = type.getPackage(); if( aPackage != null && stackTraceElement.getClassName().startsWith( aPackage.getName() ) ) continue; return stackTraceElement.toString(); } return null; } public static Class findMainClass( Class defaultType ) { StackTraceElement[] stackTrace = Thread.currentThread().getStackTrace(); for( StackTraceElement stackTraceElement : stackTrace ) { if( stackTraceElement.getMethodName().equals( "main" ) && !stackTraceElement.getClassName().startsWith( "org.apache.hadoop" ) ) { try { LOG.info( "resolving application jar from found main method on: " + stackTraceElement.getClassName() ); return Thread.currentThread().getContextClassLoader().loadClass( stackTraceElement.getClassName() ); } catch( ClassNotFoundException exception ) { LOG.warn( "unable to load class while discovering application jar: " + stackTraceElement.getClassName(), exception ); } } } LOG.info( "using default application jar, may cause class not found exceptions on the cluster" ); return defaultType; } public static void writeDOT( Writer writer, SimpleDirectedGraph graph, IntegerNameProvider vertexIdProvider, VertexNameProvider vertexNameProvider, EdgeNameProvider edgeNameProvider ) { new DOTExporter( vertexIdProvider, vertexNameProvider, edgeNameProvider ).export( writer, graph ); } public interface RetryOperator<T> { T operate() throws Exception; boolean rethrow( Exception exception ); } public static <T> T retry( Logger logger, int retries, int secondsDelay, String message, RetryOperator<T> operator ) throws Exception { Exception saved = null; for( int i = 0; i < retries; i++ ) { try { return operator.operate(); } catch( Exception exception ) { if( operator.rethrow( exception ) ) { logger.warn( message + ", but not retrying", exception ); throw exception; } saved = exception; logger.warn( message + ", attempt: " + ( i + 1 ), exception ); try { Thread.sleep( secondsDelay * 1000 ); } catch( InterruptedException exception1 ) { // do nothing } } } logger.warn( message + ", done retrying after attempts: " + retries, saved ); throw saved; } public static Object createProtectedObject( Class type, Object[] parameters, Class[] parameterTypes ) { try { Constructor constructor = type.getDeclaredConstructor( parameterTypes ); constructor.setAccessible( true ); return constructor.newInstance( parameters ); } catch( Exception exception ) { exception.printStackTrace(); throw new FlowException( "unable to instantiate type: " + type.getName(), exception ); } } public static Thread getHDFSShutdownHook() { Exception caughtException = null; try { // we must init the FS so the finalizer is registered FileSystem.getLocal( new JobConf() ); Field field = FileSystem.class.getDeclaredField( "clientFinalizer" ); field.setAccessible( true ); Thread finalizer = (Thread) field.get( null ); if( finalizer != null ) Runtime.getRuntime().removeShutdownHook( finalizer ); return finalizer; } catch( NoSuchFieldException exception ) { caughtException = exception; } catch( IllegalAccessException exception ) { caughtException = exception; } catch( IOException exception ) { caughtException = exception; } LOG.info( "unable to find and remove client hdfs shutdown hook, received exception: " + caughtException.getClass().getName() ); return null; } public static Object invokeStaticMethod( Class type, String methodName, Object[] parameters, Class[] parameterTypes ) { try { Method method = type.getDeclaredMethod( methodName, parameterTypes ); method.setAccessible( true ); return method.invoke( null, parameters ); } catch( Exception exception ) { throw new FlowException( "unable to invoke static method: " + type.getName() + "." + methodName, exception ); } } }