/*
* Copyright 2011 Internet Archive
*
* Licensed under the Apache License, Version 2.0 (the "License"); you
* may not use this file except in compliance with the License. You
* may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
* implied. See the License for the specific language governing
* permissions and limitations under the License.
*/
package org.archive.bacon;
import java.io.*;
import org.apache.pig.EvalFunc;
import org.apache.pig.data.DataBag;
import org.apache.pig.data.DataType;
import org.apache.pig.data.Tuple;
import org.apache.pig.impl.logicalLayer.schema.Schema;
/**
 * Pig evaluation function that sums the lengths of every string
 * reachable from its input.
 *
 * <ul>
 *   <li>A {@code String} contributes its own {@link String#length()}.</li>
 *   <li>A {@code Tuple} contributes the sum over all of its fields.</li>
 *   <li>A {@code DataBag} contributes the sum over all of its tuples.</li>
 *   <li>{@code null} and every other type contribute 0.</li>
 * </ul>
 *
 * Tuples and bags are walked recursively, so strings nested at any
 * depth are counted.
 */
public class StringLength extends EvalFunc<Long>
{
    /**
     * Creates the function. No state is initialised here.
     *
     * @throws IOException declared for callers; this body never throws it
     */
    public StringLength( )
        throws IOException
    {
    }

    /**
     * Entry point invoked with the input tuple.
     *
     * @param input the tuple to measure; may be {@code null}
     * @return the combined length of all strings found in {@code input},
     *         or 0 when {@code input} is {@code null}
     * @throws IOException if the traversal in {@link #length(Object)} fails
     */
    public Long exec( Tuple input )
        throws IOException
    {
        return Long.valueOf( length( input ) );
    }

    /**
     * Recursively computes the combined length of all strings in
     * {@code input}.
     *
     * @param input a string, tuple, bag, {@code null}, or any other object
     * @return the string's length for a {@code String}; the recursive
     *         total for a {@code Tuple} or {@code DataBag}; 0 otherwise
     * @throws IOException declared so that nested traversal may propagate it
     */
    public long length( Object input )
        throws IOException
    {
        // Nothing to measure.
        if ( input == null )
        {
            return 0L;
        }

        // Leaf case: a plain string counts as its own length.
        if ( input instanceof String )
        {
            return ( (String) input ).length( );
        }

        // A tuple is the sum of whatever its fields contribute.
        if ( input instanceof Tuple )
        {
            long total = 0L;
            for ( Object field : ( (Tuple) input ).getAll( ) )
            {
                total += length( field );
            }
            return total;
        }

        // A bag is the sum of whatever its member tuples contribute.
        if ( input instanceof DataBag )
        {
            long total = 0L;
            java.util.Iterator<Tuple> members = ( (DataBag) input ).iterator( );
            while ( members.hasNext( ) )
            {
                total += length( members.next( ) );
            }
            return total;
        }

        // Any other type is ignored.
        return 0L;
    }

    /**
     * Declares the result schema: one field with no alias, of type
     * {@code DataType.LONG}.
     *
     * @param input the input schema; not consulted
     * @return a schema holding the single LONG field
     */
    @SuppressWarnings("deprecation")
    @Override
    public Schema outputSchema(Schema input)
    {
        Schema.FieldSchema resultField = new Schema.FieldSchema( null, DataType.LONG );
        return new Schema( resultField );
    }
}