/* * Copyright 2011 Internet Archive * * Licensed under the Apache License, Version 2.0 (the "License"); you * may not use this file except in compliance with the License. You * may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or * implied. See the License for the specific language governing * permissions and limitations under the License. */ package org.archive.bacon; import java.io.*; import org.apache.pig.EvalFunc; import org.apache.pig.data.DataBag; import org.apache.pig.data.DataType; import org.apache.pig.data.Tuple; import org.apache.pig.impl.logicalLayer.schema.Schema; /** * Catenate all strings in the given input, with a delimiter string. * Kinda/sorta approximates Python str.join(). * * If any input object is a tuple or bag, it is traversed recursively * and the all strings found within are catenated, separated by the * given delimiter. */ public class Catenate extends EvalFunc<String> { public Catenate( ) throws IOException { } public String exec( Tuple input ) throws IOException { if ( input == null || input.size() < 2 ) return null; String delim = input.get(0).toString(); StringBuilder sb = new StringBuilder(); for ( int i = 1 ; i < input.size() ; i++ ) { cat( sb, input.get(i), delim ); } return sb.toString(); } public void cat( StringBuilder sb, Object input, String delim ) throws IOException { if ( input == null ) return; if ( input instanceof Tuple ) { Tuple tuple = (Tuple) input; for ( Object o : tuple.getAll( ) ) { cat( sb, o, delim ); } } else if ( input instanceof DataBag ) { DataBag bag = (DataBag) input; for ( Tuple t : bag ) { for ( Object o : t.getAll( ) ) { cat( sb, o, delim ); } } } else { String s = input.toString(); s = s.trim(); if ( s.length() > 0 ) { sb.append( s ).append( delim ); } } } @SuppressWarnings("deprecation") @Override public Schema outputSchema(Schema input) { return new Schema( new Schema.FieldSchema( null, DataType.CHARARRAY ) ); } }