/*
* Copyright 2011 Internet Archive
*
* Licensed under the Apache License, Version 2.0 (the "License"); you
* may not use this file except in compliance with the License. You
* may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
* implied. See the License for the specific language governing
* permissions and limitations under the License.
*/
package org.archive.bacon.url;
import java.io.*;
import java.net.*;
import org.apache.pig.EvalFunc;
import org.apache.pig.data.Tuple;
import org.apache.pig.impl.util.WrappedIOException;
/**
* Simple Pig EvalFunc which takes a chararray assumed to be a URL and
* returns the host name.
*/
public class Host extends EvalFunc<String>
{
public String exec( Tuple input )
throws IOException
{
if ( input == null || input.size() == 0 ) return null;
try
{
URL u = new URL( (String) input.get(0) );
String host = u.getHost( );
// If host cannot be determined, return empty string.
if ( host == null ) host = "";
// Ensure i18n hosts are in Unicode format.
host = java.net.IDN.toUnicode( host, java.net.IDN.ALLOW_UNASSIGNED );
return host;
}
catch ( MalformedURLException mue )
{
// If not a valid URL, just return an empty string.
return "";
}
catch ( Exception e )
{
throw WrappedIOException.wrap("Caught exception processing input row ", e);
}
}
}