/*
* Licensed to the Apache Software Foundation (ASF) under one or more contributor license agreements. See the
* NOTICE file distributed with this work for additional information regarding copyright ownership. The ASF
* licenses this file to you under the Apache License, Version 2.0 (the "License"); you may not use this file
* except in compliance with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software distributed under the License is
* distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and limitations under the License.
*/
package org.apache.pig.piggybank.evaluation.util.apachelogparser;
import java.io.IOException;
import java.net.MalformedURLException;
import java.net.URL;
import java.util.ArrayList;
import java.util.List;
import java.util.Locale;
import org.apache.pig.EvalFunc;
import org.apache.pig.FuncSpec;
import org.apache.pig.data.DataType;
import org.apache.pig.data.Tuple;
import org.apache.pig.impl.logicalLayer.FrontendException;
import org.apache.pig.impl.logicalLayer.schema.Schema;
/**
 * HostExtractor takes a url and returns the host, lower-cased. For example,
 *
 * <pre>
 * http://sports.espn.go.com/mlb/recap?gameId=281009122
 * </pre>
 *
 * leads to
 *
 * <pre>
 * sports.espn.go.com
 * </pre>
 *
 * Pig latin usage looks like
 *
 * <pre>
 * host = FOREACH row GENERATE
 *     org.apache.pig.piggybank.evaluation.util.apachelogparser.HostExtractor(referer);
 * </pre>
 */
public class HostExtractor extends EvalFunc<String> {

    /**
     * Extracts the host portion of the URL held in the first field of {@code input}.
     *
     * @param input tuple whose first field is the URL string
     * @return the lower-cased host of the URL; {@code null} when the tuple is
     *         {@code null} or empty, when the first field is {@code null}, or
     *         when the string cannot be parsed as a URL
     * @throws IOException wrapping any other exception raised while reading or
     *         processing the field (for example, a first field that is not a String)
     */
    @Override
    public String exec(Tuple input) throws IOException {
        if (input == null || input.size() == 0) {
            return null;
        }
        String str = "";
        try {
            str = (String) input.get(0);
            // A null field yields a null result directly; handing null to
            // new URL(...) would only produce a spurious parse-error message.
            if (str == null) {
                return null;
            }
            // Locale.ROOT keeps the lower-casing independent of the JVM's
            // default locale (e.g. Turkish dotless-i mapping of 'I').
            return new URL(str).getHost().toLowerCase(Locale.ROOT);
        } catch (MalformedURLException me) {
            // Unparseable URLs are reported and mapped to null rather than
            // failing the whole row.
            System.err.println("piggybank.evaluation.util.apachelogparser.HostExtractor: "+
                    "url parsing exception for "+str);
            return null;
        } catch (Exception e) {
            throw new IOException("Caught exception processing input row ", e);
        }
    }

    /**
     * Declares the argument signature this function accepts.
     *
     * @return a list holding a single {@link FuncSpec} for this class whose
     *         schema is one unnamed {@code CHARARRAY} field
     * @throws FrontendException declared by the overridden method
     */
    @Override
    public List<FuncSpec> getArgToFuncMapping() throws FrontendException {
        List<FuncSpec> funcList = new ArrayList<FuncSpec>();
        funcList.add(new FuncSpec(this.getClass().getName(),
                new Schema(new Schema.FieldSchema(null, DataType.CHARARRAY))));
        return funcList;
    }
}