/* * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information * regarding copyright ownership. The ASF licenses this file * to you under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance * with the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.apache.pig.newplan.logical.relational; import java.io.IOException; import java.util.List; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.mapreduce.Job; import org.apache.pig.LoadFunc; import org.apache.pig.SLoadFunc; import org.apache.pig.LoadMetadata; import org.apache.pig.PigException; import org.apache.pig.ResourceSchema; import org.apache.pig.data.DataType; import org.apache.pig.impl.PigContext; import org.apache.pig.impl.io.FileSpec; import org.apache.pig.impl.logicalLayer.FrontendException; import org.apache.pig.impl.logicalLayer.schema.Schema; import org.apache.pig.impl.util.ObjectSerializer; import org.apache.pig.newplan.Operator; import org.apache.pig.newplan.PlanVisitor; import org.apache.pig.newplan.logical.Util; public class LOLoad extends LogicalRelationalOperator { private LogicalSchema scriptSchema; private FileSpec fs; private transient SLoadFunc loadFunc; //private transient SLoadFunc sloadFunc; //AVK transient private Configuration conf; private LogicalSchema determinedSchema; private List<Integer> requiredFields = null; private boolean castInserted = false; private LogicalSchema uidOnlySchema; private String schemaFile = null; private String signature = null; private long limit = -1; /** * * @param loader FuncSpec for load function to use for this load. * @param schema schema user specified in script, or null if not * specified. * @param plan logical plan this load is part of. */ public LOLoad(FileSpec loader, LogicalSchema schema, LogicalPlan plan, Configuration conf) { super("LOLoad", plan); scriptSchema = schema; fs = loader; if (loader != null) schemaFile = loader.getFileName(); this.conf = conf; } public String getSchemaFile() { return schemaFile; } public SLoadFunc getLoadFunc() throws FrontendException { try { if (loadFunc == null && fs!=null) { loadFunc = (SLoadFunc)PigContext.instantiateFuncFromSpec(fs.getFuncSpec()); loadFunc.setUDFContextSignature(signature); } return loadFunc; }catch (ClassCastException cce) { throw new FrontendException(this, fs.getFuncSpec() + " should implement the LoadFunc interface.", 2236); } } //AVK /* public SLoadFunc getSLoadFunc() throws FrontendException { try { if (sloadFunc == null && fs!=null) { sloadFunc = (SLoadFunc)PigContext.instantiateFuncFromSpec(fs.getFuncSpec()); sloadFunc.setUDFContextSignature(signature); } return sloadFunc; }catch (ClassCastException cce) { throw new FrontendException(this, fs.getFuncSpec() + " should implement the LoadFunc interface.", 2236); } }*/ public void setScriptSchema(LogicalSchema schema) { scriptSchema = schema; } public void setRequiredFields(List<Integer> requiredFields) { this.requiredFields = requiredFields; } /** * Get the schema for this load. The schema will be either be what was * given by the user in the script or what the load functions getSchema * call returned. Otherwise null will be returned, indicating that the * schema is unknown. * @return schema, or null if unknown */ @Override public LogicalSchema getSchema() throws FrontendException { if (schema != null) return schema; LogicalSchema originalSchema = null; if (determinedSchema==null) { determinedSchema = getSchemaFromMetaData(); } if (scriptSchema != null && determinedSchema != null) { originalSchema = LogicalSchema.merge(scriptSchema, determinedSchema, LogicalSchema.MergeMode.LoadForEach); } else if (scriptSchema != null) originalSchema = scriptSchema; else if (determinedSchema != null) originalSchema = determinedSchema; if (isCastInserted()) { for (int i=0;i<originalSchema.size();i++) { LogicalSchema.LogicalFieldSchema fs = originalSchema.getField(i); if(determinedSchema == null) { // Reset the loads field schema to byte array so that it // will reflect reality. fs.type = DataType.BYTEARRAY; } else { // Reset the type to what determinedSchema says it is fs.type = determinedSchema.getField(i).type; } } } if (originalSchema!=null) { uidOnlySchema = originalSchema.mergeUid(uidOnlySchema); } if (requiredFields!=null) { schema = new LogicalSchema(); for (int i=0;i<originalSchema.size();i++) { if (requiredFields.contains(i)) schema.addField(originalSchema.getField(i)); } } else schema = originalSchema; return schema; } private LogicalSchema getSchemaFromMetaData() throws FrontendException { if (getLoadFunc()!=null && getLoadFunc() instanceof LoadMetadata) { try { ResourceSchema resourceSchema = ((LoadMetadata)loadFunc).getSchema(getFileSpec().getFileName(), new Job(conf)); Schema oldSchema = Schema.getPigSchema(resourceSchema); return Util.translateSchema(oldSchema); } catch (IOException e) { throw new FrontendException( this, "Cannot get schema from loadFunc " + loadFunc.getClass().getName(), 2245, e); } } return null; } @Override public void setAlias(String alias) { super.setAlias(alias); // set the schema in this method using the new alias assigned storeScriptSchema(); if (signature==null) signature = alias; } /** * This method will store the scriptSchema:Schema using ObjectSerializer to * the current configuration.<br/> * The schema can be retrieved by load functions or UDFs to know the schema * the user entered in the as clause.<br/> * The name format is:<br/> * * <pre> * ${UDFSignature}.scriptSchema = ObjectSerializer.serialize(scriptSchema) * </pre> * <p/> * Note that this is not the schema the load function returns but will * always be the as clause schema.<br/> * That is a = LOAD 'input' as (a:chararray, b:chararray)<br/> * The schema will be (a:chararray, b:chararray)<br/> * <p/> * * TODO Find better solution to make script schema available to LoadFunc see * https://issues.apache.org/jira/browse/PIG-1717 */ private void storeScriptSchema() { String alias = getAlias(); if (!(conf == null || alias == null || scriptSchema == null)) { try { conf.set(alias + ".scriptSchema", ObjectSerializer .serialize(Util.translateSchema(scriptSchema))); } catch (IOException ioe) { int errCode = 1018; String msg = "Problem serializing script schema"; FrontendException fee = new FrontendException(this, msg, errCode, PigException.INPUT, false, null, ioe); throw new RuntimeException(fee); } } } public FileSpec getFileSpec() { return fs; } @Override public void accept(PlanVisitor v) throws FrontendException { if (!(v instanceof LogicalRelationalNodesVisitor)) { throw new FrontendException("Expected LogicalPlanVisitor", 2223); } ((LogicalRelationalNodesVisitor)v).visit(this); } public LogicalSchema getDeterminedSchema() { return determinedSchema; } @Override public boolean isEqual(Operator other) throws FrontendException { if (other != null && other instanceof LOLoad) { LOLoad ol = (LOLoad)other; if (!checkEquality(ol)) return false; if (fs == null) { if (ol.fs == null) { return true; }else{ return false; } } return fs.equals(ol.fs); } else { return false; } } public void setCastInserted(boolean flag) { castInserted = flag; } public boolean isCastInserted() { return castInserted; } public Configuration getConfiguration() { return conf; } @Override public void resetUid() { uidOnlySchema = null; } @Override public String toString(){ String str = super.toString(); return (str + "RequiredFields:" + requiredFields); } public String getSignature() { return signature; } /*** * This method is called by Pig logical planner to setup UDFContext signature. * So that loadFunc can use signature to store its own configurations in UDFContext. * This is not intend to be called by users */ public void setSignature(String signature) { this.signature = signature; loadFunc.setUDFContextSignature(signature); //sloadFunc.setUDFContextSignature(signature); //AVK } public LogicalSchema getScriptSchema() { return scriptSchema; } public long getLimit() { return limit; } public void setLimit(long limit) { this.limit = limit; } }