/* * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information * regarding copyright ownership. The ASF licenses this file * to you under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance * with the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.apache.pig.newplan.logical.relational; import java.io.IOException; import java.util.List; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.mapreduce.Job; import org.apache.pig.LoadFunc; import org.apache.pig.LoadMetadata; import org.apache.pig.ResourceSchema; import org.apache.pig.data.DataType; import org.apache.pig.impl.PigContext; import org.apache.pig.impl.io.FileSpec; import org.apache.pig.impl.logicalLayer.FrontendException; import org.apache.pig.impl.logicalLayer.schema.Schema; import org.apache.pig.newplan.Operator; import org.apache.pig.newplan.PlanVisitor; import org.apache.pig.newplan.logical.Util; public class LOLoad extends LogicalRelationalOperator { private LogicalSchema scriptSchema; private FileSpec fs; private transient LoadFunc loadFunc; transient private Configuration conf; private LogicalSchema determinedSchema; private List<Integer> requiredFields = null; private boolean castInserted = false; private LogicalSchema uidOnlySchema; /** * * @param loader FuncSpec for load function to use for this load. * @param schema schema user specified in script, or null if not * specified. * @param plan logical plan this load is part of. */ public LOLoad(FileSpec loader, LogicalSchema schema, LogicalPlan plan, Configuration conf) { super("LOLoad", plan); scriptSchema = schema; fs = loader; this.conf = conf; } public LoadFunc getLoadFunc() throws FrontendException { try { if (loadFunc == null && fs!=null) { loadFunc = (LoadFunc)PigContext.instantiateFuncFromSpec(fs.getFuncSpec()); loadFunc.setUDFContextSignature(getAlias()); } return loadFunc; }catch (ClassCastException cce) { throw new FrontendException(fs.getFuncSpec() + " should implement the LoadFunc interface.", 2236); } } public void setScriptSchema(LogicalSchema schema) { scriptSchema = schema; } public void setRequiredFields(List<Integer> requiredFields) { this.requiredFields = requiredFields; } /** * Get the schema for this load. The schema will be either be what was * given by the user in the script or what the load functions getSchema * call returned. Otherwise null will be returned, indicating that the * schema is unknown. * @return schema, or null if unknown */ @Override public LogicalSchema getSchema() throws FrontendException { if (schema != null) return schema; LogicalSchema originalSchema = null; if (determinedSchema==null) { determinedSchema = getSchemaFromMetaData(); } if (scriptSchema != null && determinedSchema != null) { originalSchema = LogicalSchema.merge(scriptSchema, determinedSchema); } else if (scriptSchema != null) originalSchema = scriptSchema; else if (determinedSchema != null) originalSchema = determinedSchema; if (isCastInserted()) { for (int i=0;i<originalSchema.size();i++) { LogicalSchema.LogicalFieldSchema fs = originalSchema.getField(i); if(determinedSchema == null) { // Reset the loads field schema to byte array so that it // will reflect reality. fs.type = DataType.BYTEARRAY; } else { // Reset the type to what determinedSchema says it is fs.type = determinedSchema.getField(i).type; } } } if (originalSchema!=null) { uidOnlySchema = originalSchema.mergeUid(uidOnlySchema); } if (requiredFields!=null) { schema = new LogicalSchema(); for (int i=0;i<originalSchema.size();i++) { if (requiredFields.contains(i)) schema.addField(originalSchema.getField(i)); } } else schema = originalSchema; return schema; } private LogicalSchema getSchemaFromMetaData() throws FrontendException { if (getLoadFunc()!=null && getLoadFunc() instanceof LoadMetadata) { try { ResourceSchema resourceSchema = ((LoadMetadata)loadFunc).getSchema(getFileSpec().getFileName(), new Job(conf)); Schema oldSchema = Schema.getPigSchema(resourceSchema); return Util.translateSchema(oldSchema); } catch (IOException e) { throw new FrontendException("Cannot get schema from loadFunc " + loadFunc.getClass().getName(), 2245, e); } } return null; } public FileSpec getFileSpec() { return fs; } @Override public void accept(PlanVisitor v) throws FrontendException { if (!(v instanceof LogicalRelationalNodesVisitor)) { throw new FrontendException("Expected LogicalPlanVisitor", 2223); } ((LogicalRelationalNodesVisitor)v).visit(this); } public LogicalSchema getDeterminedSchema() { return determinedSchema; } @Override public boolean isEqual(Operator other) throws FrontendException { if (other != null && other instanceof LOLoad) { LOLoad ol = (LOLoad)other; if (!checkEquality(ol)) return false; if (fs == null) { if (ol.fs == null) { return true; }else{ return false; } } return fs.equals(ol.fs); } else { return false; } } public void setCastInserted(boolean flag) { castInserted = flag; } public boolean isCastInserted() { return castInserted; } public Configuration getConfiguration() { return conf; } @Override public void resetUid() { uidOnlySchema = null; } }