/*********************************************************************************************************************** * Copyright (C) 2010-2013 by the Stratosphere project (http://stratosphere.eu) * * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with * the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on * an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the * specific language governing permissions and limitations under the License. **********************************************************************************************************************/ package eu.stratosphere.compiler.dag; import java.util.ArrayList; import java.util.Collections; import java.util.List; import java.util.Map; import eu.stratosphere.api.common.io.FileInputFormat; import eu.stratosphere.api.common.io.InputFormat; import eu.stratosphere.api.common.io.NonParallelInput; import eu.stratosphere.api.common.io.statistics.BaseStatistics; import eu.stratosphere.api.common.operators.base.GenericDataSourceBase; import eu.stratosphere.api.common.operators.Operator; import eu.stratosphere.compiler.DataStatistics; import eu.stratosphere.compiler.PactCompiler; import eu.stratosphere.compiler.costs.CostEstimator; import eu.stratosphere.compiler.costs.Costs; import eu.stratosphere.compiler.plan.PlanNode; import eu.stratosphere.compiler.plan.SourcePlanNode; import eu.stratosphere.configuration.Configuration; import eu.stratosphere.util.Visitor; /** * The optimizer's internal representation of a data source. */ public class DataSourceNode extends OptimizerNode { private final boolean sequentialInput; /** * Creates a new DataSourceNode for the given contract. * * @param pactContract * The data source contract object. */ public DataSourceNode(GenericDataSourceBase<?, ?> pactContract) { super(pactContract); if (pactContract.getUserCodeWrapper().getUserCodeClass() == null) { throw new IllegalArgumentException("Input format has not been set."); } if (NonParallelInput.class.isAssignableFrom(pactContract.getUserCodeWrapper().getUserCodeClass())) { setDegreeOfParallelism(1); setSubtasksPerInstance(1); this.sequentialInput = true; } else { this.sequentialInput = false; } } /** * Gets the contract object for this data source node. * * @return The contract. */ @Override public GenericDataSourceBase<?, ?> getPactContract() { return (GenericDataSourceBase<?, ?>) super.getPactContract(); } @Override public String getName() { return "Data Source"; } @Override public boolean isMemoryConsumer() { return false; } @Override public void setDegreeOfParallelism(int degreeOfParallelism) { // if unsplittable, DOP remains at 1 if (!this.sequentialInput) { super.setDegreeOfParallelism(degreeOfParallelism); } } @Override public void setSubtasksPerInstance(int instancesPerMachine) { // if unsplittable, DOP remains at 1 if (!this.sequentialInput) { super.setSubtasksPerInstance(instancesPerMachine); } } @Override public List<PactConnection> getIncomingConnections() { return Collections.<PactConnection>emptyList(); } @Override public void setInput(Map<Operator<?>, OptimizerNode> contractToNode) {} @Override protected void computeOperatorSpecificDefaultEstimates(DataStatistics statistics) { // see, if we have a statistics object that can tell us a bit about the file if (statistics != null) { // instantiate the input format, as this is needed by the statistics InputFormat<?, ?> format = null; String inFormatDescription = "<unknown>"; try { format = getPactContract().getFormatWrapper().getUserCodeObject(); Configuration config = getPactContract().getParameters(); config.setClassLoader(format.getClass().getClassLoader()); format.configure(config); } catch (Throwable t) { if (PactCompiler.LOG.isWarnEnabled()) { PactCompiler.LOG.warn("Could not instantiate InputFormat to obtain statistics." + " Limited statistics will be available.", t); } return; } try { inFormatDescription = format.toString(); } catch (Throwable t) {} // first of all, get the statistics from the cache final String statisticsKey = getPactContract().getStatisticsKey(); final BaseStatistics cachedStatistics = statistics.getBaseStatistics(statisticsKey); BaseStatistics bs = null; try { bs = format.getStatistics(cachedStatistics); } catch (Throwable t) { if (PactCompiler.LOG.isWarnEnabled()) { PactCompiler.LOG.warn("Error obtaining statistics from input format: " + t.getMessage(), t); } } if (bs != null) { final long len = bs.getTotalInputSize(); if (len == BaseStatistics.SIZE_UNKNOWN) { if (PactCompiler.LOG.isInfoEnabled()) { PactCompiler.LOG.info("Compiler could not determine the size of input '" + inFormatDescription + "'. Using default estimates."); } } else if (len >= 0) { this.estimatedOutputSize = len; } final long card = bs.getNumberOfRecords(); if (card != BaseStatistics.NUM_RECORDS_UNKNOWN) { this.estimatedNumRecords = card; } } } } @Override public void computeInterestingPropertiesForInputs(CostEstimator estimator) { // no children, so nothing to compute } @Override public void computeUnclosedBranchStack() { // because there are no inputs, there are no unclosed branches. this.openBranches = Collections.emptyList(); } @Override public List<PlanNode> getAlternativePlans(CostEstimator estimator) { if (this.cachedPlans != null) { return this.cachedPlans; } SourcePlanNode candidate = new SourcePlanNode(this, "DataSource("+this.getPactContract().getName()+")"); candidate.updatePropertiesWithUniqueSets(getUniqueFields()); final Costs costs = new Costs(); if (FileInputFormat.class.isAssignableFrom(getPactContract().getFormatWrapper().getUserCodeClass()) && this.estimatedOutputSize >= 0) { estimator.addFileInputCost(this.estimatedOutputSize, costs); } candidate.setCosts(costs); // since there is only a single plan for the data-source, return a list with that element only List<PlanNode> plans = new ArrayList<PlanNode>(1); plans.add(candidate); this.cachedPlans = plans; return plans; } @Override public boolean isFieldConstant(int input, int fieldNumber) { return false; } @Override public void accept(Visitor<OptimizerNode> visitor) { if (visitor.preVisit(this)) { visitor.postVisit(this); } } }