/* * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information * regarding copyright ownership. The ASF licenses this file * to you under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance * with the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.apache.pig.builtin; import java.io.IOException; import java.util.Random; import org.apache.pig.EvalFunc; import org.apache.pig.PigConstants; import org.apache.pig.StaticDataCleanup; import org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.PigMapReduce; import org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.MRConfiguration; import org.apache.pig.data.Tuple; import org.apache.pig.impl.logicalLayer.schema.Schema; import org.apache.pig.data.DataType; /** * Return a random double value. Whatever arguments are passed to this UDF * are ignored. */ @Nondeterministic public class RANDOM extends EvalFunc<Double>{ private Random r = null; public RANDOM() { } public RANDOM(String seed) { r = new Random(Long.parseLong(seed)); } @Override public Double exec(Tuple input) throws IOException { if( r == null ) { int jobidhash = PigMapReduce.sJobConfInternal.get().get(MRConfiguration.JOB_ID).hashCode(); int taskIndex = Integer.valueOf(PigMapReduce.sJobConfInternal.get().get(PigConstants.TASK_INDEX)); // XOR-ing 3 separate values // |<-----32 bits---->|<----32 bits----->| // |-jobidhash(int)---|-jobidhash(int)---| // | |---taskIndex(int)--| // |----------seedUniquifier (long)------| // | | // |<-- Only 48 bits used ----->| // | by java.util.Random | // | | // // Reason for repeating jobidhash and shifting taskIndex is, seed // too close to each others would produce very similar values. // // Goal of this method is to produce a pseudo-random values that // would // (1) Produce a same sequence of peusdo-random variables for attempts from same jobid/vertexid and taskid // (2) When taskid, jobid, or vertexid(tez) differ, they should produce a different random sequence // (3) When Random is called more than once inside the script, they should also produce different random values // e.g. B = FOREACH A generate RANDOM(), RANDOM(); // r = new Random( (((long) jobidhash) << 32 | (jobidhash & 0xffffffffL)) ^ ((long) taskIndex << 16) ^ seedUniquifier); // L'Ecuyer, "Tables of Linear Congruential Generators of // Different Sizes and Good Lattice Structure", 1999 seedUniquifier *= 4292484099903637661L; } return r.nextDouble(); } @Override public Schema outputSchema(Schema input) { return new Schema(new Schema.FieldSchema(getSchemaName(this.getClass().getName().toLowerCase(), input), DataType.DOUBLE)); } // Taking the initial seed value from java.util.Random private static long seedUniquifier = 8682522807148012L; @StaticDataCleanup public static void resetSeedUniquifier() { seedUniquifier = 8682522807148012L; } }