/*
 * Copyright 2012, Facebook, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package com.facebook.LinkBench;

import java.io.File;
import java.io.FileNotFoundException;
import java.util.ArrayList;
import java.util.InputMismatchException;
import java.util.Locale;
import java.util.NavigableMap;
import java.util.NoSuchElementException;
import java.util.Properties;
import java.util.Random;
import java.util.Scanner;
import java.util.TreeMap;

import org.apache.log4j.Logger;

import com.facebook.LinkBench.distributions.PiecewiseLinearDistribution;

/*
 * This class simulates the real distribution based on statistical data.
 */
public class RealDistribution extends PiecewiseLinearDistribution {
  /** The locale used for number formats, etc in distribution file */
  private static final Locale INPUT_FILE_LOCALE = Locale.ENGLISH;

  public static final String DISTRIBUTION_CONFIG = "realdist";
  private static final Logger logger =
                        Logger.getLogger(ConfigUtil.LINKBENCH_LOGGER);

  /* params to shuffler for link degree */
  public static final long NLINKS_SHUFFLER_SEED = 20343988438726021L;
  public static final int NLINKS_SHUFFLER_GROUPS = 1024;

  /* shufflers to generate distributions uncorrelated to above */
  public static final long UNCORR_SHUFFLER_SEED = 53238253823453L;
  public static final int UNCORR_SHUFFLER_GROUPS = 1024;

  /* Shufflers for requests that are correlated with link degree */
  public static final long WRITE_CORR_SHUFFLER_SEED = NLINKS_SHUFFLER_SEED;
  public static final int WRITE_CORR_SHUFFLER_GROUPS = NLINKS_SHUFFLER_GROUPS;
  public static final long READ_CORR_SHUFFLER_SEED = NLINKS_SHUFFLER_SEED;
  public static final int READ_CORR_SHUFFLER_GROUPS = NLINKS_SHUFFLER_GROUPS;

  /* Shufflers for requests that are uncorrelated with link degree */
  public static final long WRITE_UNCORR_SHUFFLER_SEED = UNCORR_SHUFFLER_SEED;
  public static final int WRITE_UNCORR_SHUFFLER_GROUPS = UNCORR_SHUFFLER_GROUPS;
  public static final long READ_UNCORR_SHUFFLER_SEED = UNCORR_SHUFFLER_SEED;
  public static final int READ_UNCORR_SHUFFLER_GROUPS = UNCORR_SHUFFLER_GROUPS;

  public static final long NODE_READ_SHUFFLER_SEED = 4766565305853767165L;
  public static final int NODE_READ_SHUFFLER_GROUPS = 1024;
  public static final long NODE_UPDATE_SHUFFLER_SEED = NODE_READ_SHUFFLER_SEED;
  public static final int NODE_UPDATE_SHUFFLER_GROUPS = NODE_READ_SHUFFLER_GROUPS;
  public static final long NODE_DELETE_SHUFFLER_SEED = NODE_READ_SHUFFLER_SEED;
  public static final int NODE_DELETE_SHUFFLER_GROUPS = NODE_READ_SHUFFLER_GROUPS;

  public static enum DistributionType {
    LINKS,
    LINK_READS,
    LINK_READS_UNCORR,
    LINK_WRITES,
    LINK_WRITES_UNCORR,
    NODE_READS,
    NODE_UPDATES,
    NODE_DELETES,
  }

  private DistributionType type = null;

  public RealDistribution() {
    this.type = null;
  }

  @Override
  public void init(long min, long max, Properties props, String keyPrefix) {
    this.min = min;
    this.max = max;
    String dist = ConfigUtil.getPropertyRequired(props,
                                    keyPrefix + DISTRIBUTION_CONFIG);
    DistributionType configuredType;
    if (dist.equals("link_reads")) {
      configuredType = DistributionType.LINK_READS;
    } else if (dist.equals("link_writes")) {
      configuredType = DistributionType.LINK_WRITES;
    } else if (dist.equals("node_reads")) {
      configuredType = DistributionType.NODE_READS;
    } else if (dist.equals("node_writes")) {
      configuredType = DistributionType.NODE_UPDATES;
    } else if (dist.equals("links")) {
      configuredType = DistributionType.LINKS;
    } else {
      throw new RuntimeException("Invalid distribution type for " +
                                 "RealDistribution: " + dist);
    }
    init(props, min, max, configuredType);
  }
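
  /*
   * Rough usage sketch (illustrative only: the "requester." key prefix,
   * the data file path and the variable names are made up, not taken from
   * LinkBench's shipped configs). A relative data file path is resolved
   * against the linkbench home directory.
   *
   *   Properties props = new Properties();
   *   props.setProperty(Config.DISTRIBUTION_DATA_FILE,
   *                     "config/Distribution.dat");
   *   props.setProperty("requester." + RealDistribution.DISTRIBUTION_CONFIG,
   *                     "link_reads");
   *   RealDistribution dist = new RealDistribution();
   *   dist.init(startid1, maxid1, props, "requester.");
   *   long id1 = dist.choose(new Random());
   */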

  /*
   * Initialize this with one of the empirical distribution types.
   * This will automatically load the data file if needed.
   */
  public void init(Properties props, long min, long max, DistributionType type) {
    loadOneShot(props);
    switch (type) {
      case LINKS:
        init(min, max, nlinks_cdf, null, null, nlinks_expected_val);
        break;
      case LINK_WRITES:
        init(min, max, link_nwrites_cdf, nwrites_cs, nwrites_right_points,
             link_nwrites_expected_val);
        break;
      case LINK_READS:
        init(min, max, link_nreads_cdf, link_nreads_cs,
             link_nreads_right_points, link_nreads_expected_val);
        break;
      case NODE_UPDATES:
        init(min, max, node_nwrites_cdf, nwrites_cs, nwrites_right_points,
             node_nwrites_expected_val);
        break;
      case NODE_READS:
        init(min, max, node_nreads_cdf, node_nreads_cs,
             node_nreads_right_points, node_nreads_expected_val);
        break;
      default:
        throw new RuntimeException("Unknown distribution type: " + type);
    }
  }

  private static ArrayList<Point> nlinks_cdf, link_nreads_cdf,
                                  link_nwrites_cdf, node_nreads_cdf,
                                  node_nwrites_cdf;

  private static double[] link_nreads_cs, nwrites_cs,
                          node_nreads_cs, node_nwrites_cs;

  /**
   * These right_points arrays are used to keep track of the state of
   * the id1 generation, with each cell holding the next id to
   * return. They are shared between RealDistribution instances
   * and different threads.
   *
   * It is not clear that this works entirely as intended, and it
   * certainly is non-deterministic when multiple threads are
   * involved.
   */
  private static long[] link_nreads_right_points, nwrites_right_points,
                        node_nreads_right_points, node_nwrites_right_points;

  private static double nlinks_expected_val, link_nreads_expected_val,
                        link_nwrites_expected_val, node_nreads_expected_val,
                        node_nwrites_expected_val;

  /*
   * This method loads data from the data file into memory;
   * it must be called before any getNlinks or getNextId1s,
   * and must be declared as a synchronized method to prevent race conditions.
   */
  public static synchronized void loadOneShot(Properties props) {
    if (nlinks_cdf == null) {
      try {
        getStatisticalData(props);
      } catch (FileNotFoundException e) {
        throw new RuntimeException(e);
      }
    }
  }

  /*
   * This method gets the area below the distribution nreads_ccdf or
   * nwrites_ccdf. This helps to determine the number of nreads after which
   * the generating distribution would be approximately equal to the real
   * distribution.
   *
   * Keep in mind that because the number of id1s is constant, the
   * generating #reads distribution keeps changing. It starts at "100% 0",
   * keeps growing, and eventually at some point (after a certain number of
   * reads) it should be equal to the real #reads distribution.
   *
   * Because the number of id1s is constant (equal to maxid1 - startid1),
   * the total number of reads is also a constant, according to the
   * following formula:
   *
   *   (number of reads) = (number of id1s) x (area below nreads_ccdf)
   *
   * To illustrate, consider the following nreads_pdf distribution:
   * 60%=0; 20%=1; 10%=2; 10%=3; and there are 100 id1s.
   *
   * The number of reads would be a constant:
   * 100 * (20% * 1 + 10% * 2 + 10% * 3) = 100 * 70%.
   *
   * The multiplication factor (20% * 1 + 10% * 2 + 10% * 3) is what we
   * want this method to return.
   *
   * If we already have the ccdf (complementary cumulative distribution
   * function): 40%>=1; 20%>=2; 10%>=3; and its cumulative sum:
   * [40%, 40%+20%, 40%+20%+10%] = [40%, 60%, 70%], then we just need to
   * return the last cumulative sum (70%).
   */
  static double getArea(DistributionType type) {
    if (type == DistributionType.LINK_READS)
      return link_nreads_cs[link_nreads_cs.length - 1];
    else if (type == DistributionType.LINK_WRITES)
      return nwrites_cs[nwrites_cs.length - 1];
    else
      return 0;
  }
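
  /*
   * Illustrative sketch of the relationship described above (the variable
   * names are made up; this is not code used elsewhere in LinkBench):
   *
   *   long numId1s = maxid1 - startid1;
   *   double totalLinkReads = numId1s * getArea(DistributionType.LINK_READS);
   *
   * i.e. getArea() returns the expected number of reads per id1, so
   * multiplying by the number of id1s gives the total read count at which
   * the generated #reads distribution should match the real one.
   */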

  // helper function:
  private static ArrayList<Point> readCDF(String filePath, Scanner scanner) {
    ArrayList<Point> points = new ArrayList<Point>();
    while (scanner.hasNextInt()) {
      int value = scanner.nextInt();
      // File on disk has percentages
      try {
        double percent = scanner.nextDouble();
        double probability = percent / 100;
        Point temp = new Point(value, probability);
        points.add(temp);
      } catch (InputMismatchException ex) {
        throw new LinkBenchConfigError("Expected to find floating point " +
            "value in input file " + filePath + " but found token \"" +
            scanner.next() + "\"");
      } catch (NoSuchElementException ex) {
        throw new LinkBenchConfigError("Expected to find floating point " +
            "value in input file " + filePath + " but found end of file");
      }
    }
    return points;
  }

  // convert CDF from ArrayList<Point> to Map
  static NavigableMap<Integer, Double> getCDF(DistributionType dist) {
    ArrayList<Point> points =
        dist == DistributionType.LINKS ? nlinks_cdf :
        dist == DistributionType.LINK_READS ? link_nreads_cdf :
        dist == DistributionType.LINK_WRITES ? link_nwrites_cdf :
        dist == DistributionType.NODE_READS ? node_nreads_cdf :
        dist == DistributionType.NODE_UPDATES ? node_nwrites_cdf :
        null;
    if (points == null) return null;

    TreeMap<Integer, Double> map = new TreeMap<Integer, Double>();
    for (Point point : points) {
      map.put(point.value, point.probability);
    }
    return map;
  }
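
  /*
   * Sketch of the data file layout that getStatisticalData() below expects
   * (the numbers here are made up for illustration, not taken from a real
   * data file): each section starts with a distribution name, followed by
   * "value percentage" pairs giving the cumulative percentage at or below
   * that value.
   *
   *   nlinks
   *   0 60.0
   *   1 80.0
   *   2 90.0
   *   3 100.0
   *   link_nreads
   *   0 55.0
   *   ...
   */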

  /*
   * This method reads the discrete cumulative distribution functions (CDF)
   * for nlinks, nreads, and nwrites from the data file, and produces the
   * corresponding pdf and ccdf.
   *
   * The data file is generated by LinkBenchConfigGenerator, and can be
   * located by parameter data_file in the config file.
   *
   * The CDF is returned in the form of an array whose elements are pairs of
   * a value and the cumulative distribution at that value, i.e. <x, CDF(x)>.
   */
  private static void getStatisticalData(Properties props)
      throws FileNotFoundException {
    String filename = ConfigUtil.getPropertyRequired(props,
        Config.DISTRIBUTION_DATA_FILE);

    // If relative path, should be relative to linkbench home directory
    String fileAbsPath;
    if (new File(filename).isAbsolute()) {
      fileAbsPath = filename;
    } else {
      String linkBenchHome = ConfigUtil.findLinkBenchHome();
      if (linkBenchHome == null) {
        throw new RuntimeException("Data file config property "
            + Config.DISTRIBUTION_DATA_FILE
            + " was specified using a relative path, but linkbench home"
            + " directory was not specified through environment var "
            + ConfigUtil.linkbenchHomeEnvVar);
      } else {
        fileAbsPath = linkBenchHome + File.separator + filename;
      }
    }

    logger.info("Loading real distribution data from " + fileAbsPath);

    Scanner scanner = new Scanner(new File(fileAbsPath));
    scanner.useLocale(INPUT_FILE_LOCALE);
    while (scanner.hasNext()) {
      String type = scanner.next();
      if (type.equals("nlinks")) {
        nlinks_cdf = readCDF(fileAbsPath, scanner);
        nlinks_expected_val = expectedValue(nlinks_cdf);
      } else if (type.equals("link_nreads")) {
        link_nreads_cdf = readCDF(fileAbsPath, scanner);
        double[] nreads_pdf = getPDF(link_nreads_cdf);
        double[] nreads_ccdf = getCCDF(nreads_pdf);
        link_nreads_cs = getCumulativeSum(nreads_ccdf);

        link_nreads_right_points = new long[link_nreads_cs.length];
        for (int i = 0; i < link_nreads_right_points.length; ++i) {
          link_nreads_right_points[i] = 0;
        }
        link_nreads_expected_val = expectedValue(link_nreads_cdf);
      } else if (type.equals("link_nwrites")) {
        link_nwrites_cdf = readCDF(fileAbsPath, scanner);
        double[] nwrites_pdf = getPDF(link_nwrites_cdf);
        double[] nwrites_ccdf = getCCDF(nwrites_pdf);
        nwrites_cs = getCumulativeSum(nwrites_ccdf);

        nwrites_right_points = new long[nwrites_cs.length];
        for (int i = 0; i < nwrites_right_points.length; ++i) {
          nwrites_right_points[i] = 0;
        }
        link_nwrites_expected_val = expectedValue(link_nwrites_cdf);
      } else if (type.equals("node_nreads")) {
        node_nreads_cdf = readCDF(fileAbsPath, scanner);
        double[] node_nreads_pdf = getPDF(node_nreads_cdf);
        double[] node_nreads_ccdf = getCCDF(node_nreads_pdf);
        node_nreads_cs = getCumulativeSum(node_nreads_ccdf);

        node_nreads_right_points = new long[node_nreads_cs.length];
        for (int i = 0; i < node_nreads_right_points.length; ++i) {
          node_nreads_right_points[i] = 0;
        }
        node_nreads_expected_val = expectedValue(node_nreads_cdf);
      } else if (type.equals("node_nwrites")) {
        node_nwrites_cdf = readCDF(fileAbsPath, scanner);
        double[] node_nwrites_pdf = getPDF(node_nwrites_cdf);
        double[] node_nwrites_ccdf = getCCDF(node_nwrites_pdf);
        node_nwrites_cs = getCumulativeSum(node_nwrites_ccdf);

        node_nwrites_right_points = new long[node_nwrites_cs.length];
        for (int i = 0; i < node_nwrites_right_points.length; ++i) {
          node_nwrites_right_points[i] = 0;
        }
        node_nwrites_expected_val = expectedValue(node_nwrites_cdf);
      } else {
        throw new RuntimeException("Unexpected token in distribution file, " +
            "expected name of next distribution: \"" + type + "\"");
      }
    }
  }

  static long getNlinks(long id1, long startid1, long maxid1) {
    // simple workload balancing
    return (long) expectedCount(startid1, maxid1, id1, nlinks_cdf);
  }

  @Override
  public long choose(Random rng) {
    if (type == DistributionType.LINKS) {
      throw new RuntimeException("choose not supported for LINKS");
    }
    return super.choose(rng);
  }
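
  /**
   * Returns an InvertibleShuffler for the given distribution type, sized
   * for n ids, or null if the type is not recognized. The correlated link
   * read/write shufflers reuse the link-degree shuffler's seed and group
   * count, so their access patterns follow link degree, while the
   * *_UNCORR variants use an independent seed.
   */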
  public static InvertibleShuffler getShuffler(DistributionType type, long n) {
    switch (type) {
      case LINK_READS:
        return new InvertibleShuffler(READ_CORR_SHUFFLER_SEED,
                                      READ_CORR_SHUFFLER_GROUPS, n);
      case LINK_READS_UNCORR:
        return new InvertibleShuffler(READ_UNCORR_SHUFFLER_SEED,
                                      READ_UNCORR_SHUFFLER_GROUPS, n);
      case LINK_WRITES:
        return new InvertibleShuffler(WRITE_CORR_SHUFFLER_SEED,
                                      WRITE_CORR_SHUFFLER_GROUPS, n);
      case LINK_WRITES_UNCORR:
        return new InvertibleShuffler(WRITE_UNCORR_SHUFFLER_SEED,
                                      WRITE_UNCORR_SHUFFLER_GROUPS, n);
      case NODE_READS:
        return new InvertibleShuffler(NODE_READ_SHUFFLER_SEED,
                                      NODE_READ_SHUFFLER_GROUPS, n);
      case NODE_UPDATES:
        return new InvertibleShuffler(NODE_UPDATE_SHUFFLER_SEED,
                                      NODE_UPDATE_SHUFFLER_GROUPS, n);
      case NODE_DELETES:
        return new InvertibleShuffler(NODE_DELETE_SHUFFLER_SEED,
                                      NODE_DELETE_SHUFFLER_GROUPS, n);
      case LINKS:
        return new InvertibleShuffler(NLINKS_SHUFFLER_SEED,
                                      NLINKS_SHUFFLER_GROUPS, n);
      default:
        return null;
    }
  }
}