VectorBinaryAssign.java example

Explorer
mahout-commits-master
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.mahout.math;

import org.apache.mahout.math.Vector.Element;
import org.apache.mahout.math.function.DoubleDoubleFunction;
import org.apache.mahout.math.set.OpenIntHashSet;

import java.util.Iterator;

/**
 * Abstract class encapsulating different algorithms that perform the Vector operations assign().
 * x.assign(y, f), for x and y Vectors and f a DoubleDouble function:
 * - applies the function f to every element in x and y, f(xi, yi)
 * - assigns xi = f(xi, yi) for all indices i
 *
 * The names of variables, methods and classes used here follow the following conventions:
 * The vector being assigned to (the left hand side) is called this or x.
 * The right hand side is called that or y.
 * The function to be applied is called f.
 *
 * The different algorithms take into account the different characteristics of vector classes:
 * - whether the vectors support sequential iteration (isSequential())
 * - whether the vectors support constant-time additions (isAddConstantTime())
 * - what the lookup cost is (getLookupCost())
 * - what the iterator advancement cost is (getIteratorAdvanceCost())
 *
 * The names of the actual classes (they're nested in VectorBinaryAssign) describe the used for assignment.
 * The most important optimization is iterating just through the nonzeros (only possible if f(0, 0) = 0).
 * There are 4 main possibilities:
 * - iterating through the nonzeros of just one vector and looking up the corresponding elements in the other
 * - iterating through the intersection of nonzeros (those indices where both vectors have nonzero values)
 * - iterating through the union of nonzeros (those indices where at least one of the vectors has a nonzero value)
 * - iterating through all the elements in some way (either through both at the same time, both one after the other,
 *   looking up both, looking up just one).
 * Then, there are two additional sub-possibilities:
 * - if a new value can be added to x in constant time (isAddConstantTime()), the *Inplace updates are used
 * - otherwise (really just for SequentialAccessSparseVectors right now), the *Merge updates are used, where
 *   a sorted list of (index, value) pairs is merged into the vector at the end.
 *
 * The internal details are not important and a particular algorithm should generally not be called explicitly.
 * The best one will be selected through assignBest(), which is itself called through Vector.assign().
 *
 * See https://docs.google.com/document/d/1g1PjUuvjyh2LBdq2_rKLIcUiDbeOORA1sCJiSsz-JVU/edit# for a more detailed
 * explanation.
 */
public abstract class VectorBinaryAssign {
  public static final VectorBinaryAssign[] OPERATIONS = {
    new AssignNonzerosIterateThisLookupThat(),
    new AssignNonzerosIterateThatLookupThisMergeUpdates(),
    new AssignNonzerosIterateThatLookupThisInplaceUpdates(),

    new AssignIterateIntersection(),

    new AssignIterateUnionSequentialMergeUpdates(),
    new AssignIterateUnionSequentialInplaceUpdates(),
    new AssignIterateUnionRandomMergeUpdates(),
    new AssignIterateUnionRandomInplaceUpdates(),

    new AssignAllIterateSequentialMergeUpdates(),
    new AssignAllIterateSequentialInplaceUpdates(),
    new AssignAllIterateThisLookupThatMergeUpdates(),
    new AssignAllIterateThisLookupThatInplaceUpdates(),
    new AssignAllIterateThatLookupThisMergeUpdates(),
    new AssignAllIterateThatLookupThisInplaceUpdates(),
    new AssignAllLoopMergeUpdates(),
    new AssignAllLoopInplaceUpdates(),
  };

  /**
   * Returns true iff we can use this algorithm to apply f to x and y component-wise and assign the result to x.
   */
  public abstract boolean isValid(Vector x, Vector y, DoubleDoubleFunction f);

  /**
   * Estimates the cost of using this algorithm to compute the assignment. The algorithm is assumed to be valid.
   */
  public abstract double estimateCost(Vector x, Vector y, DoubleDoubleFunction f);

  /**
   * Main method that applies f to x and y component-wise assigning the results to x. It returns the modified vector,
   * x.
   */
  public abstract Vector assign(Vector x, Vector y, DoubleDoubleFunction f);

  /**
   * The best operation is the least expensive valid one.
   */
  public static VectorBinaryAssign getBestOperation(Vector x, Vector y, DoubleDoubleFunction f) {
    int bestOperationIndex = -1;
    double bestCost = Double.POSITIVE_INFINITY;
    for (int i = 0; i < OPERATIONS.length; ++i) {
      if (OPERATIONS[i].isValid(x, y, f)) {
        double cost = OPERATIONS[i].estimateCost(x, y, f);
        if (cost < bestCost) {
          bestCost = cost;
          bestOperationIndex = i;
        }
      }
    }
    return OPERATIONS[bestOperationIndex];
  }

  /**
   * This is the method that should be used when assigning. It selects the best algorithm and applies it.
   * Note that it does NOT invalidate the cached length of the Vector and should only be used through the wrapprs
   * in AbstractVector.
   */
  public static Vector assignBest(Vector x, Vector y, DoubleDoubleFunction f) {
    return getBestOperation(x, y, f).assign(x, y, f);
  }

  /**
   * If f(0, y) = 0, the zeros in x don't matter and we can simply iterate through the nonzeros of x.
   * To get the corresponding element of y, we perform a lookup.
   * There are no *Merge or *Inplace versions because in this case x cannot become more dense because of f, meaning
   * all changes will occur at indices whose values are already nonzero.
   */
  public static class AssignNonzerosIterateThisLookupThat extends VectorBinaryAssign {

    @Override
    public boolean isValid(Vector x, Vector y, DoubleDoubleFunction f) {
      return f.isLikeLeftMult();
    }

    @Override
    public double estimateCost(Vector x, Vector y, DoubleDoubleFunction f) {
      return x.getNumNondefaultElements() * x.getIteratorAdvanceCost() * y.getLookupCost();
    }

    @Override
    public Vector assign(Vector x, Vector y, DoubleDoubleFunction f) {
      for (Element xe : x.nonZeroes()) {
        xe.set(f.apply(xe.get(), y.getQuick(xe.index())));
      }
      return x;
    }
  }

  /**
   * If f(x, 0) = x, the zeros in y don't matter and we can simply iterate through the nonzeros of y.
   * We get the corresponding element of x through a lookup and update x inplace.
   */
  public static class AssignNonzerosIterateThatLookupThisInplaceUpdates extends VectorBinaryAssign {

    @Override
    public boolean isValid(Vector x, Vector y, DoubleDoubleFunction f) {
      return f.isLikeRightPlus();
    }

    @Override
    public double estimateCost(Vector x, Vector y, DoubleDoubleFunction f) {
      return y.getNumNondefaultElements() * y.getIteratorAdvanceCost() * x.getLookupCost() * x.getLookupCost();
    }

    @Override
    public Vector assign(Vector x, Vector y, DoubleDoubleFunction f) {
      for (Element ye : y.nonZeroes()) {
        x.setQuick(ye.index(), f.apply(x.getQuick(ye.index()), ye.get()));
      }
      return x;
    }
  }

  /**
   * If f(x, 0) = x, the zeros in y don't matter and we can simply iterate through the nonzeros of y.
   * We get the corresponding element of x through a lookup and update x by merging.
   */
  public static class AssignNonzerosIterateThatLookupThisMergeUpdates extends VectorBinaryAssign {

    @Override
    public boolean isValid(Vector x, Vector y, DoubleDoubleFunction f) {
      return f.isLikeRightPlus() && y.isSequentialAccess() && !x.isAddConstantTime();
    }

    @Override
    public double estimateCost(Vector x, Vector y, DoubleDoubleFunction f) {
      return y.getNumNondefaultElements() * y.getIteratorAdvanceCost() * y.getLookupCost();
    }

    @Override
    public Vector assign(Vector x, Vector y, DoubleDoubleFunction f) {
      OrderedIntDoubleMapping updates = new OrderedIntDoubleMapping(false);
      for (Element ye : y.nonZeroes()) {
        updates.set(ye.index(), f.apply(x.getQuick(ye.index()), ye.get()));
      }
      x.mergeUpdates(updates);
      return x;
    }
  }

  /**
   * If f(x, 0) = x and f(0, y) = 0 the zeros in x and y don't matter and we can iterate through the nonzeros
   * in both x and y.
   * This is only possible if both x and y support sequential access.
   */
  public static class AssignIterateIntersection extends VectorBinaryAssign {

    @Override
    public boolean isValid(Vector x, Vector y, DoubleDoubleFunction f) {
      return f.isLikeLeftMult() && f.isLikeRightPlus() && x.isSequentialAccess() && y.isSequentialAccess();
    }

    @Override
    public double estimateCost(Vector x, Vector y, DoubleDoubleFunction f) {
      return Math.min(x.getNumNondefaultElements() * x.getIteratorAdvanceCost(),
          y.getNumNondefaultElements() * y.getIteratorAdvanceCost());
    }

    @Override
    public Vector assign(Vector x, Vector y, DoubleDoubleFunction f) {
      Iterator<Vector.Element> xi = x.nonZeroes().iterator();
      Iterator<Vector.Element> yi = y.nonZeroes().iterator();
      Vector.Element xe = null;
      Vector.Element ye = null;
      boolean advanceThis = true;
      boolean advanceThat = true;
      while (true) {
        if (advanceThis) {
          if (xi.hasNext()) {
            xe = xi.next();
          } else {
            break;
          }
        }
        if (advanceThat) {
          if (yi.hasNext()) {
            ye = yi.next();
          } else {
            break;
          }
        }
        if (xe.index() == ye.index()) {
          xe.set(f.apply(xe.get(), ye.get()));
          advanceThis = true;
          advanceThat = true;
        } else {
          if (xe.index() < ye.index()) { // f(x, 0) = 0
            advanceThis = true;
            advanceThat = false;
          } else { // f(0, y) = 0
            advanceThis = false;
            advanceThat = true;
          }
        }
      }
      return x;
    }
  }

  /**
   * If f(0, 0) = 0 we can iterate through the nonzeros in either x or y.
   * In this case we iterate through them in parallel and update x by merging. Because we're iterating through
   * both vectors at the same time, x and y need to support sequential access.
   */
  public static class AssignIterateUnionSequentialMergeUpdates extends VectorBinaryAssign {

    @Override
    public boolean isValid(Vector x, Vector y, DoubleDoubleFunction f) {
      return !f.isDensifying() && x.isSequentialAccess() && y.isSequentialAccess() && !x.isAddConstantTime();
    }

    @Override
    public double estimateCost(Vector x, Vector y, DoubleDoubleFunction f) {
      return Math.max(x.getNumNondefaultElements() * x.getIteratorAdvanceCost(),
          y.getNumNondefaultElements() * y.getIteratorAdvanceCost());
    }

    @Override
    public Vector assign(Vector x, Vector y, DoubleDoubleFunction f) {
      Iterator<Vector.Element> xi = x.nonZeroes().iterator();
      Iterator<Vector.Element> yi = y.nonZeroes().iterator();
      Vector.Element xe = null;
      Vector.Element ye = null;
      boolean advanceThis = true;
      boolean advanceThat = true;
      OrderedIntDoubleMapping updates = new OrderedIntDoubleMapping(false);
      while (true) {
        if (advanceThis) {
          if (xi.hasNext()) {
            xe = xi.next();
          } else {
            xe = null;
          }
        }
        if (advanceThat) {
          if (yi.hasNext()) {
            ye = yi.next();
          } else {
            ye = null;
          }
        }
        if (xe != null && ye != null) { // both vectors have nonzero elements
          if (xe.index() == ye.index()) {
            xe.set(f.apply(xe.get(), ye.get()));
            advanceThis = true;
            advanceThat = true;
          } else {
            if (xe.index() < ye.index()) { // f(x, 0)
              xe.set(f.apply(xe.get(), 0));
              advanceThis = true;
              advanceThat = false;
            } else {
              updates.set(ye.index(), f.apply(0, ye.get()));
              advanceThis = false;
              advanceThat = true;
            }
          }
        } else if (xe != null) { // just the first one still has nonzeros
          xe.set(f.apply(xe.get(), 0));
          advanceThis = true;
          advanceThat = false;
        } else if (ye != null) { // just the second one has nonzeros
          updates.set(ye.index(), f.apply(0, ye.get()));
          advanceThis = false;
          advanceThat = true;
        } else { // we're done, both are empty
          break;
        }
      }
      x.mergeUpdates(updates);
      return x;
    }
  }

  /**
   * If f(0, 0) = 0 we can iterate through the nonzeros in either x or y.
   * In this case we iterate through them in parallel and update x inplace. Because we're iterating through
   * both vectors at the same time, x and y need to support sequential access.
   */
  public static class AssignIterateUnionSequentialInplaceUpdates extends VectorBinaryAssign {

    @Override
    public boolean isValid(Vector x, Vector y, DoubleDoubleFunction f) {
      return !f.isDensifying() && x.isSequentialAccess() && y.isSequentialAccess() && x.isAddConstantTime();
    }

    @Override
    public double estimateCost(Vector x, Vector y, DoubleDoubleFunction f) {
      return Math.max(x.getNumNondefaultElements() * x.getIteratorAdvanceCost(),
          y.getNumNondefaultElements() * y.getIteratorAdvanceCost());
    }

    @Override
    public Vector assign(Vector x, Vector y, DoubleDoubleFunction f) {
      Iterator<Vector.Element> xi = x.nonZeroes().iterator();
      Iterator<Vector.Element> yi = y.nonZeroes().iterator();
      Vector.Element xe = null;
      Vector.Element ye = null;
      boolean advanceThis = true;
      boolean advanceThat = true;
      while (true) {
        if (advanceThis) {
          if (xi.hasNext()) {
            xe = xi.next();
          } else {
            xe = null;
          }
        }
        if (advanceThat) {
          if (yi.hasNext()) {
            ye = yi.next();
          } else {
            ye = null;
          }
        }
        if (xe != null && ye != null) { // both vectors have nonzero elements
          if (xe.index() == ye.index()) {
            xe.set(f.apply(xe.get(), ye.get()));
            advanceThis = true;
            advanceThat = true;
          } else {
            if (xe.index() < ye.index()) { // f(x, 0)
              xe.set(f.apply(xe.get(), 0));
              advanceThis = true;
              advanceThat = false;
            } else {
              x.setQuick(ye.index(), f.apply(0, ye.get()));
              advanceThis = false;
              advanceThat = true;
            }
          }
        } else if (xe != null) { // just the first one still has nonzeros
          xe.set(f.apply(xe.get(), 0));
          advanceThis = true;
          advanceThat = false;
        } else if (ye != null) { // just the second one has nonzeros
          x.setQuick(ye.index(), f.apply(0, ye.get()));
          advanceThis = false;
          advanceThat = true;
        } else { // we're done, both are empty
          break;
        }
      }
      return x;
    }
  }

  /**
   * If f(0, 0) = 0 we can iterate through the nonzeros in either x or y.
   * In this case, we iterate through the nozeros of x and y alternatively (this works even when one of them
   * doesn't support sequential access). Since we're merging the results into x, when iterating through y, the
   * order of iteration matters and y must support sequential access.
   */
  public static class AssignIterateUnionRandomMergeUpdates extends VectorBinaryAssign {

    @Override
    public boolean isValid(Vector x, Vector y, DoubleDoubleFunction f) {
      return !f.isDensifying() && !x.isAddConstantTime() && y.isSequentialAccess();
    }

    @Override
    public double estimateCost(Vector x, Vector y, DoubleDoubleFunction f) {
      return Math.max(x.getNumNondefaultElements() * x.getIteratorAdvanceCost() * y.getLookupCost(),
          y.getNumNondefaultElements() * y.getIteratorAdvanceCost() * x.getLookupCost());
    }

    @Override
    public Vector assign(Vector x, Vector y, DoubleDoubleFunction f) {
      OpenIntHashSet visited = new OpenIntHashSet();
      for (Element xe : x.nonZeroes()) {
        xe.set(f.apply(xe.get(), y.getQuick(xe.index())));
        visited.add(xe.index());
      }
      OrderedIntDoubleMapping updates = new OrderedIntDoubleMapping(false);
      for (Element ye : y.nonZeroes()) {
        if (!visited.contains(ye.index())) {
          updates.set(ye.index(), f.apply(x.getQuick(ye.index()), ye.get()));
        }
      }
      x.mergeUpdates(updates);
      return x;
    }
  }

  /**
   * If f(0, 0) = 0 we can iterate through the nonzeros in either x or y.
   * In this case, we iterate through the nozeros of x and y alternatively (this works even when one of them
   * doesn't support sequential access). Because updates to x are inplace, neither x, nor y need to support
   * sequential access.
   */
  public static class AssignIterateUnionRandomInplaceUpdates extends VectorBinaryAssign {

    @Override
    public boolean isValid(Vector x, Vector y, DoubleDoubleFunction f) {
      return !f.isDensifying() && x.isAddConstantTime();
    }

    @Override
    public double estimateCost(Vector x, Vector y, DoubleDoubleFunction f) {
      return Math.max(x.getNumNondefaultElements() * x.getIteratorAdvanceCost() * y.getLookupCost(),
          y.getNumNondefaultElements() * y.getIteratorAdvanceCost() * x.getLookupCost());
    }
    @Override
    public Vector assign(Vector x, Vector y, DoubleDoubleFunction f) {
      OpenIntHashSet visited = new OpenIntHashSet();
      for (Element xe : x.nonZeroes()) {
        xe.set(f.apply(xe.get(), y.getQuick(xe.index())));
        visited.add(xe.index());
      }
      for (Element ye : y.nonZeroes()) {
        if (!visited.contains(ye.index())) {
          x.setQuick(ye.index(), f.apply(x.getQuick(ye.index()), ye.get()));
        }
      }
      return x;
    }
  }

  public static class AssignAllIterateSequentialMergeUpdates extends VectorBinaryAssign {

    @Override
    public boolean isValid(Vector x, Vector y, DoubleDoubleFunction f) {
      return x.isSequentialAccess() && y.isSequentialAccess() && !x.isAddConstantTime() && !x.isDense() && !y.isDense();
    }

    @Override
    public double estimateCost(Vector x, Vector y, DoubleDoubleFunction f) {
      return Math.max(x.size() * x.getIteratorAdvanceCost(), y.size() * y.getIteratorAdvanceCost());
    }

    @Override
    public Vector assign(Vector x, Vector y, DoubleDoubleFunction f) {
      Iterator<Vector.Element> xi = x.all().iterator();
      Iterator<Vector.Element> yi = y.all().iterator();
      OrderedIntDoubleMapping updates = new OrderedIntDoubleMapping(false);
      while (xi.hasNext() && yi.hasNext()) {
        Element xe = xi.next();
        updates.set(xe.index(), f.apply(xe.get(), yi.next().get()));
      }
      x.mergeUpdates(updates);
      return x;
    }
  }

  public static class AssignAllIterateSequentialInplaceUpdates extends VectorBinaryAssign {

    @Override
    public boolean isValid(Vector x, Vector y, DoubleDoubleFunction f) {
      return x.isSequentialAccess() && y.isSequentialAccess() && x.isAddConstantTime()
          && !x.isDense() && !y.isDense();
    }

    @Override
    public double estimateCost(Vector x, Vector y, DoubleDoubleFunction f) {
      return Math.max(x.size() * x.getIteratorAdvanceCost(), y.size() * y.getIteratorAdvanceCost());
    }

    @Override
    public Vector assign(Vector x, Vector y, DoubleDoubleFunction f) {
      Iterator<Vector.Element> xi = x.all().iterator();
      Iterator<Vector.Element> yi = y.all().iterator();
      while (xi.hasNext() && yi.hasNext()) {
        Element xe = xi.next();
        x.setQuick(xe.index(), f.apply(xe.get(), yi.next().get()));
      }
      return x;
    }
  }

  public static class AssignAllIterateThisLookupThatMergeUpdates extends VectorBinaryAssign {

    @Override
    public boolean isValid(Vector x, Vector y, DoubleDoubleFunction f) {
      return !x.isAddConstantTime() && !x.isDense();
    }

    @Override
    public double estimateCost(Vector x, Vector y, DoubleDoubleFunction f) {
      return x.size() * x.getIteratorAdvanceCost() * y.getLookupCost();
    }

    @Override
    public Vector assign(Vector x, Vector y, DoubleDoubleFunction f) {
      OrderedIntDoubleMapping updates = new OrderedIntDoubleMapping(false);
      for (Element xe : x.all()) {
        updates.set(xe.index(), f.apply(xe.get(), y.getQuick(xe.index())));
      }
      x.mergeUpdates(updates);
      return x;
    }
  }

  public static class AssignAllIterateThisLookupThatInplaceUpdates extends VectorBinaryAssign {

    @Override
    public boolean isValid(Vector x, Vector y, DoubleDoubleFunction f) {
      return x.isAddConstantTime() && !x.isDense();
    }

    @Override
    public double estimateCost(Vector x, Vector y, DoubleDoubleFunction f) {
      return x.size() * x.getIteratorAdvanceCost() * y.getLookupCost();
    }

    @Override
    public Vector assign(Vector x, Vector y, DoubleDoubleFunction f) {
      for (Element xe : x.all()) {
        x.setQuick(xe.index(), f.apply(xe.get(), y.getQuick(xe.index())));
      }
      return x;
    }
  }

  public static class AssignAllIterateThatLookupThisMergeUpdates extends VectorBinaryAssign {

    @Override
    public boolean isValid(Vector x, Vector y, DoubleDoubleFunction f) {
      return !x.isAddConstantTime() && !y.isDense();
    }

    @Override
    public double estimateCost(Vector x, Vector y, DoubleDoubleFunction f) {
      return y.size() * y.getIteratorAdvanceCost() * x.getLookupCost();
    }

    @Override
    public Vector assign(Vector x, Vector y, DoubleDoubleFunction f) {
      OrderedIntDoubleMapping updates = new OrderedIntDoubleMapping(false);
      for (Element ye : y.all()) {
        updates.set(ye.index(), f.apply(x.getQuick(ye.index()), ye.get()));
      }
      x.mergeUpdates(updates);
      return x;
    }
  }

  public static class AssignAllIterateThatLookupThisInplaceUpdates extends VectorBinaryAssign {

    @Override
    public boolean isValid(Vector x, Vector y, DoubleDoubleFunction f) {
      return x.isAddConstantTime() && !y.isDense();
    }

    @Override
    public double estimateCost(Vector x, Vector y, DoubleDoubleFunction f) {
      return y.size() * y.getIteratorAdvanceCost() * x.getLookupCost();
    }

    @Override
    public Vector assign(Vector x, Vector y, DoubleDoubleFunction f) {
      for (Element ye : y.all()) {
        x.setQuick(ye.index(), f.apply(x.getQuick(ye.index()), ye.get()));
      }
      return x;
    }
  }

  public static class AssignAllLoopMergeUpdates extends VectorBinaryAssign {

    @Override
    public boolean isValid(Vector x, Vector y, DoubleDoubleFunction f) {
      return !x.isAddConstantTime();
    }

    @Override
    public double estimateCost(Vector x, Vector y, DoubleDoubleFunction f) {
      return x.size() * x.getLookupCost() * y.getLookupCost();
    }

    @Override
    public Vector assign(Vector x, Vector y, DoubleDoubleFunction f) {
      OrderedIntDoubleMapping updates = new OrderedIntDoubleMapping(false);
      for (int i = 0; i < x.size(); ++i) {
        updates.set(i, f.apply(x.getQuick(i), y.getQuick(i)));
      }
      x.mergeUpdates(updates);
      return x;
    }
  }

  public static class AssignAllLoopInplaceUpdates extends VectorBinaryAssign {

    @Override
    public boolean isValid(Vector x, Vector y, DoubleDoubleFunction f) {
      return x.isAddConstantTime();
    }

    @Override
    public double estimateCost(Vector x, Vector y, DoubleDoubleFunction f) {
      return x.size() * x.getLookupCost() * y.getLookupCost();
    }

    @Override
    public Vector assign(Vector x, Vector y, DoubleDoubleFunction f) {
      for (int i = 0; i < x.size(); ++i) {
        x.setQuick(i, f.apply(x.getQuick(i), y.getQuick(i)));
      }
      return x;
    }
  }
}