/** * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. * The ASF licenses this file to You under the Apache License, Version 2.0 * (the "License"); you may not use this file except in compliance with * the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.apache.mahout.math; import com.google.common.collect.AbstractIterator; import com.google.common.primitives.Doubles; import org.apache.mahout.math.function.Functions; import java.util.Arrays; import java.util.Iterator; /** * <p> * Implements vector that only stores non-zero doubles as a pair of parallel arrays (OrderedIntDoubleMapping), * one int[], one double[]. If there are <b>k</b> non-zero elements in the vector, this implementation has * O(log(k)) random-access read performance, and O(k) random-access write performance, which is far below that * of the hashmap based {@link org.apache.mahout.math.RandomAccessSparseVector RandomAccessSparseVector}. This * class is primarily used for operations where the all the elements will be accessed in a read-only fashion * sequentially: methods which operate not via get() or set(), but via iterateNonZero(), such as (but not limited * to) :</p> * <ul> * <li>dot(Vector)</li> * <li>addTo(Vector)</li> * </ul> * <p> * Note that the Vector passed to these above methods may (and currently, are) be used in a random access fashion, * so for example, calling SequentialAccessSparseVector.dot(SequentialAccessSparseVector) is slow. * TODO: this need not be the case - both are ordered, so this should be very fast if implmented in this class * </p> * * See {@link OrderedIntDoubleMapping} */ public class SequentialAccessSparseVector extends AbstractVector { private OrderedIntDoubleMapping values; /** For serialization purposes only. */ public SequentialAccessSparseVector() { super(0); } public SequentialAccessSparseVector(int cardinality) { this(cardinality, cardinality / 8); // arbitrary estimate of 'sparseness' } public SequentialAccessSparseVector(int cardinality, int size) { super(cardinality); values = new OrderedIntDoubleMapping(size); } public SequentialAccessSparseVector(Vector other) { this(other.size(), other.getNumNondefaultElements()); if (other.isSequentialAccess()) { Iterator<Element> it = other.iterateNonZero(); Element e; while (it.hasNext() && (e = it.next()) != null) { set(e.index(), e.get()); } } else { // If the incoming Vector to copy is random, then adding items // from the Iterator can degrade performance dramatically if // the number of elements is large as this Vector tries to stay // in order as items are added, so it's better to sort the other // Vector's elements by index and then add them to this copySortedRandomAccessSparseVector(other); } } // Sorts a RandomAccessSparseVectors Elements before adding them to this private int copySortedRandomAccessSparseVector(Vector other) { int elementCount = other.getNumNondefaultElements(); OrderedElement[] sortableElements = new OrderedElement[elementCount]; Iterator<Element> it = other.iterateNonZero(); Element e; int s=0; while (it.hasNext() && (e = it.next()) != null) { sortableElements[s++] = new OrderedElement(e.index(), e.get()); } Arrays.sort(sortableElements); for (int i = 0; i < sortableElements.length; i++) { values.getIndices()[i] = sortableElements[i].index; values.getValues()[i] = sortableElements[i].value; } values = new OrderedIntDoubleMapping(values.getIndices(), values.getValues(), elementCount); return elementCount; } public SequentialAccessSparseVector(SequentialAccessSparseVector other, boolean shallowCopy) { super(other.size()); values = shallowCopy ? other.values : other.values.clone(); } public SequentialAccessSparseVector(SequentialAccessSparseVector other) { this(other.size(), other.getNumNondefaultElements()); values = other.values.clone(); } private SequentialAccessSparseVector(int cardinality, OrderedIntDoubleMapping values) { super(cardinality); this.values = values; } @Override protected Matrix matrixLike(int rows, int columns) { return new SparseRowMatrix(rows, columns); } @Override public SequentialAccessSparseVector clone() { return new SequentialAccessSparseVector(size(), values.clone()); } @Override public Vector assign(Vector other) { int size = size(); if (size != other.size()) { throw new CardinalityException(size, other.size()); } if (other instanceof SequentialAccessSparseVector) { values = ((SequentialAccessSparseVector)other).values.clone(); } else { values = new OrderedIntDoubleMapping(); Iterator<Element> othersElems = other.iterateNonZero(); while (othersElems.hasNext()) { Element elem = othersElems.next(); setQuick(elem.index(), elem.get()); } } return this; } @Override public String toString() { StringBuilder result = new StringBuilder(); result.append('{'); Iterator<Element> it = iterateNonZero(); while (it.hasNext()) { Element e = it.next(); result.append(e.index()); result.append(':'); result.append(e.get()); result.append(','); } if (result.length() > 1) { result.setCharAt(result.length() - 1, '}'); } return result.toString(); } /** * @return false */ @Override public boolean isDense() { return false; } /** * @return true */ @Override public boolean isSequentialAccess() { return true; } @Override public double getQuick(int index) { return values.get(index); } @Override public void setQuick(int index, double value) { lengthSquared = -1; values.set(index, value); } @Override public int getNumNondefaultElements() { return values.getNumMappings(); } @Override public SequentialAccessSparseVector like() { return new SequentialAccessSparseVector(size(), values.getNumMappings()); } @Override public Iterator<Element> iterateNonZero() { return new NonDefaultIterator(); } @Override public Iterator<Element> iterator() { return new AllIterator(); } @Override public Vector minus(Vector that) { if (size() != that.size()) { throw new CardinalityException(size(), that.size()); } // Here we compute "that - this" since it's not fast to randomly access "this" // and then invert at the end Vector result = that.clone(); Iterator<Element> iter = this.iterateNonZero(); while (iter.hasNext()) { Element thisElement = iter.next(); int index = thisElement.index(); result.setQuick(index, that.getQuick(index) - thisElement.get()); } result.assign(Functions.NEGATE); return result; } private final class NonDefaultIterator extends AbstractIterator<Element> { private final NonDefaultElement element = new NonDefaultElement(); @Override protected Element computeNext() { int numMappings = values.getNumMappings(); if (numMappings <= 0 || element.getNextOffset() >= numMappings) { return endOfData(); } element.advanceOffset(); return element; } } private final class AllIterator extends AbstractIterator<Element> { private final AllElement element = new AllElement(); @Override protected Element computeNext() { int numMappings = values.getNumMappings(); if (numMappings <= 0 || element.getNextIndex() > values.getIndices()[numMappings - 1]) { return endOfData(); } element.advanceIndex(); return element; } } private final class NonDefaultElement implements Element { private int offset = -1; void advanceOffset() { offset++; } int getNextOffset() { return offset + 1; } @Override public double get() { return values.getValues()[offset]; } @Override public int index() { return values.getIndices()[offset]; } @Override public void set(double value) { lengthSquared = -1; values.getValues()[offset] = value; } } private final class AllElement implements Element { private int index = -1; private int nextOffset; void advanceIndex() { index++; if (index > values.getIndices()[nextOffset]) { nextOffset++; } } int getNextIndex() { return index + 1; } @Override public double get() { if (index == values.getIndices()[nextOffset]) { return values.getValues()[nextOffset]; } return OrderedIntDoubleMapping.DEFAULT_VALUE; } @Override public int index() { return index; } @Override public void set(double value) { lengthSquared = -1; if (index == values.getIndices()[nextOffset]) { values.getValues()[nextOffset] = value; } else { // Yes, this works; the offset into indices of the new value's index will still be nextOffset values.set(index, value); } } } // Comparable Element for sorting Elements by index private static final class OrderedElement implements Comparable<OrderedElement> { private final int index; private final double value; OrderedElement(int index, double value) { this.index = index; this.value = value; } @Override public int compareTo(OrderedElement that) { // both indexes are positive, and neither can be Integer.MAX_VALUE (otherwise there would be // an array somewhere with Integer.MAX_VALUE + 1 elements) return this.index - that.index; } @Override public int hashCode() { return index ^ Doubles.hashCode(value); } @Override public boolean equals(Object o) { if (!(o instanceof OrderedElement)) { return false; } OrderedElement other = (OrderedElement) o; return index == other.index && value == other.value; } } }