/*********************************************************************************************************************** * Copyright (C) 2010-2013 by the Stratosphere project (http://stratosphere.eu) * * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with * the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on * an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the * specific language governing permissions and limitations under the License. **********************************************************************************************************************/ package eu.stratosphere.test.recordJobs.kmeans.udfs; import java.io.DataInput; import java.io.DataOutput; import java.io.IOException; import eu.stratosphere.types.Key; /** * Implements a feature vector as a multi-dimensional point. Coordinates of that point * (= the features) are stored as double values. The distance between two feature vectors is * the Euclidian distance between the points. */ public final class CoordVector implements Key<CoordVector> { private static final long serialVersionUID = 1L; // coordinate array private double[] coordinates; /** * Initializes a blank coordinate vector. Required for deserialization! */ public CoordVector() { coordinates = null; } /** * Initializes a coordinate vector. * * @param coordinates The coordinate vector of a multi-dimensional point. */ public CoordVector(Double[] coordinates) { this.coordinates = new double[coordinates.length]; for (int i = 0; i < coordinates.length; i++) { this.coordinates[i] = coordinates[i]; } } /** * Initializes a coordinate vector. * * @param coordinates The coordinate vector of a multi-dimensional point. */ public CoordVector(double[] coordinates) { this.coordinates = coordinates; } /** * Returns the coordinate vector of a multi-dimensional point. * * @return The coordinate vector of a multi-dimensional point. */ public double[] getCoordinates() { return this.coordinates; } /** * Sets the coordinate vector of a multi-dimensional point. * * @param point The dimension values of the point. */ public void setCoordinates(double[] coordinates) { this.coordinates = coordinates; } /** * Computes the Euclidian distance between this coordinate vector and a * second coordinate vector. * * @param cv The coordinate vector to which the distance is computed. * @return The Euclidian distance to coordinate vector cv. If cv has a * different length than this coordinate vector, -1 is returned. */ public double computeEuclidianDistance(CoordVector cv) { // check coordinate vector lengths if (cv.coordinates.length != this.coordinates.length) { return -1.0; } double quadSum = 0.0; for (int i = 0; i < this.coordinates.length; i++) { double diff = this.coordinates[i] - cv.coordinates[i]; quadSum += diff*diff; } return Math.sqrt(quadSum); } @Override public void read(DataInput in) throws IOException { int length = in.readInt(); this.coordinates = new double[length]; for (int i = 0; i < length; i++) { this.coordinates[i] = in.readDouble(); } } @Override public void write(DataOutput out) throws IOException { out.writeInt(this.coordinates.length); for (int i = 0; i < this.coordinates.length; i++) { out.writeDouble(this.coordinates[i]); } } /** * Compares this coordinate vector to another key. * * @return -1 if the other key is not of type CoordVector. If the other * key is also a CoordVector but its length differs from this * coordinates vector, -1 is return if this coordinate vector is * smaller and 1 if it is larger. If both coordinate vectors * have the same length, the coordinates of both are compared. * If a coordinate of this coordinate vector is smaller than the * corresponding coordinate of the other vector -1 is returned * and 1 otherwise. If all coordinates are identical 0 is * returned. */ @Override public int compareTo(CoordVector o) { // check if both coordinate vectors have identical lengths if (o.coordinates.length > this.coordinates.length) { return -1; } else if (o.coordinates.length < this.coordinates.length) { return 1; } // compare all coordinates for (int i = 0; i < this.coordinates.length; i++) { if (o.coordinates[i] > this.coordinates[i]) { return -1; } else if (o.coordinates[i] < this.coordinates[i]) { return 1; } } return 0; } }