/* * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. * The ASF licenses this file to You under the Apache License, Version 2.0 * (the "License"); you may not use this file except in compliance with * the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.apache.mahout.h2obindings.ops; import water.MRTask; import water.fvec.Frame; import water.fvec.Vec; import water.fvec.Chunk; import org.apache.mahout.h2obindings.drm.H2ODrm; /** * R-like cbind like operator, on two DRMs */ public class Cbind { /** * Combine the columns of two DRMs A and B to create a new DRM. * * @param drmA DRM representing matrix A. * @param drmB DRM representing matrix B. * @return new DRM containing columns of A and B adjacent. */ public static H2ODrm exec(H2ODrm drmA, H2ODrm drmB) { Frame fra = drmA.frame; Vec keysa = drmA.keys; Frame frb = drmB.frame; Vec keysb = drmB.keys; // If A and B are similarly partitioned, .. if (fra.anyVec().group() == frb.anyVec().group()) { // .. then, do a light weight zip() return zip(fra, keysa, frb, keysb); } else { // .. else, do a heavy weight join() which involves moving data over the wire return join(fra, keysa, frb, keysb); } } /** Light weight zip(), no data movement */ private static H2ODrm zip(final Frame fra, final Vec keysa, final Frame frb, final Vec keysb) { // Create a new Vec[] to hold the concatenated list of A and B's column vectors Vec vecs[] = new Vec[fra.vecs().length + frb.vecs().length]; int d = 0; // fill A's column vectors for (Vec vfra : fra.vecs()) { vecs[d++] = vfra; } // and B's for (Vec vfrb : frb.vecs()) { vecs[d++] = vfrb; } // and create a new Frame with the combined list of column Vecs Frame fr = new Frame(vecs); /* Finally, inherit A's string labels into the result */ return new H2ODrm(fr, keysa); } /** Heavy weight join(), involves moving data */ private static H2ODrm join(final Frame fra, final Vec keysa, final Frame frb, final Vec keysb) { // The plan is to re-organize B to be "similarly partitioned as A", and then zip() Vec bvecs[] = new Vec[frb.vecs().length]; for (int i = 0; i < bvecs.length; i++) { // First create column Vecs which are similarly partitioned as A bvecs[i] = fra.anyVec().makeZero(); } // Next run an MRTask on the new vectors, and fill each cell (initially 0) // by pulling in appropriate values from B (frb) new MRTask() { public void map(Chunk chks[]) { int chunkSize = chks[0].len(); long start = chks[0].start(); Vec vecs[] = frb.vecs(); for (int r = 0; r < chunkSize; r++) { for (int c = 0; c < chks.length; c++) { // assert va.atStr(start+r) == vb.atStr(start+r) chks[c].set(r, vecs[c].at(start + r)); } } } }.doAll(bvecs); // now that bvecs[] is compatible, just zip'em'up return zip(fra, keysa, new Frame(bvecs), null); } }