/*
 * Copyright [2012-2015] PayPal Software Foundation
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package ml.shifu.shifu.core.binning;

import ml.shifu.shifu.core.autotype.CountAndFrequentItemsWritable;
import org.apache.hadoop.io.Writable;

import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
import java.nio.charset.Charset;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;

/**
 * {@link BinningInfoWritable} stores column statistics collected in the mapper and aggregated in the reducer.
 *
 * <p>
 * Serialized layout, in order: {@code isNumeric}, {@code columnNum}, {@code max}, {@code min}, {@code sum},
 * {@code squaredSum}, {@code tripleSum}, {@code quarticSum}, {@code missingCount}, {@code totalCount}, then the
 * four length-prefixed arrays {@code binCountPos}, {@code binCountNeg}, {@code binWeightPos}, {@code binWeightNeg}.
 * A numeric column then carries the length-prefixed {@code binBoundaries} and {@code xMultiY} (length 0 when
 * {@code null}); a non-numeric column carries the length-prefixed {@code binCategories}, each category being a
 * length-prefixed UTF-8 byte sequence. The {@link CountAndFrequentItemsWritable} payload always comes last.
 */
public class BinningInfoWritable implements Writable {

    /** Charset used to encode and decode category names; resolved once instead of once per string. */
    private static final Charset UTF_8 = Charset.forName("UTF-8");

    private boolean isNumeric = true;

    private int columnNum;

    private List<Double> binBoundaries;

    private List<String> binCategories;

    private long[] binCountPos;

    private long[] binCountNeg;

    private double[] binWeightPos;

    private double[] binWeightNeg;

    private double min = Double.MAX_VALUE;

    // Seeded with the most negative finite double. Double.MIN_VALUE is the smallest POSITIVE double, so using it
    // as the seed of a running maximum makes it impossible to ever report a maximum that is <= 0.
    private double max = -Double.MAX_VALUE;

    private double sum = 0.0d;

    private double squaredSum = 0.0d;

    private double tripleSum = 0.0d;

    private double quarticSum = 0.0d;

    private long missingCount = 0L;

    private long totalCount = 0L;

    private double[] xMultiY = null;

    private CountAndFrequentItemsWritable cfiw = new CountAndFrequentItemsWritable();

    /**
     * @return the isNumeric
     */
    public boolean isNumeric() {
        return isNumeric;
    }

    /**
     * @param isNumeric
     *            the isNumeric to set; selects which of the two layouts {@link #write(DataOutput)} emits
     */
    public void setNumeric(boolean isNumeric) {
        this.isNumeric = isNumeric;
    }

    /**
     * @return the columnNum
     */
    public int getColumnNum() {
        return columnNum;
    }

    /**
     * @param columnNum
     *            the columnNum to set
     */
    public void setColumnNum(int columnNum) {
        this.columnNum = columnNum;
    }

    /**
     * @return the binBoundaries; only serialized when {@link #isNumeric()} is {@code true}
     */
    public List<Double> getBinBoundaries() {
        return binBoundaries;
    }

    /**
     * @param binBoundaries
     *            the binBoundaries to set
     */
    public void setBinBoundaries(List<Double> binBoundaries) {
        this.binBoundaries = binBoundaries;
    }

    /**
     * @return the binCategories; only serialized when {@link #isNumeric()} is {@code false}
     */
    public List<String> getBinCategories() {
        return binCategories;
    }

    /**
     * @param binCategories
     *            the binCategories to set
     */
    public void setBinCategories(List<String> binCategories) {
        this.binCategories = binCategories;
    }

    /**
     * @return the binCountPos
     */
    public long[] getBinCountPos() {
        return binCountPos;
    }

    /**
     * @param binCountPos
     *            the binCountPos to set
     */
    public void setBinCountPos(long[] binCountPos) {
        this.binCountPos = binCountPos;
    }

    /**
     * @return the binCountNeg
     */
    public long[] getBinCountNeg() {
        return binCountNeg;
    }

    /**
     * @param binCountNeg
     *            the binCountNeg to set
     */
    public void setBinCountNeg(long[] binCountNeg) {
        this.binCountNeg = binCountNeg;
    }

    /**
     * @return the binWeightPos
     */
    public double[] getBinWeightPos() {
        return binWeightPos;
    }

    /**
     * @param binWeightPos
     *            the binWeightPos to set
     */
    public void setBinWeightPos(double[] binWeightPos) {
        this.binWeightPos = binWeightPos;
    }

    /**
     * @return the binWeightNeg
     */
    public double[] getBinWeightNeg() {
        return binWeightNeg;
    }

    /**
     * @param binWeightNeg
     *            the binWeightNeg to set
     */
    public void setBinWeightNeg(double[] binWeightNeg) {
        this.binWeightNeg = binWeightNeg;
    }

    /**
     * @return the min; {@link Double#MAX_VALUE} until a value has been set
     */
    public double getMin() {
        return min;
    }

    /**
     * @param min
     *            the min to set
     */
    public void setMin(double min) {
        this.min = min;
    }

    /**
     * @return the max; {@code -Double.MAX_VALUE} until a value has been set
     */
    public double getMax() {
        return max;
    }

    /**
     * @param max
     *            the max to set
     */
    public void setMax(double max) {
        this.max = max;
    }

    /**
     * @return the sum
     */
    public double getSum() {
        return sum;
    }

    /**
     * @param sum
     *            the sum to set
     */
    public void setSum(double sum) {
        this.sum = sum;
    }

    /**
     * @return the squaredSum
     */
    public double getSquaredSum() {
        return squaredSum;
    }

    /**
     * @param squaredSum
     *            the squaredSum to set
     */
    public void setSquaredSum(double squaredSum) {
        this.squaredSum = squaredSum;
    }

    /**
     * @return the tripleSum
     */
    public double getTripleSum() {
        return tripleSum;
    }

    /**
     * @param tripleSum
     *            the tripleSum to set
     */
    public void setTripleSum(double tripleSum) {
        this.tripleSum = tripleSum;
    }

    /**
     * @return the quarticSum
     */
    public double getQuarticSum() {
        return quarticSum;
    }

    /**
     * @param quarticSum
     *            the quarticSum to set
     */
    public void setQuarticSum(double quarticSum) {
        this.quarticSum = quarticSum;
    }

    /**
     * @return the missingCount
     */
    public long getMissingCount() {
        return missingCount;
    }

    /**
     * @param missingCount
     *            the missingCount to set
     */
    public void setMissingCount(long missingCount) {
        this.missingCount = missingCount;
    }

    /**
     * @return the totalCount
     */
    public long getTotalCount() {
        return totalCount;
    }

    /**
     * @param totalCount
     *            the totalCount to set
     */
    public void setTotalCount(long totalCount) {
        this.totalCount = totalCount;
    }

    /**
     * @return the xMultiY; may be {@code null}
     */
    public double[] getxMultiY() {
        return xMultiY;
    }

    /**
     * @param xMultiY
     *            the xMultiY to set
     */
    public void setxMultiY(double[] xMultiY) {
        this.xMultiY = xMultiY;
    }

    /**
     * @return the cfiw
     */
    public CountAndFrequentItemsWritable getCfiw() {
        return cfiw;
    }

    /**
     * @param cfiw
     *            the cfiw to set
     */
    public void setCfiw(CountAndFrequentItemsWritable cfiw) {
        this.cfiw = cfiw;
    }

    /**
     * Serializes this instance using the layout described on the class.
     *
     * @param out
     *            the sink to write to
     * @throws IOException
     *             if the underlying stream fails
     * @throws NullPointerException
     *             if any of the four bin arrays, the list matching {@link #isNumeric()}, or {@code cfiw} is
     *             {@code null}; only {@code xMultiY} may be {@code null}
     */
    @Override
    public void write(DataOutput out) throws IOException {
        out.writeBoolean(this.isNumeric);
        out.writeInt(this.columnNum);
        // max is written before min; readFields relies on the same order.
        out.writeDouble(this.max);
        out.writeDouble(this.min);
        out.writeDouble(this.sum);
        out.writeDouble(this.squaredSum);
        out.writeDouble(this.tripleSum);
        out.writeDouble(this.quarticSum);
        out.writeLong(this.missingCount);
        out.writeLong(this.totalCount);

        writeLongArray(out, this.binCountPos);
        writeLongArray(out, this.binCountNeg);
        writeDoubleArray(out, this.binWeightPos);
        writeDoubleArray(out, this.binWeightNeg);

        if(this.isNumeric) {
            out.writeInt(this.binBoundaries.size());
            for(Double boundary: this.binBoundaries) {
                out.writeDouble(boundary);
            }

            // A null xMultiY is encoded as length 0.
            if(this.xMultiY == null) {
                out.writeInt(0);
            } else {
                writeDoubleArray(out, this.xMultiY);
            }
        } else {
            out.writeInt(this.binCategories.size());
            for(String category: this.binCategories) {
                byte[] encoded = category.getBytes(UTF_8);
                out.writeInt(encoded.length);
                out.write(encoded);
            }
            // xMultiY computation is in the reducer computation
        }

        this.cfiw.write(out);
    }

    /**
     * Restores this instance from the layout produced by {@link #write(DataOutput)}.
     *
     * <p>
     * Every field that is not present in the record being read is reset to {@code null} ({@code binCategories}
     * for a numeric column, {@code binBoundaries} and {@code xMultiY} for a non-numeric one, and {@code xMultiY}
     * when its serialized length is 0), so that an instance which is reused for several records never exposes
     * values left over from a previous record.
     *
     * @param in
     *            the source to read from
     * @throws IOException
     *             if the underlying stream fails or ends prematurely
     */
    @Override
    public void readFields(DataInput in) throws IOException {
        this.isNumeric = in.readBoolean();
        this.columnNum = in.readInt();
        this.max = in.readDouble();
        this.min = in.readDouble();
        this.sum = in.readDouble();
        this.squaredSum = in.readDouble();
        this.tripleSum = in.readDouble();
        this.quarticSum = in.readDouble();
        this.missingCount = in.readLong();
        this.totalCount = in.readLong();

        this.binCountPos = readLongArray(in, in.readInt());
        this.binCountNeg = readLongArray(in, in.readInt());
        this.binWeightPos = readDoubleArray(in, in.readInt());
        this.binWeightNeg = readDoubleArray(in, in.readInt());

        if(this.isNumeric) {
            int boundaryCount = in.readInt();
            List<Double> boundaries = new ArrayList<Double>(boundaryCount);
            for(int i = 0; i < boundaryCount; i++) {
                boundaries.add(in.readDouble());
            }
            this.binBoundaries = boundaries;

            // Length 0 means "no xMultiY"; assign null explicitly so a stale array from a previous record is dropped.
            int xMultiYSize = in.readInt();
            this.xMultiY = (xMultiYSize == 0) ? null : readDoubleArray(in, xMultiYSize);

            // Not part of a numeric record.
            this.binCategories = null;
        } else {
            int categoryCount = in.readInt();
            List<String> categories = new ArrayList<String>(categoryCount);
            for(int i = 0; i < categoryCount; i++) {
                byte[] encoded = new byte[in.readInt()];
                in.readFully(encoded);
                categories.add(new String(encoded, UTF_8));
            }
            this.binCategories = categories;

            // Not part of a non-numeric record.
            this.binBoundaries = null;
            this.xMultiY = null;
        }

        this.cfiw = new CountAndFrequentItemsWritable();
        this.cfiw.readFields(in);
    }

    /** Writes {@code values.length} followed by every element of {@code values}. */
    private static void writeLongArray(DataOutput out, long[] values) throws IOException {
        out.writeInt(values.length);
        for(long value: values) {
            out.writeLong(value);
        }
    }

    /** Writes {@code values.length} followed by every element of {@code values}. */
    private static void writeDoubleArray(DataOutput out, double[] values) throws IOException {
        out.writeInt(values.length);
        for(double value: values) {
            out.writeDouble(value);
        }
    }

    /** Reads {@code length} longs into a newly allocated array. */
    private static long[] readLongArray(DataInput in, int length) throws IOException {
        long[] values = new long[length];
        for(int i = 0; i < length; i++) {
            values[i] = in.readLong();
        }
        return values;
    }

    /** Reads {@code length} doubles into a newly allocated array. */
    private static double[] readDoubleArray(DataInput in, int length) throws IOException {
        double[] values = new double[length];
        for(int i = 0; i < length; i++) {
            values[i] = in.readDouble();
        }
        return values;
    }

    /*
     * (non-Javadoc)
     * 
     * @see java.lang.Object#toString()
     */
    @Override
    public String toString() {
        return "BinningInfoWritable [isNumeric=" + isNumeric + ", columnNum=" + columnNum + ", binBoundaries="
                + binBoundaries + ", binCategories=" + binCategories + ", binCountPos="
                + Arrays.toString(binCountPos) + ", binCountNeg=" + Arrays.toString(binCountNeg)
                + ", binWeightPos=" + Arrays.toString(binWeightPos) + ", binWeightNeg="
                + Arrays.toString(binWeightNeg) + ", min=" + min + ", max=" + max + ", sum=" + sum
                + ", squaredSum=" + squaredSum + ", tripleSum=" + tripleSum + ", quarticSum=" + quarticSum
                + ", missingCount=" + missingCount + ", totalCount=" + totalCount + "]";
    }
}