package hex.glm;

import hex.DataInfo;
import hex.DataInfo.Row;
import hex.FrameTask2;
import hex.glm.GLMModel.GLMParameters;
import hex.glm.GLMModel.GLMParameters.Link;
import hex.glm.GLMModel.GLMWeightsFun;
import hex.glm.GLMModel.GLMWeights;
import hex.gram.Gram;
import hex.glm.GLMModel.GLMParameters.Family;
import water.H2O.H2OCountedCompleter;
import water.*;
import water.fvec.*;
import water.util.ArrayUtils;
import water.util.FrameUtils;
import water.util.MathUtils;
import water.util.MathUtils.BasicStats;

import java.util.Arrays;

/**
 * All GLM related distributed tasks:
 *
 * YMUTask           - computes response means on actual datasets (if some rows are ignored, e.g. when skipping rows with NAs and/or doing cross-validation)
 * GLMGradientTask   - computes the gradient at a given beta, used by L-BFGS and for the KKT condition check
 * GLMLineSearchTask - computes residual deviance(s) at given beta(s), used by line search (both L-BFGS and IRLSM)
 * GLMIterationTask  - used by IRLSM to compute the Gram matrix and response, i.e. t(X) W X and t(X) W z
 *
 * @author tomasnykodym
 */
public abstract class GLMTask {

  static class NullDevTask extends MRTask<NullDevTask> {
    double _nullDev;
    final double [] _ymu;
    final GLMWeightsFun _glmf;
    final boolean _hasWeights;
    final boolean _hasOffset;

    public NullDevTask(GLMWeightsFun glmf, double [] ymu, boolean hasWeights, boolean hasOffset) {
      _glmf = glmf;
      _ymu = ymu;
      _hasWeights = hasWeights;
      _hasOffset = hasOffset;
    }

    @Override public void map(Chunk [] chks) {
      int i = 0;
      int len = chks[0]._len;
      Chunk w = _hasWeights?chks[i++]:new C0DChunk(1.0,len);
      Chunk o = _hasOffset?chks[i++]:new C0DChunk(0.0,len);
      Chunk r = chks[i];
      if(_glmf._family != Family.multinomial) {
        double ymu = _glmf.link(_ymu[0]);
        for (int j = 0; j < len; ++j)
          _nullDev += w.atd(j)*_glmf.deviance(r.atd(j), _glmf.linkInv(ymu + o.atd(j)));
      } else {
        throw H2O.unimpl();
      }
    }
    @Override public void reduce(NullDevTask ndt) {_nullDev += ndt._nullDev;}
  }

  static class GLMResDevTask extends FrameTask2<GLMResDevTask> {
    final GLMWeightsFun _glmf;
    final double [] _beta;
    double _resDev = 0;
    long _nobs;
    double _likelihood;

    public GLMResDevTask(Key jobKey, DataInfo dinfo, GLMParameters parms, double [] beta) {
      super(null,dinfo, jobKey);
      _glmf = new GLMWeightsFun(parms);
      _beta = beta;
      _sparseOffset = _sparse?GLM.sparseOffset(_beta,_dinfo):0;
    }
    private transient GLMWeights _glmw;
    private final double _sparseOffset;

    @Override public boolean handlesSparseData(){return true;}

    @Override public void chunkInit() {
      _glmw = new GLMWeights();
    }
    @Override protected void processRow(Row r) {
      _glmf.computeWeights(r.response(0), r.innerProduct(_beta) + _sparseOffset, r.offset, r.weight, _glmw);
      _resDev += _glmw.dev;
      _likelihood += _glmw.l;
      ++_nobs;
    }
    @Override public void reduce(GLMResDevTask gt) {_nobs += gt._nobs; _resDev += gt._resDev; _likelihood += gt._likelihood;}
    public double avgDev(){return _resDev/_nobs;}
    public double dev(){return _resDev;}
  }
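  // Hedged illustration (not part of the original task code): what GLMResDevTask
  // above accumulates per row for the gaussian family, where the unit deviance is
  // the weighted squared error w*(y - mu)^2. The arrays are hypothetical inputs.
  static double gaussianDevianceExample(double [] ys, double [] mus, double [] ws) {
    double dev = 0;
    for(int i = 0; i < ys.length; ++i) {
      double d = ys[i] - mus[i];
      dev += ws[i]*d*d; // per-row contribution, summed across chunks by reduce()
    }
    return dev;
  }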
  static class GLMResDevTaskMultinomial extends FrameTask2<GLMResDevTaskMultinomial> {
    final double [][] _beta;
    double _likelihood;
    final int _nclasses;
    long _nobs;

    public GLMResDevTaskMultinomial(Key jobKey, DataInfo dinfo, double [] beta, int nclasses) {
      super(null,dinfo, jobKey);
      _beta = ArrayUtils.convertTo2DMatrix(beta,beta.length/nclasses);
      _nclasses = nclasses;
    }
    @Override public boolean handlesSparseData(){return true;}
    private transient double [] _sparseOffsets;

    @Override public void chunkInit() {
      _sparseOffsets = MemoryManager.malloc8d(_nclasses);
      if(_sparse)
        for(int c = 0; c < _nclasses; ++c)
          _sparseOffsets[c] = GLM.sparseOffset(_beta[c],_dinfo);
    }
    @Override protected void processRow(Row r) {
      _nobs++;
      double sumExp = 0;
      for(int c = 0; c < _nclasses; ++c)
        sumExp += Math.exp(r.innerProduct(_beta[c]) + _sparseOffsets[c]);
      int c = (int)r.response(0);
      _likelihood -= r.weight * ((r.innerProduct(_beta[c]) + _sparseOffsets[c]) - Math.log(sumExp));
    }
    @Override public void reduce(GLMResDevTaskMultinomial gt) {_nobs += gt._nobs; _likelihood += gt._likelihood;}
    public double avgDev(){return _likelihood*2/_nobs;}
    public double dev(){return _likelihood*2;}
  }

  static class WeightedSDTask extends MRTask<WeightedSDTask> {
    final int _weightId;
    final double [] _mean;
    public double [] _varSum;

    public WeightedSDTask(int wId, double [] mean){
      _weightId = wId;
      _mean = mean;
    }
    @Override public void map(Chunk [] chks){
      double [] weights = null;
      if(_weightId != - 1){
        weights = MemoryManager.malloc8d(chks[_weightId]._len);
        chks[_weightId].getDoubles(weights,0,weights.length);
        chks = ArrayUtils.remove(chks,_weightId);
      }
      _varSum = MemoryManager.malloc8d(_mean.length);
      double [] vals = MemoryManager.malloc8d(chks[0]._len);
      int [] ids = MemoryManager.malloc4(chks[0]._len);
      for(int c = 0; c < _mean.length; ++c){
        double mu = _mean[c];
        int n = chks[c].getSparseDoubles(vals,ids);
        double s = 0;
        for(int i = 0; i < n; ++i) {
          double d = vals[i];
          if(Double.isNaN(d)) // NAs are either skipped or replaced with mean (i.e. can also be skipped)
            continue;
          d = d - mu;
          if(_weightId != -1)
            s += weights[ids[i]]*d*d;
          else
            s += d*d;
        }
        _varSum[c] = s;
      }
    }
    public void reduce(WeightedSDTask t){
      ArrayUtils.add(_varSum,t._varSum);
    }
  }
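  // Hedged sketch: the per-column accumulation WeightedSDTask above performs,
  // shown on a plain array instead of a Chunk. Given a precomputed (weighted)
  // mean, it sums w*(x - mean)^2 while skipping NAs; dividing the result by the
  // weight sum yields the weighted variance. Inputs are hypothetical.
  static double weightedVarSumExample(double [] xs, double [] ws, double mean) {
    double s = 0;
    for(int i = 0; i < xs.length; ++i) {
      if(Double.isNaN(xs[i])) continue; // NAs skipped, mirroring the task
      double d = xs[i] - mean;
      s += ws[i]*d*d;
    }
    return s;
  }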
  static public class YMUTask extends MRTask<YMUTask> {
    double _yMin = Double.POSITIVE_INFINITY, _yMax = Double.NEGATIVE_INFINITY;
    final int _responseId;
    final int _weightId;
    final int _offsetId;
    final int _nums;   // number of numeric columns
    final int _numOff;
    final boolean _skipNAs;
    final boolean _computeWeightedMeanSigmaResponse;

    private BasicStats _basicStats;
    private BasicStats _basicStatsResponse;
    double [] _yMu;
    final int _nClasses;

    private double [] _predictorSDs;
    private final boolean _expandedResponse; // true iff family == multinomial and response has been manually expanded into binary columns

    public double [] predictorMeans(){return _basicStats.mean();}
    public double [] predictorSDs(){
      if(_predictorSDs != null) return _predictorSDs;
      return (_predictorSDs = _basicStats.sigma());
    }
    public double [] responseMeans(){return _basicStatsResponse.mean();}
    public double [] responseSDs(){ return _basicStatsResponse.sigma(); }

    public YMUTask(DataInfo dinfo, int nclasses, boolean computeWeightedMeanSigmaResponse, boolean skipNAs, boolean haveResponse, boolean expandedResponse) {
      _nums = dinfo._nums;
      _numOff = dinfo._cats;
      _responseId = haveResponse ? dinfo.responseChunkId(0) : -1;
      _weightId = dinfo._weights?dinfo.weightChunkId():-1;
      _offsetId = dinfo._offset?dinfo.offsetChunkId():-1;
      _nClasses = nclasses;
      _computeWeightedMeanSigmaResponse = computeWeightedMeanSigmaResponse;
      _skipNAs = skipNAs;
      _expandedResponse = _nClasses == 1 || expandedResponse;
    }

    @Override public void setupLocal(){}

    @Override public void map(Chunk [] chunks) {
      _yMu = new double[_nClasses];
      double [] ws = MemoryManager.malloc8d(chunks[0].len());
      if(_weightId != -1)
        chunks[_weightId].getDoubles(ws,0,ws.length);
      else
        Arrays.fill(ws,1);
      boolean changedWeights = false;
      if(_skipNAs) { // first find the rows to skip, need to go over all chunks including categoricals
        double [] vals = MemoryManager.malloc8d(chunks[0]._len);
        int [] ids = MemoryManager.malloc4(vals.length);
        for (int i = 0; i < chunks.length; ++i) {
          int n = vals.length;
          if(chunks[i].isSparseZero())
            n = chunks[i].getSparseDoubles(vals,ids);
          else
            chunks[i].getDoubles(vals,0,n);
          for (int r = 0; r < n; ++r) {
            if (ws[r] != 0 && Double.isNaN(vals[r])) {
              ws[r] = 0;
              changedWeights = true;
            }
          }
        }
        if(changedWeights && _weightId != -1)
          chunks[_weightId].set(ws);
      }
      Chunk response = _responseId < 0 ? null : chunks[_responseId];
      double [] numsResponse = null;
      _basicStats = new BasicStats(_nums);
      if(_computeWeightedMeanSigmaResponse) {
        _basicStatsResponse = new BasicStats(_nClasses);
        numsResponse = MemoryManager.malloc8d(_nClasses);
      }
      // compute basic stats for numeric predictors
      for(int i = 0; i < _nums; ++i) {
        Chunk c = chunks[i + _numOff];
        double w;
        for (int r = c.nextNZ(-1); r < c._len; r = c.nextNZ(r)) {
          if ((w = ws[r]) == 0) continue;
          double d = c.atd(r);
          _basicStats.add(d, w, i);
        }
      }
      if (response == null) return;
      long nobs = 0;
      double wsum = 0;
      for(double w:ws) {
        if(w != 0)++nobs;
        wsum += w;
      }
      _basicStats.setNobs(nobs,wsum);
      // compute the mean for the response
      // autoexpand categoricals into binary vecs
      for(int r = 0; r < response._len; ++r) {
        double w;
        if((w = ws[r]) == 0)
          continue;
        if(_computeWeightedMeanSigmaResponse) {
          //FIXME: Add support for subtracting offset from response
          if(_expandedResponse) {
            for (int i = 0; i < _nClasses; ++i)
              numsResponse[i] = chunks[chunks.length - _nClasses + i].atd(r);
          } else {
            Arrays.fill(numsResponse,0);
            double d = response.atd(r);
            if(Double.isNaN(d))
              Arrays.fill(numsResponse,Double.NaN);
            else
              numsResponse[(int)d] = 1;
          }
          _basicStatsResponse.add(numsResponse,w);
        }
        double d = response.atd(r);
        if(!Double.isNaN(d)) {
          if (_nClasses > 2)
            _yMu[(int) d] += w;
          else
            _yMu[0] += w*d;
          if (d < _yMin) _yMin = d;
          if (d > _yMax) _yMax = d;
        }
      }
      if(_basicStatsResponse != null)_basicStatsResponse.setNobs(nobs,wsum);
      for(int i = 0; i < _nums; ++i) {
        if(chunks[i+_numOff].isSparseZero())
          _basicStats.fillSparseZeros(i);
        else if(chunks[i+_numOff].isSparseNA())
          _basicStats.fillSparseNAs(i);
      }
    }

    @Override public void postGlobal() {
      ArrayUtils.mult(_yMu,1.0/_basicStats._wsum);
    }

    @Override public void reduce(YMUTask ymt) {
      if(_basicStats.nobs() > 0 && ymt._basicStats.nobs() > 0) {
        ArrayUtils.add(_yMu,ymt._yMu);
        if(_yMin > ymt._yMin) _yMin = ymt._yMin;
        if(_yMax < ymt._yMax) _yMax = ymt._yMax;
        _basicStats.reduce(ymt._basicStats);
        if(_computeWeightedMeanSigmaResponse)
          _basicStatsResponse.reduce(ymt._basicStatsResponse);
      } else if (_basicStats.nobs() == 0) {
        _yMu = ymt._yMu;
        _yMin = ymt._yMin;
        _yMax = ymt._yMax;
        _basicStats = ymt._basicStats;
        _basicStatsResponse = ymt._basicStatsResponse;
      }
    }
    public double wsum() {return _basicStats._wsum;}
    public long nobs() {return _basicStats.nobs();}
  }
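  // Hedged sketch (not part of the original file): exercises the numerically
  // stable log-sum-exp trick implemented by computeMultinomialEtas below.
  // Subtracting the per-row maximum before exponentiating avoids overflow;
  // the class probabilities land in exps[1..K], exps[0] receives the predicted
  // class, and the return value is log(sum_c exp(eta_c)). The 3-class etas here
  // are made-up example values.
  static double [] softmaxExample() {
    double [] etas = {2.0, 1.0, -1.0}; // hypothetical linear predictors
    double [] exps = new double[etas.length + 1];
    double logSumExp = computeMultinomialEtas(etas, exps); // fills exps, returns log-sum-exp
    assert logSumExp >= 2.0; // log-sum-exp is at least the max eta
    assert Math.abs(exps[1] + exps[2] + exps[3] - 1) < 1e-12; // probabilities sum to 1
    return exps;
  }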
  static double computeMultinomialEtas(double [] etas, double [] exps) {
    double maxRow = ArrayUtils.maxValue(etas);
    double sumExp = 0;
    int K = etas.length;
    for(int c = 0; c < K; ++c) {
      double x = Math.exp(etas[c] - maxRow);
      sumExp += x;
      exps[c+1] = x;
    }
    double reg = 1.0/(sumExp);
    for(int c = 0; c < etas.length; ++c)
      exps[c+1] *= reg;
    // exps[0] holds the predicted class: zero it first so maxIndex scans only the probabilities
    exps[0] = 0;
    exps[0] = ArrayUtils.maxIndex(exps)-1;
    return Math.log(sumExp) + maxRow;
  }

  static class GLMGenericWeightsTask extends FrameTask2<GLMGenericWeightsTask> {
    final double [] _beta;
    double _sparseOffset;
    private final GLMWeightsFun _glmw;
    private transient GLMWeights _ws;
    double _likelihood;

    public GLMGenericWeightsTask(H2OCountedCompleter cmp, DataInfo dinfo, Key jobKey, double [] beta, GLMWeightsFun glmw) {
      super(cmp, dinfo, jobKey);
      _beta = beta;
      _glmw = glmw;
      assert _glmw._family != Family.multinomial:"Generic glm weights task does not work for family multinomial";
    }
    @Override public void chunkInit(){
      _ws = new GLMWeights();
      if(_sparse)
        _sparseOffset = GLM.sparseOffset(_beta,_dinfo);
    }
    @Override protected void processRow(Row row) {
      double eta = row.innerProduct(_beta) + _sparseOffset;
      _glmw.computeWeights(row.response(0),eta,row.offset,row.weight,_ws);
      row.setOutput(0,_ws.w);
      row.setOutput(1,_ws.z);
      _likelihood += _ws.l;
    }
    @Override public void reduce(GLMGenericWeightsTask gwt) { _likelihood += gwt._likelihood; }
  }

  static class GLMMultinomialWeightsTask extends FrameTask2<GLMGenericWeightsTask> {
    final double [] _beta;
    double _sparseOffset;
    double _likelihood;
    final int classId;

    public GLMMultinomialWeightsTask(H2OCountedCompleter cmp, DataInfo dinfo, Key jobKey, double [] beta, int cid) {
      super(cmp, dinfo, jobKey);
      _beta = beta;
      classId = cid;
    }
    @Override public void chunkInit(){
      if(_sparse)
        _sparseOffset = GLM.sparseOffset(_beta,_dinfo);
    }
    @Override protected void processRow(Row row) {
      double y = row.response(0);
      double maxRow = row.getOutput(2);
      double etaY = row.getOutput(3);
      double eta = row.innerProduct(_beta) + _sparseOffset;
      if(classId == y){
        etaY = eta;
        row.setOutput(3,eta);
      }
      if(eta > maxRow) {
        maxRow = eta;
        row.setOutput(2,eta);
      }
      double etaExp = Math.exp(eta - maxRow);
      double sumExp = row.getOutput(4) + etaExp;
      double mu = etaExp/sumExp;
      if(mu < 1e-16)
        mu = 1e-16;
      double d = mu*(1-mu);
      row.setOutput(0,row.weight * d);
      row.setOutput(1,eta + (y-mu)/d); // wz = r.weight * (eta * d + (y-mu));
      _likelihood += row.weight * (etaY - Math.log(sumExp) - maxRow);
    }
    @Override public void reduce(GLMGenericWeightsTask gwt) { _likelihood += gwt._likelihood; }
  }

  static class GLMBinomialWeightsTask extends FrameTask2<GLMGenericWeightsTask> {
    final double [] _beta;
    double _sparseOffset;
    double _likelihood;

    public GLMBinomialWeightsTask(H2OCountedCompleter cmp, DataInfo dinfo, Key jobKey, double [] beta) {
      super(cmp, dinfo, jobKey);
      _beta = beta;
    }
    @Override public void chunkInit(){
      if(_sparse)
        _sparseOffset = GLM.sparseOffset(_beta,_dinfo);
    }
    @Override protected void processRow(Row row) {
      double y = row.response(0);
      double eta = row.innerProduct(_beta) + _sparseOffset;
      double mu = 1/(Math.exp(-eta) + 1);
      if(mu < 1e-16)
        mu = 1e-16;
      double d = mu*(1-mu);
      row.setOutput(0,row.weight * d);
      row.setOutput(1,eta + (y-mu)/d); // wz = r.weight * (eta * d + (y-mu));
      _likelihood += row.weight * (MathUtils.y_log_y(y, mu) + MathUtils.y_log_y(1 - y, 1 - mu));
    }
    @Override public void reduce(GLMGenericWeightsTask gwt) { _likelihood += gwt._likelihood; }
  }
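  // Hedged sketch: the IRLSM working weight and working response that
  // GLMBinomialWeightsTask above writes out per row. With mu = 1/(1+exp(-eta))
  // and d = mu*(1-mu), the pair is {w, z} = {weight*d, eta + (y-mu)/d}.
  // The arguments are hypothetical row values.
  static double [] binomialWorkingRowExample(double y, double eta, double weight) {
    double mu = 1/(Math.exp(-eta) + 1);
    if(mu < 1e-16) mu = 1e-16; // same clamp the task uses
    double d = mu*(1-mu);
    return new double[]{weight*d, eta + (y - mu)/d}; // {working weight, working response}
  }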
  static abstract class GLMGradientTask extends MRTask<GLMGradientTask> {
    final double [] _beta;
    public double [] _gradient;
    public double _likelihood;
    final transient double _currentLambda;
    final transient double _reg;
    protected final DataInfo _dinfo;

    protected GLMGradientTask(Key jobKey, DataInfo dinfo, double reg, double lambda, double[] beta){
      _dinfo = dinfo;
      _beta = beta.clone();
      _reg = reg;
      _currentLambda = lambda;
    }
    protected abstract void computeGradientMultipliers(double [] es, double [] ys, double [] ws);

    private final void computeCategoricalEtas(Chunk [] chks, double [] etas, double [] vals, int [] ids) {
      // categoricals
      for(int cid = 0; cid < _dinfo._cats; ++cid){
        Chunk c = chks[cid];
        if(c.isSparseZero()) {
          int nvals = c.getSparseDoubles(vals,ids,-1);
          for(int i = 0; i < nvals; ++i){
            int id = _dinfo.getCategoricalId(cid,(int)vals[i]);
            if(id >= 0) etas[ids[i]] += _beta[id];
          }
        } else {
          c.getIntegers(ids, 0, c._len,-1);
          for(int i = 0; i < ids.length; ++i){
            int id = _dinfo.getCategoricalId(cid,ids[i]);
            if(id >= 0) etas[i] += _beta[id];
          }
        }
      }
    }

    private final void computeCategoricalGrads(Chunk [] chks, double [] etas, double [] vals, int [] ids) {
      // categoricals
      for(int cid = 0; cid < _dinfo._cats; ++cid){
        Chunk c = chks[cid];
        if(c.isSparseZero()) {
          int nvals = c.getSparseDoubles(vals,ids,-1);
          for(int i = 0; i < nvals; ++i){
            int id = _dinfo.getCategoricalId(cid,(int)vals[i]);
            if(id >= 0) _gradient[id] += etas[ids[i]];
          }
        } else {
          c.getIntegers(ids, 0, c._len,-1);
          for(int i = 0; i < ids.length; ++i){
            int id = _dinfo.getCategoricalId(cid,ids[i]);
            if(id >= 0) _gradient[id] += etas[i];
          }
        }
      }
    }

    private final void computeNumericEtas(Chunk [] chks, double [] etas, double [] vals, int [] ids) {
      int numOff = _dinfo.numStart();
      for(int cid = 0; cid < _dinfo._nums; ++cid){
        double scale = _dinfo._normMul != null?_dinfo._normMul[cid]:1;
        double off = _dinfo._normSub != null?_dinfo._normSub[cid]:0;
        double NA = _dinfo._numMeans[cid];
        Chunk c = chks[cid+_dinfo._cats];
        double b = scale*_beta[numOff+cid];
        if(c.isSparseZero()){
          int nvals = c.getSparseDoubles(vals,ids,NA);
          for(int i = 0; i < nvals; ++i)
            etas[ids[i]] += vals[i] * b;
        } else if(c.isSparseNA()){
          int nvals = c.getSparseDoubles(vals,ids,NA);
          for(int i = 0; i < nvals; ++i)
            etas[ids[i]] += (vals[i] - off) * b;
        } else {
          c.getDoubles(vals,0,vals.length,NA);
          for(int i = 0; i < vals.length; ++i)
            etas[i] += (vals[i] - off) * b;
        }
      }
    }

    private final void computeNumericGrads(Chunk [] chks, double [] etas, double [] vals, int [] ids) {
      int numOff = _dinfo.numStart();
      for(int cid = 0; cid < _dinfo._nums; ++cid){
        double NA = _dinfo._numMeans[cid];
        Chunk c = chks[cid+_dinfo._cats];
        double scale = _dinfo._normMul == null?1:_dinfo._normMul[cid];
        if(c.isSparseZero()){
          double g = 0;
          int nVals = c.getSparseDoubles(vals,ids,NA);
          for(int i = 0; i < nVals; ++i)
            g += vals[i]*scale*etas[ids[i]];
          _gradient[numOff+cid] = g;
        } else if(c.isSparseNA()){
          double off = _dinfo._normSub == null?0:_dinfo._normSub[cid];
          double g = 0;
          int nVals = c.getSparseDoubles(vals,ids,NA);
          for(int i = 0; i < nVals; ++i)
            g += (vals[i]-off)*scale*etas[ids[i]];
          _gradient[numOff+cid] = g;
        } else {
          double off = _dinfo._normSub == null?0:_dinfo._normSub[cid];
          c.getDoubles(vals,0,vals.length,NA);
          double g = 0;
          for(int i = 0; i < vals.length; ++i)
            g += (vals[i]-off)*scale*etas[i];
          _gradient[numOff+cid] = g;
        }
      }
    }

    public void map(Chunk [] chks) {
      _gradient = MemoryManager.malloc8d(_beta.length);
      Chunk response = chks[chks.length-1];
      Chunk weights = _dinfo._weights?chks[_dinfo.weightChunkId()]:new C0DChunk(1,response._len);
      double [] ws = weights.getDoubles(MemoryManager.malloc8d(weights._len),0,weights._len);
      double [] ys = response.getDoubles(MemoryManager.malloc8d(response._len),0,response._len);
      double [] etas = MemoryManager.malloc8d(response._len);
      if(_dinfo._offset)
        chks[_dinfo.offsetChunkId()].getDoubles(etas,0,etas.length);
      double sparseOffset = 0;
      int numStart = _dinfo.numStart();
      if(_dinfo._normSub != null)
        for(int i = 0; i < _dinfo._nums; ++i)
          if(chks[_dinfo._cats + i].isSparseZero())
            sparseOffset -= _beta[numStart + i]*_dinfo._normSub[i]*_dinfo._normMul[i];
      ArrayUtils.add(etas,sparseOffset + _beta[_beta.length-1]);
      double [] vals = MemoryManager.malloc8d(response._len);
      int [] ids = MemoryManager.malloc4(response._len);
      computeCategoricalEtas(chks,etas,vals,ids);
      computeNumericEtas(chks,etas,vals,ids);
      computeGradientMultipliers(etas,ys,ws);
      // walk the chunks again, add to the gradient
      computeCategoricalGrads(chks,etas,vals,ids);
      computeNumericGrads(chks,etas,vals,ids);
      // add intercept
      _gradient[_gradient.length-1] = ArrayUtils.sum(etas);
      if(_dinfo._normSub != null) {
        double icpt = _gradient[_gradient.length-1];
        for(int i = 0; i < _dinfo._nums; ++i) {
          if(chks[_dinfo._cats+i].isSparseZero()) {
            double d = _dinfo._normSub[i] * _dinfo._normMul[i];
            _gradient[numStart + i] -= d * icpt;
          }
        }
      }
    }
    @Override public final void reduce(GLMGradientTask gmgt){
      ArrayUtils.add(_gradient,gmgt._gradient);
      _likelihood += gmgt._likelihood;
    }
    @Override public final void postGlobal(){
      ArrayUtils.mult(_gradient,_reg);
      for(int j = 0; j < _beta.length - 1; ++j)
        _gradient[j] += _currentLambda * _beta[j];
    }
  }
  static class GLMGenericGradientTask extends GLMGradientTask {
    private final GLMWeightsFun _glmf;
    public GLMGenericGradientTask(Key jobKey, DataInfo dinfo, GLMParameters parms, double lambda, double[] beta) {
      super(jobKey, dinfo, parms._obj_reg, lambda, beta);
      _glmf = new GLMWeightsFun(parms);
    }
    @Override protected void computeGradientMultipliers(double [] es, double [] ys, double [] ws){
      double l = 0;
      for(int i = 0; i < es.length; ++i) {
        if (Double.isNaN(ys[i]) || ws[i] == 0) {
          es[i] = 0;
        } else {
          double mu = _glmf.linkInv(es[i]);
          l += ws[i] * _glmf.likelihood(ys[i], mu);
          double var = _glmf.variance(mu);
          if (var < 1e-6) var = 1e-6;
          es[i] = ws[i] * (mu - ys[i]) / (var * _glmf.linkDeriv(mu));
        }
      }
      _likelihood = l;
    }
  }

  static class GLMPoissonGradientTask extends GLMGradientTask {
    private final GLMWeightsFun _glmf;
    public GLMPoissonGradientTask(Key jobKey, DataInfo dinfo, GLMParameters parms, double lambda, double[] beta) {
      super(jobKey, dinfo, parms._obj_reg, lambda, beta);
      _glmf = new GLMWeightsFun(parms);
    }
    @Override protected void computeGradientMultipliers(double [] es, double [] ys, double [] ws){
      double l = 0;
      for(int i = 0; i < es.length; ++i) {
        if (Double.isNaN(ys[i]) || ws[i] == 0) {
          es[i] = 0;
        } else {
          double eta = es[i];
          double mu = Math.exp(eta);
          double yr = ys[i];
          double diff = mu - yr;
          l += ws[i] * (yr == 0?mu:yr*Math.log(yr/mu) + diff);
          es[i] = ws[i]*diff;
        }
      }
      _likelihood = 2*l;
    }
  }

  static class GLMQuasiBinomialGradientTask extends GLMGradientTask {
    private final GLMWeightsFun _glmf;
    public GLMQuasiBinomialGradientTask(Key jobKey, DataInfo dinfo, GLMParameters parms, double lambda, double[] beta) {
      super(jobKey, dinfo, parms._obj_reg, lambda, beta);
      _glmf = new GLMWeightsFun(parms);
    }
    @Override protected void computeGradientMultipliers(double [] es, double [] ys, double [] ws){
      double l = 0;
      for(int i = 0; i < es.length; ++i){
        double p = _glmf.linkInv(es[i]);
        if(p == 0) p = 1e-15;
        if(p == 1) p = 1 - 1e-15;
        es[i] = -ws[i]*(ys[i]-p);
        l += ys[i]*Math.log(p) + (1-ys[i])*Math.log(1-p);
      }
      _likelihood = -l;
    }
  }
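  // Hedged sketch: the per-row gradient multiplier GLMPoissonGradientTask above
  // stores into es[i]. For the log link, mu = exp(eta) and the derivative of the
  // weighted deviance with respect to eta reduces to w*(mu - y). Inputs are
  // hypothetical row values.
  static double poissonGradientMultiplierExample(double y, double eta, double w) {
    return w*(Math.exp(eta) - y);
  }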
  static class GLMBinomialGradientTask extends GLMGradientTask {
    public GLMBinomialGradientTask(Key jobKey, DataInfo dinfo, GLMParameters parms, double lambda, double [] beta) {
      super(jobKey,dinfo,parms._obj_reg,lambda,beta);
      assert parms._family == Family.binomial && parms._link == Link.logit;
    }
    @Override protected void computeGradientMultipliers(double[] es, double[] ys, double[] ws) {
      for(int i = 0; i < es.length; ++i) {
        if(Double.isNaN(ys[i]) || ws[i] == 0){es[i] = 0; continue;}
        double e = es[i], w = ws[i];
        double yr = ys[i];
        double ym = 1.0 / (Math.exp(-e) + 1.0);
        if(ym != yr)
          _likelihood += w*((MathUtils.y_log_y(yr, ym)) + MathUtils.y_log_y(1 - yr, 1 - ym));
        es[i] = ws[i] * (ym - yr);
      }
    }
  }

  static class GLMGaussianGradientTask extends GLMGradientTask {
    public GLMGaussianGradientTask(Key jobKey, DataInfo dinfo, GLMParameters parms, double lambda, double [] beta) {
      super(jobKey,dinfo,parms._obj_reg,lambda,beta);
      assert parms._family == Family.gaussian && parms._link == Link.identity;
    }
    @Override protected void computeGradientMultipliers(double[] es, double[] ys, double[] ws) {
      for(int i = 0; i < es.length; ++i) {
        double w = ws[i];
        if(w == 0 || Double.isNaN(ys[i])){
          es[i] = 0;
          continue;
        }
        double e = es[i], y = ys[i];
        double d = (e-y);
        double wd = w*d;
        _likelihood += wd*d;
        es[i] = wd;
      }
    }
  }

  static class GLMMultinomialGradientTask extends MRTask<GLMMultinomialGradientTask> {
    final double [][] _beta;
    final transient double _currentLambda;
    final transient double _reg;
    private double [][] _gradient;
    double _likelihood;
    Job _job;
    final boolean _sparse;
    final DataInfo _dinfo;

    /**
     * @param job
     * @param dinfo
     * @param lambda
     * @param beta coefficients as 2D array [P][K]
     * @param reg
     */
    public GLMMultinomialGradientTask(Job job, DataInfo dinfo, double lambda, double[][] beta, double reg) {
      _currentLambda = lambda;
      _reg = reg;
      // need to flip the beta
      _beta = new double[beta[0].length][beta.length];
      for(int i = 0; i < _beta.length; ++i)
        for(int j = 0; j < _beta[i].length; ++j)
          _beta[i][j] = beta[j][i];
      _job = job;
      _sparse = FrameUtils.sparseRatio(dinfo._adaptedFrame) < .125;
      _dinfo = dinfo;
      if(_dinfo._offset) throw H2O.unimpl();
    }

    private final void computeCategoricalEtas(Chunk [] chks, double [][] etas, double [] vals, int [] ids) {
      // categoricals
      for(int cid = 0; cid < _dinfo._cats; ++cid){
        Chunk c = chks[cid];
        if(c.isSparseZero()) {
          int nvals = c.getSparseDoubles(vals,ids,-1);
          for(int i = 0; i < nvals; ++i){
            int id = _dinfo.getCategoricalId(cid,(int)vals[i]);
            if(id >= 0) ArrayUtils.add(etas[ids[i]],_beta[id]);
          }
        } else {
          c.getIntegers(ids, 0, c._len,-1);
          for(int i = 0; i < ids.length; ++i){
            int id = _dinfo.getCategoricalId(cid,ids[i]);
            if(id >= 0) ArrayUtils.add(etas[i],_beta[id]);
          }
        }
      }
    }

    private final void computeCategoricalGrads(Chunk [] chks, double [][] etas, double [] vals, int [] ids) {
      // categoricals
      for(int cid = 0; cid < _dinfo._cats; ++cid){
        Chunk c = chks[cid];
        if(c.isSparseZero()) {
          int nvals = c.getSparseDoubles(vals,ids,-1);
          for(int i = 0; i < nvals; ++i){
            int id = _dinfo.getCategoricalId(cid,(int)vals[i]);
            if(id >= 0) ArrayUtils.add(_gradient[id],etas[ids[i]]);
          }
        } else {
          c.getIntegers(ids, 0, c._len,-1);
          for(int i = 0; i < ids.length; ++i){
            int id = _dinfo.getCategoricalId(cid,ids[i]);
            if(id >= 0) ArrayUtils.add(_gradient[id],etas[i]);
          }
        }
      }
    }

    private final void computeNumericEtas(Chunk [] chks, double [][] etas, double [] vals, int [] ids) {
      int numOff = _dinfo.numStart();
      for(int cid = 0; cid < _dinfo._nums; ++cid){
        double [] b = _beta[numOff+cid];
        double scale = _dinfo._normMul != null?_dinfo._normMul[cid]:1;
        double NA = _dinfo._numMeans[cid];
        Chunk c = chks[cid+_dinfo._cats];
        if(c.isSparseZero() || c.isSparseNA()){
          int nvals = c.getSparseDoubles(vals,ids,NA);
          for(int i = 0; i < nvals; ++i) {
            double d = vals[i]*scale;
            ArrayUtils.wadd(etas[ids[i]],b,d);
          }
        } else {
          c.getDoubles(vals,0,vals.length,NA);
          double off = _dinfo._normSub != null?_dinfo._normSub[cid]:0;
          for(int i = 0; i < vals.length; ++i) {
            double d = (vals[i] - off) * scale;
            ArrayUtils.wadd(etas[i],b,d);
          }
        }
      }
    }
    private final void computeNumericGrads(Chunk [] chks, double [][] etas, double [] vals, int [] ids) {
      int numOff = _dinfo.numStart();
      for(int cid = 0; cid < _dinfo._nums; ++cid){
        double [] g = _gradient[numOff + cid];
        double NA = _dinfo._numMeans[cid];
        Chunk c = chks[cid+_dinfo._cats];
        double scale = _dinfo._normMul == null?1:_dinfo._normMul[cid];
        if(c.isSparseZero() || c.isSparseNA()){
          int nVals = c.getSparseDoubles(vals,ids,NA);
          for (int i = 0; i < nVals; ++i)
            ArrayUtils.wadd(g,etas[ids[i]],vals[i] * scale);
        } else {
          double off = _dinfo._normSub == null?0:_dinfo._normSub[cid];
          c.getDoubles(vals,0,vals.length,NA);
          for(int i = 0; i < vals.length; ++i)
            ArrayUtils.wadd(g,etas[i],(vals[i] - off) * scale);
        }
      }
    }

    final void computeGradientMultipliers(double [][] etas, double [] ys, double [] ws){
      int K = _beta[0].length;
      double [] exps = new double[K+1];
      for(int i = 0; i < etas.length; ++i) {
        double w = ws[i];
        if(w == 0){
          Arrays.fill(etas[i],0);
          continue;
        }
        int y = (int) ys[i];
        double logSumExp = computeMultinomialEtas(etas[i], exps);
        _likelihood -= w * (etas[i][y] - logSumExp);
        for (int c = 0; c < K; ++c)
          etas[i][c] = w * (exps[c + 1] - (y == c ? 1 : 0));
      }
    }
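    // Hedged sketch (not in the original source): the per-row, per-class
    // multiplier produced by computeGradientMultipliers above. With softmax
    // probabilities p and observed class y, each class c gets w*(p[c] - 1{y==c}).
    // Inputs are hypothetical row values.
    static double [] multinomialMultiplierExample(double [] probs, int y, double w) {
      double [] m = new double[probs.length];
      for(int c = 0; c < probs.length; ++c)
        m[c] = w * (probs[c] - (y == c ? 1 : 0));
      return m;
    }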
    @Override public void map(Chunk[] chks) {
      if(_job != null && _job.stop_requested()) throw new Job.JobCancelledException();
      int numStart = _dinfo.numStart();
      int K = _beta[0].length; // number of classes
      int P = _beta.length;    // number of predictors (+ intercept)
      int M = chks[0]._len;    // number of rows in this chunk of data
      _gradient = new double[P][K];
      double [][] etas = new double[M][K];
      double [] offsets = new double[K];
      for(int k = 0; k < K; ++k)
        offsets[k] = _beta[P-1][k]; // intercept
      // sparse offset + intercept
      if(_dinfo._normSub != null) {
        for(int i = 0; i < _dinfo._nums; ++i)
          if(chks[_dinfo._cats + i].isSparseZero())
            ArrayUtils.wadd(offsets,_beta[numStart + i], -_dinfo._normSub[i]*_dinfo._normMul[i]);
      }
      for (int i = 0; i < chks[0]._len; ++i)
        System.arraycopy(offsets, 0, etas[i], 0, K);
      Chunk response = chks[_dinfo.responseChunkId(0)];
      double [] ws = MemoryManager.malloc8d(M);
      if(_dinfo._weights) ws = chks[_dinfo.weightChunkId()].getDoubles(ws,0,M);
      else Arrays.fill(ws,1);
      chks = Arrays.copyOf(chks,chks.length-1-(_dinfo._weights?1:0));
      double [] vals = MemoryManager.malloc8d(M);
      int [] ids = MemoryManager.malloc4(M);
      computeCategoricalEtas(chks,etas,vals,ids);
      computeNumericEtas(chks,etas,vals,ids);
      computeGradientMultipliers(etas,response.getDoubles(vals,0,M),ws);
      computeCategoricalGrads(chks,etas,vals,ids);
      computeNumericGrads(chks,etas,vals,ids);
      double [] g = _gradient[P-1];
      // add intercept
      for(int i = 0; i < etas.length; ++i)
        ArrayUtils.add(g,etas[i]);
      if(_dinfo._normSub != null) {
        double [] icpt = _gradient[P-1];
        for(int i = 0; i < _dinfo._normSub.length; ++i) {
          if(chks[_dinfo._cats+i].isSparseZero())
            ArrayUtils.wadd(_gradient[numStart+i],icpt,-_dinfo._normSub[i]*_dinfo._normMul[i]);
        }
      }
    }
    @Override public void reduce(GLMMultinomialGradientTask gmgt){
      if(_gradient != gmgt._gradient)
        ArrayUtils.add(_gradient,gmgt._gradient);
      _likelihood += gmgt._likelihood;
    }
    @Override public void postGlobal(){
      ArrayUtils.mult(_gradient, _reg);
      int P = _beta.length;
      // add l2 penalty
      for(int c = 0; c < P-1; ++c)
        for(int j = 0; j < _beta[0].length; ++j)
          _gradient[c][j] += _currentLambda * _beta[c][j];
    }
    public double [] gradient(){
      double [] res = MemoryManager.malloc8d(_gradient.length*_gradient[0].length);
      int P = _gradient.length;
      for(int k = 0; k < _gradient[0].length; ++k)
        for(int i = 0; i < _gradient.length; ++i)
          res[k*P + i] = _gradient[i][k];
      return res;
    }
  }

//  public static class GLMCoordinateDescentTask extends MRTask<GLMCoordinateDescentTask> {
//    final double [] _betaUpdate;
//    final double [] _beta;
//    final double _xOldSub;
//    final double _xOldMul;
//    final double _xNewSub;
//    final double _xNewMul;
//
//    double [] _xy;
//
//    public GLMCoordinateDescentTask(double [] betaUpdate, double [] beta, double xOldSub, double xOldMul, double xNewSub, double xNewMul) {
//      _betaUpdate = betaUpdate;
//      _beta = beta;
//      _xOldSub = xOldSub;
//      _xOldMul = xOldMul;
//      _xNewSub = xNewSub;
//      _xNewMul = xNewMul;
//    }
//
//    public void map(Chunk [] chks) {
//      Chunk xOld = chks[0];
//      Chunk xNew = chks[1];
//      if(xNew.vec().isCategorical()){
//        _xy = MemoryManager.malloc8d(xNew.vec().domain().length);
//      } else
//        _xy = new double[1];
//      Chunk eta = chks[2];
//      Chunk weights = chks[3];
//      Chunk res = chks[4];
//      for(int i = 0; i < eta._len; ++i) {
//        double w = weights.atd(i);
//        double e = eta.atd(i);
//        if(_betaUpdate != null) {
//          if (xOld.vec().isCategorical()) {
//            int cid = (int) xOld.at8(i);
//            e = +_betaUpdate[cid];
//          } else
//            e += _betaUpdate[0] * (xOld.atd(i) - _xOldSub) * _xOldMul;
//          eta.set(i, e);
//        }
//        int cid = 0;
//        double x = w;
//        if(xNew.vec().isCategorical()) {
//          cid = (int) xNew.at8(i);
//          e -= _beta[cid];
//        } else {
//          x = (xNew.atd(i) - _xNewSub) * _xNewMul;
//          e -= _beta[0] * x;
//          x *= w;
//        }
//        _xy[cid] += x * (res.atd(i) - e);
//      }
//    }
//    @Override public void reduce(GLMCoordinateDescentTask t) {
//      ArrayUtils.add(_xy, t._xy);
//    }
//  }

//  /**
//   * Compute initial solution for multinomial problem (Simple weighted LR with all weights = 1/4)
//   */
//  public static final class GLMMultinomialInitTsk extends MRTask<GLMMultinomialInitTsk> {
//    double [] _mu;
//    DataInfo _dinfo;
//    Gram _gram;
//    double [][] _xy;
//
//    @Override public void map(Chunk [] chks) {
//      Rows rows = _dinfo.rows(chks);
//      _gram = new Gram(_dinfo);
//      _xy = new double[_mu.length][_dinfo.fullN()+1];
//      int numStart = _dinfo.numStart();
//      double [] ds = new double[_mu.length];
//      for(int i = 0; i < ds.length; ++i)
//        ds[i] = 1.0/(_mu[i] * (1-_mu[i]));
//      for(int i = 0; i < rows._nrows; ++i) {
//        Row r = rows.row(i);
//        double y = r.response(0);
//        _gram.addRow(r,.25);
//        for(int c = 0; c < _mu.length; ++c) {
//          double iY = y == c?1:0;
//          double z = (y-_mu[c]) * ds[i];
//          for(int j = 0; j < r.nBins; ++j)
//            _xy[c][r.binIds[j]] += z;
//          for(int j = 0; j < r.nNums; ++j){
//            int id = r.numIds == null?(j + numStart):r.numIds[j];
//            double val = r.numVals[j];
//            _xy[c][id] += z*val;
//          }
//        }
//      }
//    }
//    @Override public void reduce(){
//
//    }
//  }
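  // Hedged sketch (illustrative only): the dense analogue of what LSTask below
  // accumulates. Given rows x_i with weights w_i, the Gram is t(X) %*% W %*% X;
  // LSTask also forms t(X) %*% W %*% z against the working response. Plain
  // arrays stand in for Chunks here.
  static double[][] denseGramExample(double[][] x, double[] w) {
    int p = x[0].length;
    double[][] xtwx = new double[p][p];
    for(int r = 0; r < x.length; ++r)
      for(int i = 0; i < p; ++i)
        for(int j = 0; j < p; ++j)
          xtwx[i][j] += w[r]*x[r][i]*x[r][j];
    return xtwx;
  }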
  /**
   * Task to compute t(X) %*% W %*% X and t(X) %*% W %*% y
   */
  public static class LSTask extends FrameTask2<LSTask> {
    public double[] _xy;
    public Gram _gram;
    final int numStart;

    public LSTask(H2OCountedCompleter cmp, DataInfo dinfo, Key jobKey) {
      super(cmp, dinfo, jobKey);
      numStart = _dinfo.numStart();
    }

    @Override public void chunkInit() {
      _gram = new Gram(_dinfo.fullN(), _dinfo.largestCat(), _dinfo.numNums(), _dinfo._cats, true);
      _xy = MemoryManager.malloc8d(_dinfo.fullN() + 1);
    }

    @Override protected void processRow(Row r) {
      double wz = r.weight * (r.response(0) - r.offset);
      for (int i = 0; i < r.nBins; ++i) {
        _xy[r.binIds[i]] += wz;
      }
      for (int i = 0; i < r.nNums; ++i) {
        int id = r.numIds == null ? (i + numStart) : r.numIds[i];
        double val = r.numVals[i];
        _xy[id] += wz * val;
      }
      if (_dinfo._intercept)
        _xy[_xy.length - 1] += wz;
      _gram.addRow(r, r.weight);
    }

    @Override public void reduce(LSTask lst) {
      ArrayUtils.add(_xy, lst._xy);
      _gram.add(lst._gram);
    }

    @Override public void postGlobal() {
      if (_sparse && _dinfo._normSub != null) { // need to adjust gram for missing centering!
        int ns = _dinfo.numStart();
        int interceptIdx = _xy.length - 1;
        double[] interceptRow = _gram._xx[interceptIdx - _gram._diagN];
        double nobs = interceptRow[interceptRow.length - 1]; // weighted _nobs
        for (int i = ns; i < _dinfo.fullN(); ++i) {
          double iMean = _dinfo._normSub[i - ns] * _dinfo._normMul[i - ns];
          for (int j = 0; j < ns; ++j)
            _gram._xx[i - _gram._diagN][j] -= interceptRow[j] * iMean;
          for (int j = ns; j <= i; ++j) {
            double jMean = _dinfo._normSub[j - ns] * _dinfo._normMul[j - ns];
            _gram._xx[i - _gram._diagN][j] -= interceptRow[i] * jMean + interceptRow[j] * iMean - nobs * iMean * jMean;
          }
        }
        if (_dinfo._intercept) { // do the intercept row
          for (int j = ns; j < _dinfo.fullN(); ++j)
            interceptRow[j] -= nobs * _dinfo._normSub[j - ns] * _dinfo._normMul[j - ns];
        }
        // and the xy vec as well
        for (int i = ns; i < _dinfo.fullN(); ++i) {
          _xy[i] -= _xy[_xy.length - 1] * _dinfo._normSub[i - ns] * _dinfo._normMul[i - ns];
        }
      }
    }
  }
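  // Hedged illustration of the identity behind LSTask.postGlobal above: with
  // sparse data the per-row centering (x - mean)*mul is skipped, and the Gram
  // is corrected at the end using
  //   sum_i w_i*(x_i - mx)*(u_i - mu) = Sxu - mu*Sx - mx*Su + n*mx*mu,
  // where Sxu, Sx, Su are the uncentered weighted sums and n the weighted row
  // count. All names below are local to this sketch.
  static double centeredCrossProductExample(double sxu, double sx, double su, double n, double mx, double mu) {
    return sxu - mu*sx - mx*su + n*mx*mu;
  }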
  public static class GLMWLSTask extends LSTask {
    final GLMWeightsFun _glmw;
    final double [] _beta;
    double _sparseOffset;

    public GLMWLSTask(H2OCountedCompleter cmp, DataInfo dinfo, Key jobKey, GLMWeightsFun glmw, double [] beta) {
      super(cmp, dinfo, jobKey);
      _glmw = glmw;
      _beta = beta;
    }

    private transient GLMWeights _ws;

    @Override public void chunkInit(){
      super.chunkInit();
      _ws = new GLMWeights();
    }

    @Override public void processRow(Row r) {
      // update weights
      double eta = r.innerProduct(_beta) + _sparseOffset;
      _glmw.computeWeights(r.response(0),eta,r.weight,r.offset,_ws);
      r.weight = _ws.w;
      r.offset = 0; // handled offset here
      r.setResponse(0,_ws.z);
      super.processRow(r);
    }
  }

  public static class GLMMultinomialWLSTask extends LSTask {
    final GLMWeightsFun _glmw;
    final double [] _beta;
    double _sparseOffset;

    public GLMMultinomialWLSTask(H2OCountedCompleter cmp, DataInfo dinfo, Key jobKey, GLMWeightsFun glmw, double [] beta) {
      super(cmp, dinfo, jobKey);
      _glmw = glmw;
      _beta = beta;
    }

    private transient GLMWeights _ws;

    @Override public void chunkInit(){
      super.chunkInit();
      _ws = new GLMWeights();
    }

    @Override public void processRow(Row r) {
      // update weights
      double eta = r.innerProduct(_beta) + _sparseOffset;
      _glmw.computeWeights(r.response(0),eta,r.weight,r.offset,_ws);
      r.weight = _ws.w;
      r.offset = 0; // handled offset here
      r.setResponse(0,_ws.z);
      super.processRow(r);
    }
  }

  public static class GLMIterationTaskMultinomial extends FrameTask2<GLMIterationTaskMultinomial> {
    final int _c;
    final double [] _beta; // current beta to compute update of predictors for the current class
    double [] _xy;
    Gram _gram;
    transient double _sparseOffset;

    public GLMIterationTaskMultinomial(DataInfo dinfo, Key jobKey, double [] beta, int c) {
      super(null, dinfo, jobKey);
      _beta = beta;
      _c = c;
    }

    @Override public void chunkInit(){
      // initialize
      _gram = new Gram(_dinfo.fullN(), _dinfo.largestCat(), _dinfo.numNums(), _dinfo._cats,true);
      _xy = MemoryManager.malloc8d(_dinfo.fullN()+1); // + 1 is for intercept
      if(_sparse)
        _sparseOffset = GLM.sparseOffset(_beta,_dinfo);
    }

    @Override protected void processRow(Row r) {
      double y = r.response(0);
      double sumExp = r.response(1);
      double maxRow = r.response(2);
      int numStart = _dinfo.numStart();
      y = (y == _c)?1:0;
      double eta = r.innerProduct(_beta) + _sparseOffset;
      if(eta > maxRow) maxRow = eta;
      double etaExp = Math.exp(eta-maxRow);
      sumExp += etaExp;
      double mu = (etaExp == Double.POSITIVE_INFINITY?1:(etaExp / sumExp));
      if(mu < 1e-16)
        mu = 1e-16;
      double d = mu*(1-mu);
      double wz = r.weight * (eta * d + (y-mu));
      double w  = r.weight * d;
      for(int i = 0; i < r.nBins; ++i) {
        _xy[r.binIds[i]] += wz;
      }
      for(int i = 0; i < r.nNums; ++i){
        int id = r.numIds == null?(i + numStart):r.numIds[i];
        double val = r.numVals[i];
        _xy[id] += wz*val;
      }
      if(_dinfo._intercept)
        _xy[_xy.length-1] += wz;
      _gram.addRow(r, w);
    }

    @Override public void reduce(GLMIterationTaskMultinomial glmt) {
      ArrayUtils.add(_xy,glmt._xy);
      _gram.add(glmt._gram);
    }
  }

  public static class GLMMultinomialUpdate extends FrameTask2<GLMMultinomialUpdate> {
    private final double [][] _beta; // updated value of beta
    private final int _c;
    private transient double [] _sparseOffsets;
    private transient double [] _etas;

    public GLMMultinomialUpdate(DataInfo dinfo, Key jobKey, double [] beta, int c) {
      super(null, dinfo, jobKey);
      _beta = ArrayUtils.convertTo2DMatrix(beta,dinfo.fullN()+1);
      _c = c;
    }

    @Override public void chunkInit(){
      // initialize
      _sparseOffsets = MemoryManager.malloc8d(_beta.length);
      _etas = MemoryManager.malloc8d(_beta.length);
      if(_sparse) {
        for(int i = 0; i < _beta.length; ++i)
          _sparseOffsets[i] = GLM.sparseOffset(_beta[i],_dinfo);
      }
    }

    private transient Chunk _sumExpChunk;
    private transient Chunk _maxRowChunk;

    @Override public void map(Chunk [] chks) {
      _sumExpChunk = chks[chks.length-2];
      _maxRowChunk = chks[chks.length-1];
      super.map(chks);
    }

    @Override protected void processRow(Row r) {
      double maxrow = 0;
      for(int i = 0; i < _beta.length; ++i) {
        _etas[i] = r.innerProduct(_beta[i]) + _sparseOffsets[i];
        if(_etas[i] > maxrow)
          maxrow = _etas[i];
      }
      double sumExp = 0;
      for(int i = 0; i < _beta.length; ++i)
//        if(i != _c)
          sumExp += Math.exp(_etas[i]-maxrow);
      _maxRowChunk.set(r.cid,_etas[_c]);
      _sumExpChunk.set(r.cid,Math.exp(_etas[_c]-maxrow)/sumExp);
    }
  }
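  // Hedged sketch: the per-class IRLSM quantities GLMIterationTaskMultinomial
  // above derives from each row. With class probability mu, indicator response y
  // and d = mu*(1-mu), the row contributes weight w*d to the Gram and
  // wz = w*(eta*d + (y - mu)) to the working response vector. Inputs are
  // hypothetical row values.
  static double [] multinomialIrlsRowExample(double y, double eta, double mu, double w) {
    double d = mu*(1-mu);
    return new double[]{w*d, w*(eta*d + (y - mu))}; // {gram weight, xy contribution}
  }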
  /**
   * One iteration of glm, computes weighted gram matrix and t(x)*y vector and t(y)*y scalar.
   *
   * @author tomasnykodym
   */
  public static class GLMIterationTask extends FrameTask2<GLMIterationTask> {
    final GLMWeightsFun _glmf;
    double [][] _beta_multinomial;
    double [] _beta;
    protected Gram _gram; // wx%*%x
    double [] _xy;        // wx^t%*%z,
    double _yy;
    final double [] _ymu;
    long _nobs;
    public double _likelihood;
    private transient GLMWeights _w;
//    final double _lambda;
    double wsum, wsumu;
    double _sumsqe;
    int _c = -1;

    public GLMIterationTask(Key jobKey, DataInfo dinfo, GLMWeightsFun glmw, double [] beta) {
      super(null,dinfo,jobKey);
      _beta = beta;
      _ymu = null;
      _glmf = glmw;
    }

    public GLMIterationTask(Key jobKey, DataInfo dinfo, GLMWeightsFun glmw, double [] beta, int c) {
      super(null,dinfo,jobKey);
      _beta = beta;
      _ymu = null;
      _glmf = glmw;
      _c = c;
    }

    @Override public boolean handlesSparseData(){return true;}

    transient private double _sparseOffset;

    @Override public void chunkInit() {
      // initialize
      _gram = new Gram(_dinfo.fullN(), _dinfo.largestCat(), _dinfo.numNums(), _dinfo._cats,true);
      _xy = MemoryManager.malloc8d(_dinfo.fullN()+1); // + 1 is for intercept
      if(_sparse)
        _sparseOffset = GLM.sparseOffset(_beta,_dinfo);
      _w = new GLMWeights();
    }

    @Override protected void processRow(Row r) { // called for every row in the chunk
      if(r.isBad() || r.weight == 0) return;
      ++_nobs;
      double y = r.response(0);
      _yy += y*y;
      final int numStart = _dinfo.numStart();
      double wz,w;
      if(_glmf._family == Family.multinomial) {
        y = (y == _c)?1:0;
        double mu = r.response(1);
        double eta = r.response(2);
        double d = mu*(1-mu);
        if(d == 0) d = 1e-10;
        wz = r.weight * (eta * d + (y-mu));
        w  = r.weight * d;
      } else if(_beta != null) {
        _glmf.computeWeights(y, r.innerProduct(_beta) + _sparseOffset, r.offset, r.weight, _w);
        w = _w.w;
        wz = w*_w.z;
        _likelihood += _w.l;
      } else {
        w = r.weight;
        wz = w*(y - r.offset);
      }
      wsum  += w;
      wsumu += r.weight; // just add the user observation weight for the scaling.
      for(int i = 0; i < r.nBins; ++i)
        _xy[r.binIds[i]] += wz;
      for(int i = 0; i < r.nNums; ++i){
        int id = r.numIds == null?(i + numStart):r.numIds[i];
        double val = r.numVals[i];
        _xy[id] += wz*val;
      }
      if(_dinfo._intercept)
        _xy[_xy.length-1] += wz;
      _gram.addRow(r,w);
    }

    @Override public void chunkDone(){adjustForSparseStandardizedZeros();}

    @Override public void reduce(GLMIterationTask git){
      ArrayUtils.add(_xy, git._xy);
      _gram.add(git._gram);
      _nobs += git._nobs;
      wsum += git.wsum;
      wsumu += git.wsumu;
      _likelihood += git._likelihood;
      _sumsqe += git._sumsqe;
      _yy += git._yy;
      super.reduce(git);
    }
    private void adjustForSparseStandardizedZeros(){
      if(_sparse && _dinfo._normSub != null) { // need to adjust gram for missing centering!
        int ns = _dinfo.numStart();
        int interceptIdx = _xy.length - 1;
        double[] interceptRow = _gram._xx[interceptIdx - _gram._diagN];
        double nobs = interceptRow[interceptRow.length - 1]; // weighted _nobs
        for (int i = ns; i < _dinfo.fullN(); ++i) {
          double iMean = _dinfo._normSub[i - ns] * _dinfo._normMul[i - ns];
          for (int j = 0; j < ns; ++j)
            _gram._xx[i - _gram._diagN][j] -= interceptRow[j] * iMean;
          for (int j = ns; j <= i; ++j) {
            double jMean = _dinfo._normSub[j - ns] * _dinfo._normMul[j - ns];
            _gram._xx[i - _gram._diagN][j] -= interceptRow[i] * jMean + interceptRow[j] * iMean - nobs * iMean * jMean;
          }
        }
        if (_dinfo._intercept) { // do the intercept row
          for (int j = ns; j < _dinfo.fullN(); ++j)
            interceptRow[j] -= nobs * _dinfo._normSub[j - ns] * _dinfo._normMul[j - ns];
        }
        // and the xy vec as well
        for (int i = ns; i < _dinfo.fullN(); ++i) {
          _xy[i] -= _xy[_xy.length - 1] * _dinfo._normSub[i - ns] * _dinfo._normMul[i - ns];
        }
      }
    }

    public boolean hasNaNsOrInf() {
      return ArrayUtils.hasNaNsOrInfs(_xy) || _gram.hasNaNsOrInfs();
    }
  }

/*
  public static class GLMCoordinateDescentTask extends FrameTask2<GLMCoordinateDescentTask> {
    final GLMParameters _params;
    final double [] _betaw;
    final double [] _betacd;
    public double [] _temp;
    public double [] _varsum;
    public double _ws=0;
    long _nobs;
    public double _likelihoods;

    public GLMCoordinateDescentTask(Key jobKey, DataInfo dinfo, double lambda, GLMModel.GLMParameters glm, boolean validate, double [] betaw, double [] betacd, double ymu, Vec rowFilter, H2OCountedCompleter cmp) {
      super(cmp,dinfo,jobKey,rowFilter);
      _params = glm;
      _betaw = betaw;
      _betacd = betacd;
    }

    @Override public boolean handlesSparseData(){return false;}

    @Override public void chunkInit() {
      _temp = MemoryManager.malloc8d(_dinfo.fullN()+1); // using h2o memory manager
      _varsum = MemoryManager.malloc8d(_dinfo.fullN());
    }

    @Override protected void processRow(Row r) {
      if(r.bad || r.weight == 0) return;
      ++_nobs;
      final double y = r.response(0);
      assert ((_params._family != Family.gamma) || y > 0) : "illegal response column, y must be > 0 for family=Gamma.";
      assert ((_params._family != Family.binomial) || (0 <= y && y <= 1)) : "illegal response column, y must be <0,1> for family=Binomial. got " + y;
      final double w, eta, mu, var, z;
      final int numStart = _dinfo.numStart();
      double d = 1;
      if( _params._family == Family.gaussian && _params._link == Link.identity){
        w = r.weight;
        z = y - r.offset;
        mu = 0;
        eta = mu;
      } else {
        eta = r.innerProduct(_betaw);
        mu = _params.linkInv(eta + r.offset);
        var = Math.max(1e-6, _params.variance(mu)); // avoid numerical problems with 0 variance
        d = _params.linkDeriv(mu);
        z = eta + (y-mu)*d;
        w = r.weight/(var*d*d);
      }
      _likelihoods += r.weight*_params.likelihood(y,mu);
      assert w >= 0 || Double.isNaN(w) : "invalid weight " + w; // allow NaNs - can occur if line-search is needed!
      _ws += w;
      double xb = r.innerProduct(_betacd);
      for(int i = 0; i < r.nBins; ++i)  { // go over cat variables
        _temp[r.binIds[i]] += (z - xb + _betacd[r.binIds[i]]) * w;
        _varsum[r.binIds[i]] += w;
      }
      for(int i = 0; i < r.nNums; ++i){ // num vars
        int id = r.numIds == null?(i + numStart):r.numIds[i];
        _temp[id] += (z - xb + r.get(id)*_betacd[id])*(r.get(id)*w);
        _varsum[id] += w*r.get(id)*r.get(id);
      }
      _temp[_temp.length-1] += w*(z - r.innerProduct(_betacd) + _betacd[_betacd.length-1]);
    }

    @Override public void reduce(GLMCoordinateDescentTask git){ // adding contribution of all the chunks
      ArrayUtils.add(_temp, git._temp);
      ArrayUtils.add(_varsum, git._varsum);
      _ws += git._ws;
      _nobs += git._nobs;
      _likelihoods += git._likelihoods;
      super.reduce(git);
    }
  }
*/
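  // Hedged sketch (not the original algorithm code): the single-coordinate
  // elastic-net update that the sequential coordinate-descent tasks below gather
  // statistics for. Given t = sum_i w_i*x_ij*(z_i - ztilda_i) (a _temp entry)
  // and v = sum_i w_i*x_ij^2, the new coefficient is the soft-thresholded
  //   beta_j = S(t, lambda*alpha) / (v + lambda*(1-alpha)).
  // All parameter names are local to this sketch.
  static double softThresholdUpdateExample(double t, double v, double lambda, double alpha) {
    double thr = lambda*alpha;
    double num = t > thr ? t - thr : (t < -thr ? t + thr : 0);
    return num/(v + lambda*(1 - alpha));
  }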
  public static class GLMCoordinateDescentTaskSeqNaive extends MRTask<GLMCoordinateDescentTaskSeqNaive> {
    public double [] _normMulold;
    public double [] _normSubold;
    public double [] _normMulnew;
    public double [] _normSubnew;
    final double [] _betaold;  // current old value at j
    final double [] _betanew;  // global beta @ j-1 that was just updated.
    final int [] _catLvls_new; // sorted list of indices of active levels only for one categorical variable
    final int [] _catLvls_old;
    public double [] _temp;
    boolean _skipFirst;
    long _nobs;
    int _cat_num; // 1: c and p categorical, 2: c numeric and p categorical, 3: c and p numeric, 4: c categorical and previous num.
    boolean _interceptnew;
    boolean _interceptold;

    public GLMCoordinateDescentTaskSeqNaive(boolean interceptold, boolean interceptnew, int cat_num,
                                            double [] betaold, double [] betanew, int [] catLvlsold, int [] catLvlsnew,
                                            double [] normMulold, double [] normSubold, double [] normMulnew, double [] normSubnew,
                                            boolean skipFirst) {
      // pass it norm mul and norm sub - in the weights already done. norm mul and mean will be null without standardization.
      _normMulold = normMulold;
      _normSubold = normSubold;
      _normMulnew = normMulnew;
      _normSubnew = normSubnew;
      _cat_num = cat_num;
      _betaold = betaold;
      _betanew = betanew;
      _interceptold = interceptold; // if updating beta_1, then the intercept is the previous column
      _interceptnew = interceptnew; // if currently updating the intercept value
      _catLvls_old = catLvlsold;
      _catLvls_new = catLvlsnew;
      _skipFirst = skipFirst;
    }

    @Override public void map(Chunk [] chunks) {
      int cnt = 0;
      Chunk wChunk = chunks[cnt++];
      Chunk zChunk = chunks[cnt++];
      Chunk ztildaChunk = chunks[cnt++];
      Chunk xpChunk = null, xChunk = null;
      _temp = new double[_betaold.length];
      if (_interceptnew) {
        xChunk = new C0DChunk(1,chunks[0]._len);
        xpChunk = chunks[cnt++];
      } else {
        if (_interceptold) {
          xChunk = chunks[cnt++];
          xpChunk = new C0DChunk(1,chunks[0]._len);
        } else {
          xChunk = chunks[cnt++];
          xpChunk = chunks[cnt++];
        }
      }
      // For each observation, add the corresponding term to temp. For a categorical variable, only add the term
      // corresponding to its active level and the active level of the most recently updated variable before it
      // (if that one is also categorical). If for an observation the active level corresponds to an inactive column,
      // we don't want to include it - same for an inactive level in the most recently updated variable - so set these
      // to zero (we won't be updating a beta_j which is inactive).
      for (int i = 0; i < chunks[0]._len; ++i) { // going over all the rows in the chunk
        double betanew = 0; // most recently updated prev variable
        double betaold = 0; // old value of current variable being updated
        double w = wChunk.atd(i);
        if(w == 0) continue;
        ++_nobs;
        int observation_level = 0, observation_level_p = 0;
        double val = 1, valp = 1;
        if(_cat_num == 1) {
          observation_level = (int) xChunk.at8(i); // only need to change one temp value per observation.
          if (_catLvls_old != null)
            observation_level = Arrays.binarySearch(_catLvls_old, observation_level);
          observation_level_p = (int) xpChunk.at8(i); // both cat
          if (_catLvls_new != null)
            observation_level_p = Arrays.binarySearch(_catLvls_new, observation_level_p);
          if(_skipFirst){
            observation_level--;
            observation_level_p--;
          }
        } else if(_cat_num == 2){
          val = xChunk.atd(i); // current num and previous cat
          if (_normMulold != null && _normSubold != null)
            val = (val - _normSubold[0]) * _normMulold[0];
          observation_level_p = (int) xpChunk.at8(i);
          if (_catLvls_new != null)
            observation_level_p = Arrays.binarySearch(_catLvls_new, observation_level_p);
          if(_skipFirst){
            observation_level_p--;
          }
        } else if(_cat_num == 3){
          val = xChunk.atd(i); // both num
          if (_normMulold != null && _normSubold != null)
            val = (val - _normSubold[0]) * _normMulold[0];
          valp = xpChunk.atd(i);
          if (_normMulnew != null && _normSubnew != null)
            valp = (valp - _normSubnew[0]) * _normMulnew[0];
        } else if(_cat_num == 4){
          observation_level = (int) xChunk.at8(i); // current cat
          if (_catLvls_old != null)
            observation_level = Arrays.binarySearch(_catLvls_old, observation_level); // search to see if this level is active.
          if(_skipFirst){
            observation_level--;
          }
          valp = xpChunk.atd(i); // prev numeric
          if (_normMulnew != null && _normSubnew != null)
            valp = (valp - _normSubnew[0]) * _normMulnew[0];
        }
        if(observation_level >= 0)
          betaold = _betaold[observation_level];
        if(observation_level_p >= 0)
          betanew = _betanew[observation_level_p];
        if (_interceptnew) {
          ztildaChunk.set(i, ztildaChunk.atd(i) - betaold + valp * betanew);
          _temp[0] += w * (zChunk.atd(i) - ztildaChunk.atd(i));
        } else {
          ztildaChunk.set(i, ztildaChunk.atd(i) - val * betaold + valp * betanew);
          if(observation_level >= 0) // if the active level for that observation is an "inactive column" don't want to add contribution to temp for that observation
            _temp[observation_level] += w * val * (zChunk.atd(i) - ztildaChunk.atd(i));
        }
      }
    }

    @Override public void reduce(GLMCoordinateDescentTaskSeqNaive git){
      ArrayUtils.add(_temp, git._temp);
      _nobs += git._nobs;
      super.reduce(git);
    }
  }

  public static class GLMCoordinateDescentTaskSeqIntercept extends MRTask<GLMCoordinateDescentTaskSeqIntercept> {
    final double [] _betaold;
    public double _temp;
    DataInfo _dinfo;

    public GLMCoordinateDescentTaskSeqIntercept(double [] betaold, DataInfo dinfo) {
      _betaold = betaold;
      _dinfo = dinfo;
    }

    @Override public void map(Chunk [] chunks) {
      int cnt = 0;
      Chunk wChunk = chunks[cnt++];
      Chunk zChunk = chunks[cnt++];
      Chunk filterChunk = chunks[cnt++];
      Row r = _dinfo.newDenseRow();
      for(int i = 0; i < chunks[0]._len; ++i) {
        if(filterChunk.atd(i) == 1) continue;
        _dinfo.extractDenseRow(chunks,i,r);
        _temp += wChunk.at8(i)*(zChunk.atd(i) - r.innerProduct(_betaold));
      }
    }

    @Override public void reduce(GLMCoordinateDescentTaskSeqIntercept git){
      _temp += git._temp;
      super.reduce(git);
    }
  }
  public static class GLMGenerateWeightsTask extends MRTask<GLMGenerateWeightsTask> {
    final GLMParameters _params;
    final double [] _betaw;
    double [] denums;
    double wsum,wsumu;
    DataInfo _dinfo;
    double _likelihood;

    public GLMGenerateWeightsTask(Key jobKey, DataInfo dinfo, GLMModel.GLMParameters glm, double[] betaw) {
      _params = glm;
      _betaw = betaw;
      _dinfo = dinfo;
    }

    @Override public void map(Chunk [] chunks) {
      Chunk wChunk = chunks[chunks.length-3];
      Chunk zChunk = chunks[chunks.length-2];
      Chunk zTilda = chunks[chunks.length-1];
      chunks = Arrays.copyOf(chunks,chunks.length-3);
      denums = new double[_dinfo.fullN()+1]; // full N is expanded variables with categories
      Row r = _dinfo.newDenseRow();
      for(int i = 0; i < chunks[0]._len; ++i) {
        _dinfo.extractDenseRow(chunks,i,r);
        if (r.isBad() || r.weight == 0) {
          wChunk.set(i,0);
          zChunk.set(i,0);
          zTilda.set(i,0);
          continue;
        }
        final double y = r.response(0);
        assert ((_params._family != Family.gamma) || y > 0) : "illegal response column, y must be > 0 for family=Gamma.";
        assert ((_params._family != Family.binomial) || (0 <= y && y <= 1)) : "illegal response column, y must be <0,1> for family=Binomial. got " + y;
        final double w, eta, mu, var, z;
        final int numStart = _dinfo.numStart();
        double d = 1;
        eta = r.innerProduct(_betaw);
        if (_params._family == Family.gaussian && _params._link == Link.identity) {
          w = r.weight;
          z = y - r.offset;
          mu = 0;
        } else {
          mu = _params.linkInv(eta + r.offset);
          var = Math.max(1e-6, _params.variance(mu)); // avoid numerical problems with 0 variance
          d = _params.linkDeriv(mu);
          z = eta + (y - mu) * d;
          w = r.weight / (var * d * d);
        }
        _likelihood += _params.likelihood(y,mu);
        zTilda.set(i,eta-_betaw[_betaw.length-1]);
        assert w >= 0 || Double.isNaN(w) : "invalid weight " + w; // allow NaNs - can occur if line-search is needed!
        wChunk.set(i,w);
        zChunk.set(i,z);
        wsum += w;
        wsumu += r.weight; // just add the user observation weight for the scaling.
        for(int j = 0; j < r.nBins; ++j)  { // go over cat variables
          denums[r.binIds[j]] += w; // binIds skips the zeros.
        }
        for(int j = 0; j < r.nNums; ++j){ // num vars
          int id = r.numIds == null?(j + numStart):r.numIds[j];
          denums[id] += w*r.get(id)*r.get(id);
        }
      }
    }

    @Override public void reduce(GLMGenerateWeightsTask git){ // adding contribution of all the chunks
      ArrayUtils.add(denums, git.denums);
      wsum += git.wsum;
      wsumu += git.wsumu;
      _likelihood += git._likelihood;
      super.reduce(git);
    }
  }

  public static class ComputeSETsk extends FrameTask2<ComputeSETsk> {
//    final double [] _betaOld;
    final double [] _betaNew;
    double _sumsqe;
    double _wsum;

    public ComputeSETsk(H2OCountedCompleter cmp, DataInfo dinfo, Key jobKey, /*double [] betaOld,*/ double [] betaNew, GLMParameters parms) {
      super(cmp, dinfo, jobKey);
//      _betaOld = betaOld;
      _glmf = new GLMWeightsFun(parms);
      _betaNew = betaNew;
    }

    transient double _sparseOffsetOld = 0;
    transient double _sparseOffsetNew = 0;
    final GLMWeightsFun _glmf;
    transient GLMWeights _glmw;

    @Override public void chunkInit(){
      if(_sparse) {
//        _sparseOffsetOld = GLM.sparseOffset(_betaNew, _dinfo);
        _sparseOffsetNew = GLM.sparseOffset(_betaNew, _dinfo);
      }
      _glmw = new GLMWeights();
    }

    @Override protected void processRow(Row r) {
      double z = r.response(0) - r.offset;
      double w = r.weight;
      if(_glmf._family != Family.gaussian) {
//        double etaOld = r.innerProduct(_betaOld) + _sparseOffsetOld;
        double etaOld = r.innerProduct(_betaNew) + _sparseOffsetNew;
        _glmf.computeWeights(r.response(0),etaOld,r.offset,r.weight,_glmw);
        z = _glmw.z;
        w = _glmw.w;
      }
      double eta = r.innerProduct(_betaNew) + _sparseOffsetNew;
//      double mu = _parms.linkInv(eta);
      _sumsqe += w*(eta - z)*(eta - z);
      _wsum += Math.sqrt(w);
    }
    @Override public void reduce(ComputeSETsk c){_sumsqe += c._sumsqe; _wsum += c._wsum;}
  }
}