AdaGradRDAUDTF.java example

Explorer
incubator-hivemall-master
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */
package hivemall.classifier;

import hivemall.common.LossFunctions;
import hivemall.model.FeatureValue;
import hivemall.model.IWeightValue;
import hivemall.model.WeightValue.WeightValueParamsF2;
import hivemall.utils.lang.Primitives;

import javax.annotation.Nonnull;

import org.apache.commons.cli.CommandLine;
import org.apache.commons.cli.Options;
import org.apache.hadoop.hive.ql.exec.Description;
import org.apache.hadoop.hive.ql.exec.UDFArgumentException;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;

/**
 * Online binary classifier trained with AdaGrad combined with Regularized Dual Averaging (RDA).
 *
 * <p>
 * Each training example is scored with the current model; when its hinge loss is positive, every
 * feature of the example has its per-feature gradient accumulators updated and its weight
 * recomputed in closed form from those accumulators:
 *
 * <pre>
 * x_{t,i} = -sign(u_{t,i}) * (eta * t / sqrt(G_{t,ii})) * max(0, |u_{t,i}| / t - lambda)
 * </pre>
 *
 * where {@code u} is the sum of gradients and {@code G} the sum of squared gradients of feature
 * {@code i}.
 *
 * <p>
 * The accumulators are kept in the model divided by {@code scaling} and multiplied back by
 * {@code scaling} before they are used in the formula above.
 */
@Description(name = "train_adagrad_rda",
        value = "_FUNC_(list<string|int|bigint> features, int label [, const string options])"
                + " - Returns a relation consists of <string|int|bigint feature, float weight>",
        extended = "Build a prediction model by Adagrad+RDA regularization binary classifier")
public final class AdaGradRDAUDTF extends BinaryOnlineClassifierUDTF {

    /** Default learning rate (option {@code -eta} / {@code -eta0}). */
    private static final float DEFAULT_ETA = 0.1f;
    /** Default RDA regularization constant (option {@code -lambda}). */
    private static final float DEFAULT_LAMBDA = 1E-6f;
    /** Default internal scaling factor of the gradient accumulators (option {@code -scale}). */
    private static final float DEFAULT_SCALING = 100f;

    private float eta;
    private float lambda;
    private float scaling;

    /**
     * Checks the argument count and then delegates to the parent initialization.
     *
     * @param argOIs object inspectors of the UDTF arguments; 2 or 3 are accepted
     * @return the output inspector produced by the parent class
     * @throws UDFArgumentException if the number of arguments is neither 2 nor 3
     */
    @Override
    public StructObjectInspector initialize(ObjectInspector[] argOIs) throws UDFArgumentException {
        final int numArgs = argOIs.length;
        if (numArgs != 2 && numArgs != 3) {
            throw new UDFArgumentException(
                "_FUNC_ takes 2 or 3 arguments: List<Text|Int|BigInt> features, int label [, constant string options]");
        }

        StructObjectInspector oi = super.initialize(argOIs);
        // NOTE(review): this trainer reads and writes the sum of squared gradients and the sum of
        // gradients of each weight; the flags presumably enable storage of those two values
        // (and disable a third one) -- confirm against the model's configureParams contract.
        model.configureParams(true, false, true);
        return oi;
    }

    /**
     * Adds the {@code eta}/{@code eta0}, {@code lambda} and {@code scale} options to the options
     * declared by the parent class.
     */
    @Override
    protected Options getOptions() {
        Options opts = super.getOptions();
        opts.addOption("eta", "eta0", true, "The learning rate \\eta [default 0.1]");
        opts.addOption("lambda", true, "lambda constant of RDA [default: 1E-6f]");
        opts.addOption("scale", true,
            "Internal scaling/descaling factor for cumulative weights [default: 100]");
        return opts;
    }

    /**
     * Reads {@code eta}, {@code lambda} and {@code scale} from the parsed command line, falling
     * back to the defaults when the parent returns no command line or an option is absent.
     *
     * @return the command line returned by the parent class (may be {@code null})
     * @throws UDFArgumentException if the resulting scaling factor is not a positive number
     */
    @Override
    protected CommandLine processOptions(ObjectInspector[] argOIs) throws UDFArgumentException {
        CommandLine cl = super.processOptions(argOIs);
        if (cl == null) {
            this.eta = DEFAULT_ETA;
            this.lambda = DEFAULT_LAMBDA;
            this.scaling = DEFAULT_SCALING;
        } else {
            this.eta = Primitives.parseFloat(cl.getOptionValue("eta"), DEFAULT_ETA);
            this.lambda = Primitives.parseFloat(cl.getOptionValue("lambda"), DEFAULT_LAMBDA);
            this.scaling = Primitives.parseFloat(cl.getOptionValue("scale"), DEFAULT_SCALING);
        }
        // The accumulators are divided by `scaling`; zero, negative or NaN values would turn
        // every weight into NaN/Infinity, so reject them up front (the negated form catches NaN).
        if (!(scaling > 0.f)) {
            throw new UDFArgumentException("-scale must be a positive number: " + scaling);
        }
        return cl;
    }

    /**
     * Processes one training example: updates the model only when the hinge loss of the current
     * prediction is positive.
     *
     * @param features features of the example; {@code null} elements are skipped
     * @param label class label; values greater than 0 are mapped to +1, all others to -1
     */
    @Override
    protected void train(@Nonnull final FeatureValue[] features, final int label) {
        final float y = label > 0 ? 1.f : -1.f;

        float p = predict(features);
        float loss = LossFunctions.hingeLoss(p, y); // 1.0 - y * p
        if (loss <= 0.f) { // max(0, 1 - y * p)
            return;
        }
        // subgradient => -y * W dot xi
        // NOTE(review): `count` is inherited from the parent class (not visible here); presumably
        // the number of examples processed so far, used as the step `t` -- confirm.
        update(features, y, count);
    }

    /**
     * Applies {@link #updateWeight(Object, float, float, float)} to every non-null feature.
     *
     * @param features features of the example; {@code null} elements are skipped
     * @param y label as +1 or -1
     * @param t step used as the averaging denominator in the RDA formula
     */
    protected void update(@Nonnull final FeatureValue[] features, final float y, final int t) {
        for (FeatureValue f : features) {// w[f] += y * x[f]
            if (f == null) {
                continue;
            }
            Object x = f.getFeature();
            float xi = f.getValueAsFloat();

            updateWeight(x, xi, y, t);
        }
    }

    /**
     * Accumulates the gradient of a single feature and recomputes its weight.
     *
     * <p>
     * The entry is deleted from the model (weight treated as zero) when the averaged absolute
     * gradient does not exceed {@code lambda}, or when no squared gradient has been accumulated,
     * in which case the step size {@code eta * t / sqrt(G)} is undefined.
     *
     * @param x feature key
     * @param xi feature value
     * @param y label as +1 or -1
     * @param t step used as the averaging denominator; expected to be positive
     */
    protected void updateWeight(@Nonnull final Object x, final float xi, final float y,
            final float t) {
        final float gradient = -y * xi;
        // Accumulators are stored divided by `scaling` so that multiplying by `scaling` below
        // recovers the true sums.
        final float scaledGradient = gradient / scaling;

        float scaledSumSqGrad = 0.f;
        float scaledSumGrad = 0.f;
        final IWeightValue old = model.get(x);
        if (old != null) {
            scaledSumSqGrad = old.getSumOfSquaredGradients();
            scaledSumGrad = old.getSumOfGradients();
        }
        scaledSumGrad += scaledGradient;
        // g * (g / scaling) == g^2 / scaling
        scaledSumSqGrad += gradient * scaledGradient;

        // descale: u_{t,i} and G_{t,ii}
        final float sumGrad = scaledSumGrad * scaling;
        final double sumSqGrad = (double) scaledSumSqGrad * scaling;

        // sign(u_{t,i})
        final float sign = (sumGrad > 0.f) ? 1.f : -1.f;
        // |u_{t,i}|/t - \lambda
        final float meansOfGradients = sign * sumGrad / t - lambda;
        // The negated comparison also catches a NaN accumulator; without this guard a zero
        // sum of squared gradients would produce 0/0 (NaN) or x/0 (Infinity) below.
        if (meansOfGradients < 0.f || !(sumSqGrad > 0.d)) {
            // x_{t,i} = 0
            model.delete(x);
            return;
        }

        // x_{t,i} = -sign(u_{t,i}) * \frac{\eta t}{\sqrt{G_{t,ii}}}(|u_{t,i}|/t - \lambda)
        final float weight =
                -1.f * sign * eta * t * meansOfGradients / (float) Math.sqrt(sumSqGrad);
        final IWeightValue newWeight =
                new WeightValueParamsF2(weight, scaledSumSqGrad, scaledSumGrad);
        model.set(x, newWeight);
    }
}