/* * Apache License * Version 2.0, January 2004 * http://www.apache.org/licenses/ * * Copyright 2013 Aurelian Tutuianu * Copyright 2014 Aurelian Tutuianu * Copyright 2015 Aurelian Tutuianu * Copyright 2016 Aurelian Tutuianu * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. * */ package rapaio.data; import java.util.*; /** * Ordinal variables contains values for categorical observations where order of labels is important. * <p> * The domain of the definition is called levels and is given at construction time or can be changed latter. * <p> * This type of variable accepts two value representation: as labels and as indexes. * <p> * Label representation is the natural representation since in experiments * the nominal vectors are given as string values. * <p> * The index representation is learn based on the term levels and is used often for performance * reasons instead of label representation, where the actual label value does not matter. * <p> * Index values can be used to compare to values, however other numeric statistics such as mean * does not apply since there are meaningless. The reason why the index mean is meaningless is * that there are no guarantees that the difference between index i and i-1 is the same between * index i and i+1. Indexes specify only order, but not numerical quantities. * * @author <a href="mailto:padreati@yahoo.com>Aurelian Tutuianu</a> */ public final class Ordinal extends FactorBase { /** * Builds a new empty ordinal variable. * * @return new variable instance of ordinal type */ public static Ordinal empty() { return new Ordinal(); } /** * Builds a new ordinal variable with given levels and of give size filled with missing values. * * @param rows variable size * @param dict term levels * @return new variable instance of ordinal type */ public static Ordinal empty(int rows, String... dict) { return Ordinal.empty(rows, Arrays.asList(dict)); } /** * Builds a new ordinal variable with given levels and of given size filled with missing values. * * @param rows variable size * @param dict term levels * @return new variable instance of ordinal type */ public static Ordinal empty(int rows, Collection<String> dict) { Ordinal nominal = new Ordinal(); for (String next : dict) { if (nominal.dict.contains(next)) continue; nominal.dict.add(next); nominal.reverse.put(next, nominal.reverse.size()); } nominal.data = new int[rows]; nominal.rows = rows; return nominal; } private static final long serialVersionUID = 5438713835700406847L; private Ordinal() { super(); // set the missing value this.reverse = new HashMap<>(); this.reverse.put("?", 0); this.dict = new ArrayList<>(); this.dict.add("?"); data = new int[0]; rows = 0; } @Override public Ordinal withName(String name) { return (Ordinal) super.withName(name); } @Override public VarType type() { return VarType.ORDINAL; } @Override public void addRows(int rowCount) { grow(rows + rowCount); for (int i = 0; i < rowCount; i++) { data[rows + i] = 0; } rows += rowCount; } @Override public Var newInstance(int rows) { return Ordinal.empty(rows, levels()); } @Override public Ordinal solidCopy() { return (Ordinal) super.solidCopy(); } @Override public String toString() { return "Ordinal[name:" + name() + ", rowCount:" + rowCount() + "]"; } }