/* Copyright (C) 2002 Univ. of Massachusetts Amherst, Computer Science Dept. This file is part of "MALLET" (MAchine Learning for LanguagE Toolkit). http://www.cs.umass.edu/~mccallum/mallet This software is provided under the terms of the Common Public License, version 1.0, as published by http://www.opensource.org. For further information, see the file `LICENSE' included with this distribution. */ package cc.mallet.types; import java.io.Serializable; import java.util.Arrays; import java.util.HashMap; import java.util.Iterator; import java.util.List; import java.util.ListIterator; import java.util.NoSuchElementException; import java.util.Random; import cc.mallet.types.Instance; import cc.mallet.types.InstanceList; /** * An implementation of InstanceList that logically combines multiple instance * lists so that they appear as one list without copying the original lists. * This is useful when running cross-validation experiments with large data sets. * * Any operation that would modify the size of the list is not supported. * * @see InstanceList * * @author Michael Bond <a href="mailto:mbond@gmail.com">mbond@gmail.com</a> */ public class MultiInstanceList extends InstanceList { private static final long serialVersionUID = -7177121200386974657L; private static final InstanceList[] EMPTY_ARRAY = new InstanceList[0]; private final InstanceList[] lists; private final int[] offsets; private class MultiIterator implements Iterator<Instance>, Serializable { private static final long serialVersionUID = -2446488635289279133L; int index = 0; Iterator<Instance> i; public MultiIterator () { this.i = lists.length == 0 ? null : lists[0].iterator (); } public boolean hasNext () { if (this.index < lists.length) { if (this.i.hasNext ()) { return true; } for (int tmpIndex = this.index + 1; tmpIndex < lists.length; tmpIndex++) { final InstanceList list = lists[tmpIndex]; if (list != null && lists[tmpIndex].size () > 0) { return true; } } } return false; } public Instance next () { if (this.index < lists.length) { if (this.i.hasNext ()) { return this.i.next (); } for (this.index++; this.index < lists.length; this.index++) { final InstanceList list = lists[this.index]; if (list != null && lists[this.index].size () > 0) { this.i = lists[this.index].iterator (); return this.i.next (); } } } throw new NoSuchElementException (); } public void remove () { throw new UnsupportedOperationException (); } } /** * Constructs a {@link MultiInstanceList} with an array of {@link InstanceList} * * @param lists Array of {@link InstanceList} to logically combine */ public MultiInstanceList (InstanceList[] lists) { super (lists[0].getPipe ()); this.lists = lists; this.offsets = new int[lists.length]; // build index offsets array and populate instance weights int offset = 0; for (int i = 0; i < lists.length; i++) { this.offsets[i] = offset; offset += lists[i].size (); if (lists[i].instWeights != null) { if (this.instWeights == null) { this.instWeights = new HashMap<Instance,Double> (); } this.instWeights.putAll (instWeights); } } } /** * Constructs a {@link MultiInstanceList} with a {@link List} of {@link InstanceList} * * @param lists List of {@link InstanceList} to logically combine */ public MultiInstanceList (List<InstanceList> lists) { this (lists.toArray (EMPTY_ARRAY)); } public boolean add (Instance instance, double instanceWeight) { throw new UnsupportedOperationException (); } public boolean add (Instance instance) { throw new UnsupportedOperationException (); } public void add (int index, Instance element) { throw new UnsupportedOperationException (); } public void clear () { throw new UnsupportedOperationException (); } public Object clone () { InstanceList[] newLists = new InstanceList[this.lists.length]; for (int i = 0; i < this.lists.length; i++) { newLists[i] = (InstanceList) this.lists[i].clone (); } return new MultiInstanceList (newLists); } public InstanceList cloneEmpty () { InstanceList[] newLists = new InstanceList[this.lists.length]; for (int i = 0; i < this.lists.length; i++) { newLists[i] = this.lists[i].cloneEmpty (); } return new MultiInstanceList (newLists); } protected InstanceList cloneEmptyInto (InstanceList ret) { throw new UnsupportedOperationException (); } public boolean contains (Object elem) { for (InstanceList list : this.lists) { if (list != null && list.contains (elem)) { return true; } } return false; } public CrossValidationIterator crossValidationIterator (int nfolds, int seed) { throw new UnsupportedOperationException (); } public CrossValidationIterator crossValidationIterator (int nfolds) { throw new UnsupportedOperationException (); } public void ensureCapacity (int minCapacity) { throw new UnsupportedOperationException (); } public boolean equals (Object o) { if (o instanceof MultiInstanceList) { MultiInstanceList tmp = (MultiInstanceList) o; if (tmp.lists.length != this.lists.length) { return false; } for (int i = 0; i < this.lists.length; i++) { InstanceList thisList = this.lists[i]; InstanceList tmpList = tmp.lists[i]; if (thisList == null && tmpList != null) { return false; } else if (!thisList.equals (tmpList)) { return false; } } return true; } return false; } //@Override public Instance get (int index) { int i = getOffsetIndex (index); return this.lists[i].get (index - this.offsets[i]); } /** * Gets the index into the offsets array for the given element index * * @param index Index of element * @return Index into offsets, will always give a valid index */ private int getOffsetIndex (int index) { int i = Arrays.binarySearch (this.offsets, index); if (i < 0) { i = (-i) - 2; } return i; } //@Override public int hashCode () { int hashCode = 1; for (InstanceList list : this.lists) { hashCode = 31*hashCode + (list==null ? 0 : list.hashCode ()); } return hashCode; } //@Override public int indexOf (Object elem) { for (int i = 0; i < this.lists.length; i++) { int index = this.lists[i].indexOf (elem); if (index != -1) { return index + this.offsets[i]; } } return -1; } //@Override public boolean isEmpty () { for (InstanceList list : this.lists) { if (list != null && !list.isEmpty ()) { return true; } } return false; } //@Override public Iterator<Instance> iterator () { return new MultiIterator (); } //@Override public int lastIndexOf (Object elem) { for (int i = this.lists.length - 1; i >= 0; i--) { int index = this.lists[i].lastIndexOf (elem); if (index != -1) { return index + this.offsets[i]; } } return -1; } //@Override public ListIterator<Instance> listIterator () { throw new UnsupportedOperationException (); } //@Override public ListIterator<Instance> listIterator (int index) { throw new UnsupportedOperationException (); } //@Override public boolean remove (Instance instance) { throw new UnsupportedOperationException (); } //@Override public Instance remove (int index) { throw new UnsupportedOperationException (); } //@Override public boolean remove (Object o) { throw new UnsupportedOperationException (); } //@Override public Instance set (int index, Instance instance) { int i = getOffsetIndex (index); return this.lists[i].set (index - this.offsets[i], instance); } //@Override public void setInstance (int index, Instance instance) { int i = getOffsetIndex (index); this.lists[i].setInstance (index - this.offsets[i], instance); } //@Override public void setInstanceWeight (Instance instance, double weight) { super.setInstanceWeight (instance, weight); int index = indexOf (instance); int i = getOffsetIndex (index); this.lists[i].setInstanceWeight (index - this.offsets[i], weight); } //@Override public InstanceList shallowClone () { InstanceList[] newLists = new InstanceList[this.lists.length]; for (int i = 0; i < this.lists.length; i++) { newLists[i] = this.lists[i].shallowClone (); } return new MultiInstanceList (newLists); } //@Override public void shuffle (Random r) { throw new UnsupportedOperationException (); } //@Override public int size () { int size = 0; for (InstanceList list : this.lists) { if (list != null) { size += list.size (); } } return size; } //@Override public InstanceList[] split (double[] proportions) { throw new UnsupportedOperationException (); } //@Override public InstanceList[] split (Random r, double[] proportions) { throw new UnsupportedOperationException (); } //@Override public InstanceList[] splitInOrder (double[] proportions) { throw new UnsupportedOperationException (); } //@Override public InstanceList[] splitInOrder (int[] counts) { throw new UnsupportedOperationException (); } //@Override public InstanceList[] splitInTwoByModulo (int m) { throw new UnsupportedOperationException (); } //@Override public InstanceList subList (double proportion) { throw new UnsupportedOperationException (); } //@Override public InstanceList subList (int start, int end) { throw new UnsupportedOperationException (); } //@Override public Object[] toArray () { Object[] result = new Object[size ()]; int i = 0; for (InstanceList list : this.lists) { if (list != null) { for (Instance instance : list) { result[i++] = instance; } } } return result; } @SuppressWarnings("unchecked") //@Override public <T> T[] toArray (T[] a) { int size = size (); if (a.length < size) { a = (T[])java.lang.reflect.Array .newInstance (a.getClass ().getComponentType (), size); } Object[] result = a; int i = 0; for (InstanceList list : this.lists) { if (list != null) { for (Instance instance : list) { result[i++] = instance; } } } if (a.length > size) a[size] = null; return a; } //@Override public String toString () { StringBuffer buf = new StringBuffer (); buf.append ("["); for (int listIndex = 0; listIndex < this.lists.length; listIndex++) { if (this.lists[listIndex] != null) { Iterator<Instance> i = this.lists[listIndex].iterator (); boolean hasNext = i.hasNext (); while (hasNext) { Instance o = i.next (); buf.append (String.valueOf (o)); hasNext = i.hasNext (); if (listIndex < this.lists.length || hasNext) { buf.append (", "); } } } } buf.append ("]"); return buf.toString (); } //@Override public void trimToSize () { for (InstanceList list : this.lists) { list.trimToSize (); } } }