/* Copyright (C) 2002 Univ. of Massachusetts Amherst, Computer Science Dept.
This file is part of "MALLET" (MAchine Learning for LanguagE Toolkit).
http://www.cs.umass.edu/~mccallum/mallet
This software is provided under the terms of the Common Public License,
version 1.0, as published by http://www.opensource.org. For further
information, see the file `LICENSE' included with this distribution. */
package edu.nd.nina.types;
import java.nio.channels.Pipe;
import java.util.ArrayList;
import java.util.List;
import edu.nd.nina.util.PropertyList;
/**
* A machine learning "example" to be used in training, testing or performance
* of various machine learning algorithms.
*
* <p>
* An instance contains four generic fields of predefined name: "data",
* "target", "name", and "source". "Data" holds the data represented by the
* instance, "target" is often a label associated with the instance, "name" is a
* short identifying name for the instance (such as a filename), and "source" is
* human-readable source information (such as the original text).
*
* <p>
* Each field has no predefined type, and may change type as the instance is
* processed. For example, the data field may start off being a string that
* represents a file name and then be processed by a {@link cc.mallet.pipe.Pipe}
* into a CharSequence representing the contents of the file, and eventually to
* a feature vector holding indices into an {@link cc.mallet.types.Alphabet}
* holding words found in the file. It is up to each pipe which fields in the
* Instance it modifies; the most common case is that the pipe modifies the data
* field.
*
* <p>
* Generally speaking, there are two modes of operation for Instances. (1) An
* instance gets created and passed through a Pipe, and the resulting
* data/target/name/source fields are used. This is generally done for training
* instances. (2) An instance gets created with raw values in its slots, then
* different users of the instance call newPipedCopy() with their respective
* different pipes. This might be done for test instances at "performance" time.
*
* <p>
* Rather than store an {@link Alphabet} in the Instance, we obtain it from the
* data and target objects themselves whenever they implement
* {@link AlphabetCarrying}; see {@link #getDataAlphabet()} and
* {@link #getTargetAlphabet()}.
*
* <p>
* Instances can be made immutable if locked. Although unlocked Instances are
* mutable, typically the only code that changes the values in the four slots is
* inside Pipes.
*
* <p>
* Note that constructing an instance with a pipe argument means
* "Construct the instance and then run it through the pipe".
* {@link cc.mallet.types.InstanceList} uses this method when adding instances
* through a pipeInputIterator.
*
* @see Pipe
* @see Alphabet
* @see InstanceList
* @author Andrew McCallum <a
* href="mailto:mccallum@cs.umass.edu">mccallum@cs.umass.edu</a>
*/
public class Instance implements AlphabetCarrying, Cloneable {

    /** The input data in digested form, e.g. a FeatureVector. */
    protected Object data;

    /** The output data in digested form, e.g. a Label. */
    protected Object target;

    /** A readable name of the source, e.g. for ML error analysis. */
    protected Object name;

    /**
     * The input in a reproducible form, e.g. enabling re-print of string w/ POS
     * tags, usually without target information, e.g. an un-annotated
     * RegionList.
     */
    protected Object source;

    /** Optional key/value properties; {@code null} until one is set. */
    PropertyList properties = null;

    /** When {@code true}, the guarded setters refuse to modify this instance. */
    boolean locked = false;

    /**
     * Creates an unlocked instance whose four slots hold exactly the given
     * values. No processing is applied to the arguments; any of them may be
     * {@code null}.
     *
     * @param data   the value for the data slot
     * @param target the value for the target slot
     * @param name   the value for the name slot
     * @param source the value for the source slot
     */
    public Instance(Object data, Object target, Object name, Object source) {
        this.data = data;
        this.target = target;
        this.name = name;
        this.source = source;
    }

    /** @return the current content of the data slot (may be {@code null}) */
    public Object getData() {
        return data;
    }

    /** @return the current content of the target slot (may be {@code null}) */
    public Object getTarget() {
        return target;
    }

    /** @return the current content of the name slot (may be {@code null}) */
    public Object getName() {
        return name;
    }

    /** @return the current content of the source slot (may be {@code null}) */
    public Object getSource() {
        return source;
    }

    /**
     * Returns the alphabet carried by the data slot.
     *
     * @return the data object's alphabet, or {@code null} when the data object
     *         is not {@link AlphabetCarrying} (which includes a {@code null}
     *         data slot)
     */
    public Alphabet getDataAlphabet() {
        if (data instanceof AlphabetCarrying) {
            return ((AlphabetCarrying) data).getAlphabet();
        }
        return null;
    }

    /**
     * Returns the alphabet carried by the target slot.
     *
     * @return the target object's alphabet, or {@code null} when the target
     *         object is not {@link AlphabetCarrying} (which includes a
     *         {@code null} target slot)
     */
    public Alphabet getTargetAlphabet() {
        if (target instanceof AlphabetCarrying) {
            return ((AlphabetCarrying) target).getAlphabet();
        }
        return null;
    }

    /**
     * Returns the primary alphabet of this instance, which is the data
     * alphabet.
     *
     * @return same value as {@link #getDataAlphabet()}
     */
    public Alphabet getAlphabet() {
        return getDataAlphabet();
    }

    /**
     * Returns the data and target alphabets, in that order.
     *
     * @return a new two-element list {@code [dataAlphabet, targetAlphabet]};
     *         either element is {@code null} when the corresponding slot does
     *         not carry an alphabet
     */
    public List<Alphabet> getAlphabets() {
        List<Alphabet> alphabets = new ArrayList<Alphabet>(2);
        alphabets.add(getDataAlphabet());
        alphabets.add(getTargetAlphabet());
        return alphabets;
    }

    /**
     * Tests whether another alphabet-carrying object uses the same data and
     * target alphabets as this instance.
     *
     * <p>
     * The other object's first alphabet is compared with this instance's data
     * alphabet and its second alphabet with this instance's target alphabet.
     * A missing ({@code null}) alphabet only matches another missing alphabet.
     *
     * @param object the object whose alphabets are compared; must not be
     *               {@code null}
     * @return {@code true} if {@code object} reports exactly two alphabets and
     *         both match this instance's data and target alphabets
     */
    public boolean alphabetsMatch(AlphabetCarrying object) {
        List<Alphabet> others = object.getAlphabets();
        if (others == null || others.size() != 2) {
            return false;
        }
        // The second entry is the *target* alphabet; comparing it against the
        // data alphabet (as an earlier revision did) gave wrong answers
        // whenever data and target alphabets differ.
        return sameAlphabet(others.get(0), getDataAlphabet())
                && sameAlphabet(others.get(1), getTargetAlphabet());
    }

    /** Null-tolerant equality: two absent alphabets are considered equal. */
    private static boolean sameAlphabet(Alphabet a, Alphabet b) {
        return a == null ? b == null : a.equals(b);
    }

    /** @return {@code true} if this instance is currently locked */
    public boolean isLocked() {
        return locked;
    }

    /** Locks this instance so that the guarded setters throw. */
    public void lock() {
        locked = true;
    }

    /** Unlocks this instance so that the guarded setters work again. */
    public void unLock() {
        locked = false;
    }

    /**
     * Returns the target slot viewed as a {@link Labeling}.
     *
     * @return the target, or {@code null} if the target slot is empty
     * @throws IllegalStateException if the target is non-null and not a
     *                               {@link Labeling}
     */
    public Labeling getLabeling() {
        if (target == null || target instanceof Labeling) {
            return (Labeling) target;
        }
        throw new IllegalStateException("Target is not a Labeling; it is a "
                + target.getClass().getName());
    }

    /**
     * Replaces the data slot.
     *
     * @param d the new data value
     * @throws IllegalStateException if this instance is locked
     */
    public void setData(Object d) {
        checkNotLocked();
        data = d;
    }

    /**
     * Replaces the target slot.
     *
     * @param t the new target value
     * @throws IllegalStateException if this instance is locked
     */
    public void setTarget(Object t) {
        checkNotLocked();
        target = t;
    }

    /**
     * Replaces the target slot with a {@link Labeling}.
     *
     * @param l the new target value
     * @throws IllegalStateException if this instance is locked
     */
    public void setLabeling(Labeling l) {
        // This test isn't strictly necessary, but might catch some typos.
        assert (target == null || target instanceof Labeling);
        checkNotLocked();
        target = l;
    }

    /**
     * Replaces the name slot.
     *
     * @param n the new name value
     * @throws IllegalStateException if this instance is locked
     */
    public void setName(Object n) {
        checkNotLocked();
        name = n;
    }

    /**
     * Replaces the source slot.
     *
     * @param s the new source value
     * @throws IllegalStateException if this instance is locked
     */
    public void setSource(Object s) {
        checkNotLocked();
        source = s;
    }

    /**
     * Sets the source slot to {@code null}.
     *
     * <p>
     * NOTE(review): unlike {@link #setSource(Object)} this does not check the
     * lock, so it also clears the source of a locked instance. Kept as-is for
     * backward compatibility; confirm whether that is intentional.
     */
    public void clearSource() {
        source = null;
    }

    /**
     * Creates a new {@code Instance} that refers to the same data, target,
     * name, source and property-list objects as this one and has the same
     * lock state. None of the referenced objects is copied.
     *
     * @return the new instance
     */
    public Instance shallowCopy() {
        Instance copy = new Instance(data, target, name, source);
        copy.locked = locked;
        copy.properties = properties;
        return copy;
    }

    /**
     * Returns a shallow copy of this instance.
     *
     * <p>
     * The result is always created via {@link #shallowCopy()}, so it is a
     * plain {@code Instance} even when invoked on a subclass that does not
     * override this method.
     *
     * @return same value as {@link #shallowCopy()}
     */
    @Override
    public Object clone() {
        return shallowCopy();
    }

    // Setting and getting properties

    /**
     * Adds an object-valued property under {@code key}.
     *
     * <p>
     * NOTE(review): this does not check the lock, so properties can be added
     * to a locked instance. Kept as-is for backward compatibility.
     *
     * @param key   the property name
     * @param value the property value
     */
    public void setProperty(String key, Object value) {
        properties = PropertyList.add(key, value, properties);
    }

    /**
     * Adds a numeric property under {@code key}.
     *
     * <p>
     * NOTE(review): this does not check the lock, so properties can be added
     * to a locked instance. Kept as-is for backward compatibility.
     *
     * @param key   the property name
     * @param value the property value
     */
    public void setNumericProperty(String key, double value) {
        properties = PropertyList.add(key, value, properties);
    }

    /**
     * Returns the underlying property list.
     *
     * @return the property list, or {@code null} if no property has been set
     * @deprecated exposes the internal property list directly; the per-key
     *             accessors such as {@link #getProperty(String)} are presumably
     *             the intended replacement.
     */
    @Deprecated
    public PropertyList getProperties() {
        return properties;
    }

    /**
     * Replaces the underlying property list.
     *
     * @param p the new property list (may be {@code null})
     * @throws IllegalStateException if this instance is locked
     * @deprecated replaces the internal property list wholesale; the per-key
     *             mutators such as {@link #setProperty(String, Object)} are
     *             presumably the intended replacement.
     */
    @Deprecated
    public void setPropertyList(PropertyList p) {
        checkNotLocked();
        properties = p;
    }

    /**
     * Looks up an object-valued property.
     *
     * @param key the property name
     * @return the result of the property list's object lookup for {@code key},
     *         or {@code null} if no property has been set on this instance
     */
    public Object getProperty(String key) {
        if (properties == null) {
            return null;
        }
        return properties.lookupObject(key);
    }

    /**
     * Looks up a numeric property.
     *
     * @param key the property name
     * @return the result of the property list's numeric lookup for
     *         {@code key}, or {@code 0.0} if no property has been set on this
     *         instance
     */
    public double getNumericProperty(String key) {
        if (properties == null) {
            return 0.0;
        }
        return properties.lookupNumber(key);
    }

    /**
     * Tests whether a property is present.
     *
     * @param key the property name
     * @return {@code false} if no property has been set on this instance,
     *         otherwise whatever the property list reports for {@code key}
     */
    public boolean hasProperty(String key) {
        return properties != null && properties.hasProperty(key);
    }

    /**
     * Guard shared by all lock-respecting mutators.
     *
     * @throws IllegalStateException if this instance is locked
     */
    private void checkNotLocked() {
        if (locked) {
            throw new IllegalStateException("Instance is locked.");
        }
    }
}