/**
* Portions Copyright 2001 Sun Microsystems, Inc.
* Portions Copyright 1999-2001 Language Technologies Institute,
* Carnegie Mellon University.
* All Rights Reserved. Use is subject to license terms.
*
* See the file "license.terms" for information on usage and
* redistribution of this file, and for a DISCLAIMER OF ALL
* WARRANTIES.
*/
package edu.cmu.sphinx.alignment.tokenizer;
import java.util.Iterator;
import edu.cmu.sphinx.alignment.Token;
/**
* Holds all the data for an utterance to be spoken. It is incrementally
* modified by various UtteranceProcessor implementations. An utterance
* contains a set of Features (essentially a set of properties) and a set of
* Relations. A Relation is an ordered set of Item graphs. The utterance
* contains a set of features and implements FeatureSet so that applications
* can set/get features directly from the utterance. If a feature query is not
* found in the utterance feature set, the query is forwarded to the FeatureSet
* of the voice associated with the utterance.
*/
public class Utterance {
    /** Features set directly on this utterance. */
    private final FeatureSet features;

    /** Relations of this utterance, stored by relation name. */
    private final FeatureSet relations;

    /**
     * Creates an utterance with the given set of tokenized text. The tokens
     * produced by the tokenizer are stored in a newly created
     * {@code Relation.TOKEN} relation.
     *
     * @param tokenizer tokenizer to use for utterance.
     */
    public Utterance(CharTokenizer tokenizer) {
        features = new FeatureSet();
        relations = new FeatureSet();
        setTokenList(tokenizer);
    }

    /**
     * Creates a new relation with the given name and adds it to this
     * utterance.
     *
     * @param name the name of the new relation
     *
     * @return the newly created relation
     */
    public Relation createRelation(String name) {
        Relation relation = new Relation(name, this);
        relations.setObject(name, relation);
        return relation;
    }

    /**
     * Retrieves a relation from this utterance.
     *
     * @param name the name of the Relation
     *
     * @return the relation or null if the relation is not found
     */
    public Relation getRelation(String name) {
        return (Relation) relations.getObject(name);
    }

    /**
     * Determines if this utterance contains a relation with the given name.
     *
     * @param name the name of the relation of interest.
     * @return if relation is present
     */
    public boolean hasRelation(String name) {
        return relations.isPresent(name);
    }

    /**
     * Removes the named feature from this set of features.
     *
     * @param name the name of the feature of interest
     */
    public void remove(String name) {
        features.remove(name);
    }

    /**
     * Convenience method that sets the named feature as an int.
     *
     * @param name the name of the feature
     * @param value the value of the feature
     */
    public void setInt(String name, int value) {
        features.setInt(name, value);
    }

    /**
     * Convenience method that sets the named feature as a float.
     *
     * @param name the name of the feature
     * @param value the value of the feature
     */
    public void setFloat(String name, float value) {
        features.setFloat(name, value);
    }

    /**
     * Convenience method that sets the named feature as a String.
     *
     * @param name the name of the feature
     * @param value the value of the feature
     */
    public void setString(String name, String value) {
        features.setString(name, value);
    }

    /**
     * Sets the named feature.
     *
     * @param name the name of the feature
     * @param value the value of the feature
     */
    public void setObject(String name, Object value) {
        features.setObject(name, value);
    }

    /**
     * Returns the Item in the given Relation associated with the given time.
     *
     * @param relation the name of the relation; must be
     *        {@code Relation.WORD} or {@code Relation.TOKEN}
     * @param time the time
     * @return the item reached by following the relation-specific path from
     *         the item located for the time, or null when no item is located
     *         for the time
     * @throws IllegalArgumentException if {@code relation} is neither
     *         {@code Relation.WORD} nor {@code Relation.TOKEN}
     */
    public Item getItem(String relation, float time) {
        String pathName = null;
        if (relation.equals(Relation.WORD)) {
            pathName = "R:SylStructure.parent.parent.R:Word";
        } else if (relation.equals(Relation.TOKEN)) {
            pathName = "R:SylStructure.parent.parent.R:Token.parent";
        } else {
            throw new IllegalArgumentException(
                    "Utterance.getItem(): relation cannot be " + relation);
        }
        PathExtractor path = new PathExtractor(pathName, false);
        // NOTE(review): the path expressions above look like they start from
        // a segment-level item, yet the time lookup below searches the
        // relation named by the argument. Confirm which relation was
        // intended before relying on the result.
        Item segmentItem = getItem(getRelation(relation), time);
        if (segmentItem != null) {
            return path.findItem(segmentItem);
        } else {
            return null;
        }
    }

    /**
     * Locates the item of the given relation for the given time, using the
     * "end" feature of each item.
     *
     * @param segmentRelation the relation to search; may be null
     * @param time the time
     * @return the located item, or null if the relation is null or empty, or
     *         if the time is negative or greater than the end of the last
     *         item
     */
    private static Item getItem(Relation segmentRelation, float time) {
        // A missing or empty relation cannot contain an item for any time.
        if (segmentRelation == null) {
            return null;
        }
        Item lastSegment = segmentRelation.getTail();
        if (lastSegment == null) {
            return null;
        }
        // If given time is closer to the front than the end, search from
        // the front; otherwise, start search from end
        // this might not be the best strategy though.
        float lastSegmentEndTime = getSegmentEnd(lastSegment);
        if (time < 0 || lastSegmentEndTime < time) {
            return null;
        } else if (lastSegmentEndTime - time > time) {
            return findFromFront(segmentRelation, time);
        } else {
            return findFromEnd(segmentRelation, time);
        }
    }

    /**
     * Searches backwards from the tail of the relation for the item for the
     * given time.
     *
     * @param segmentRelation the relation to search
     * @param time the time
     * @return the located item
     */
    private static Item findFromEnd(Relation segmentRelation, float time) {
        Item tail = segmentRelation.getTail();
        Item item = tail;
        // Walk back until an item that ends at or before the given time.
        while (item != null && getSegmentEnd(item) > time) {
            item = item.getPrevious();
        }
        if (item == null) {
            // Every item ends after the given time, so the walk ran past the
            // head; the head is the item for the time. Previously this case
            // dereferenced null.
            return segmentRelation.getHead();
        }
        if (item != tail) {
            // Step forward again to the first item ending after the time.
            item = item.getNext();
        }
        return item;
    }

    /**
     * Searches forwards from the head of the relation for the first item
     * whose end is not before the given time.
     *
     * @param segmentRelation the relation to search
     * @param time the time
     * @return the located item, or null if no such item exists
     */
    private static Item findFromFront(Relation segmentRelation, float time) {
        Item item = segmentRelation.getHead();
        while (item != null && time > getSegmentEnd(item)) {
            item = item.getNext();
        }
        return item;
    }

    /**
     * Reads the "end" feature of the given item.
     *
     * @param segment the item to query
     * @return the value of the item's "end" feature
     */
    private static float getSegmentEnd(Item segment) {
        FeatureSet segmentFeatureSet = segment.getFeatures();
        return segmentFeatureSet.getFloat("end");
    }

    /**
     * Sets the token list for this utterance. Note that this could be
     * optimized by turning the token list directly into the token relation.
     * Tokens whose word is null or empty are skipped.
     *
     * @param tokenizer the source of the tokens to add
     */
    private void setTokenList(Iterator<Token> tokenizer) {
        Relation relation = createRelation(Relation.TOKEN);
        while (tokenizer.hasNext()) {
            Token token = tokenizer.next();
            String tokenWord = token.getWord();
            if (tokenWord != null && tokenWord.length() > 0) {
                Item item = relation.appendItem();
                FeatureSet featureSet = item.getFeatures();
                featureSet.setString("name", tokenWord);
                featureSet.setString("whitespace", token.getWhitespace());
                featureSet.setString("prepunctuation",
                        token.getPrepunctuation());
                featureSet.setString("punc", token.getPostpunctuation());
                featureSet.setString("file_pos",
                        String.valueOf(token.getPosition()));
                featureSet.setString("line_number",
                        String.valueOf(token.getLineNumber()));
            }
        }
    }
}