/*
* Copyright (C) 2015 Artificial Intelligence
* Laboratory @ University of Udine.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version 2
* of the License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
*/
package it.uniud.ailab.dcore.persistence;
import com.fasterxml.jackson.annotation.JsonIgnore;
import it.uniud.ailab.dcore.annotation.Annotation;
import it.uniud.ailab.dcore.annotation.annotations.FeatureAnnotation;
import it.uniud.ailab.dcore.utils.ListUtils;
import java.util.ArrayList;
import java.util.List;
/**
* The Keyphrase is a {@link Gram} that stores all the data concerning an
* n-gram: its surfaces, their token lists and the components it appears in.
*
* @author Dario De Nart
* @author Marco Basaldella
*/
//@JsonIgnoreProperties({"surfaces,tokenLists,appaerances,features"})
public class Keyphrase extends Gram {

    /**
     * The different lists of words forming the surfaces of the gram. This
     * class always adds to this list together with {@link #surfaces}, so the
     * i-th token list belongs to the i-th surface.
     */
    private final List<List<Token>> tokenLists;

    /**
     * The different string representations of the gram. May contain
     * duplicates: the most frequent entry is used as the gram's surface.
     */
    private final List<String> surfaces;

    /**
     * The concept units in which the gram appears.
     */
    private final List<DocumentComponent> appareances;

    /**
     * The identifier for a Keyphrase object.
     */
    public static final String KEYPHRASE = "Keyphrase";

    /**
     * Instantiates an n-gram. Usually, the surface should be simply the
     * concatenation of the text of the tokens. The signature can be used for
     * comparison, so be sure to generate different signatures for n-grams
     * that are different in your domain. For example, you can use the sequence
     * of the stems of the tokens, so that n-grams with the same stemmed form
     * are considered equal.
     *
     * @param identifier unique identifier of the gram.
     * @param sequence the tokens that form the gram
     * @param surface the pretty-printed string representation of the gram
     */
    public Keyphrase(String identifier, List<Token> sequence, String surface) {
        super(identifier, sequence, surface, KEYPHRASE);

        tokenLists = new ArrayList<>();
        tokenLists.add(sequence);

        surfaces = new ArrayList<>();
        surfaces.add(surface);

        appareances = new ArrayList<>();
    }

    /**
     * Adds a surface to the n-gram. Duplicates are permitted.
     * <p>
     * The token list is stored by reference (it is not copied).
     *
     * @param surface the surface to add
     * @param tokens the tokens that form the surface
     */
    @Override
    public void addSurface(String surface, List<Token> tokens) {
        surfaces.add(surface);
        tokenLists.add(tokens);
    }

    /**
     * Adds a group of surfaces to the n-gram. Duplicates are permitted.
     * <p>
     * Each token list is copied before being stored. All copies are made
     * before the gram is modified, so if copying fails (for example because
     * one of the token lists is {@code null}) the gram is left untouched and
     * surfaces and token lists stay aligned.
     *
     * @param surfaces the surfaces to add
     * @param tokenLists the tokens that form each surface, in the same order
     * as {@code surfaces}
     * @throws IllegalArgumentException if the two lists have different sizes
     */
    @Override
    public void addSurfaces(List<String> surfaces, List<List<Token>> tokenLists) {
        if (surfaces.size() != tokenLists.size()) {
            throw new IllegalArgumentException(
                    "Mismatching size of surfaces and token lists.");
        }

        // note: do not use addAll on the token lists. The references are
        // lost if you don't copy. The copies are prepared up front so that a
        // failure here cannot leave this.surfaces longer than this.tokenLists.
        List<List<Token>> copies = new ArrayList<>(tokenLists.size());
        for (List<Token> t : tokenLists) {
            copies.add(new ArrayList<>(t));
        }

        this.surfaces.addAll(surfaces);
        this.tokenLists.addAll(copies);
    }

    /**
     * The tokens that form the most common surface of the gram.
     *
     * @return the tokens of the surface of the gram.
     */
    @Override
    public List<Token> getTokens() {
        // the token list is looked up at the position of the first
        // occurrence of the most common surface
        String mostCommonSurface = ListUtils.mostCommon(surfaces);
        return tokenLists.get(surfaces.indexOf(mostCommonSurface));
    }

    /**
     * Returns all the possible lists of tokens that form the gram.
     * <p>
     * The returned list is the internal one, not a copy.
     *
     * @return all the possible lists of tokens that form the gram.
     */
    @Override
    @JsonIgnore
    public List<List<Token>> getTokenLists() {
        return tokenLists;
    }

    /**
     * The human-readable form of the gram. This is the most common surface
     * between all the surfaces associated with the gram; if there are more than
     * one, the first one that has been added to the gram is selected.
     *
     * @return the human-readable form of the gram.
     */
    @Override
    public String getSurface() {
        return ListUtils.mostCommon(surfaces);
    }

    /**
     * Returns all the surfaces of the gram. Note: may contain
     * duplicates.
     * <p>
     * The returned list is the internal one, not a copy.
     *
     * @return all the surfaces of the gram.
     */
    @Override
    @JsonIgnore
    public List<String> getSurfaces() {
        return surfaces;
    }

    /**
     * The identifier of the gram. Please note that it is possible that two
     * grams with different surface or tokens may have the same identifier,
     * based on the policy of the class that generated the gram.
     *
     * For example, "italian" and "Italy" may have the same identifier, because
     * the identifier has been generated using the same stem "ital". Otherwise,
     * the identifier may be the same link on an external ontology: in this
     * case, both words may have been associated with the entity "Italy".
     *
     * @return the signature of the gram.
     */
    @Override
    public String getIdentifier() {
        return super.getIdentifier();
    }

    // <editor-fold desc="Feature and annotation Management">
    /**
     * Adds a feature to the gram.
     *
     * @param feature the identifier of the feature
     * @param value the value of the feature
     */
    public void putFeature(String feature, double value) {
        addAnnotation(new FeatureAnnotation(feature, value));
    }

    /**
     * Adds a feature to the gram.
     *
     * @param f the feature to add.
     */
    public void putFeature(FeatureAnnotation f) {
        addAnnotation(f);
    }

    /**
     * Check if the gram has been annotated by the annotator specified via input
     * string.
     *
     * @param featureName the name of the feature to search
     * @return true if the gram has the feature; false otherwise
     */
    public boolean hasFeature(String featureName) {
        return this.hasAnnotation(featureName);
    }

    /**
     * Gets the feature generated by the annotator specified via input string.
     *
     * Please note that this method makes no difference between a feature that
     * has been assigned with value 0 and a feature that has not been assigned
     * to the gram, since in both cases the value 0 will be returned.
     *
     * @param featureName the name of the feature to search
     * @return the value of the feature. Returns 0 if the feature is not in the
     * gram.
     */
    public double getFeature(String featureName) {
        // null check; if the feature is not specified, we assume it's 0.
        if (!this.hasAnnotation(featureName)) {
            return 0;
        }

        FeatureAnnotation feature =
                (FeatureAnnotation) getAnnotation(featureName);
        return feature.getScore();
    }

    /**
     * Returns all the features associated with the gram, i.e. every
     * annotation of the gram that is a {@link FeatureAnnotation}.
     *
     * @return all the features associated with the gram.
     */
    @JsonIgnore
    public FeatureAnnotation[] getFeatures() {
        List<FeatureAnnotation> features = new ArrayList<>();
        for (Annotation ann : getAnnotations()) {
            if (ann instanceof FeatureAnnotation) {
                features.add((FeatureAnnotation) ann);
            }
        }
        return features.toArray(new FeatureAnnotation[0]);
    }

    /**
     * Adds all the given features to the gram, one annotation per feature.
     * <p>
     * NOTE(review): this method does not explicitly remove the features
     * already stored in the gram; whether an existing feature is replaced
     * depends on how {@code addAnnotation} treats annotations that are
     * already present - confirm against Gram.
     *
     * @param features the features to add to the gram.
     */
    public void setFeatures(FeatureAnnotation[] features) {
        for (FeatureAnnotation f : features) {
            this.addAnnotation(f);
        }
    }
    // </editor-fold>

    // <editor-fold desc="Location Management">
    /**
     * Adds an appearance of the gram; in other words, adds the component in
     * which the gram appears to the list of the appearances.
     *
     * @param component the component in which the gram appears
     */
    @Override
    public void addAppaerance(DocumentComponent component) {
        appareances.add(component);
    }

    /**
     * Gets all the components in which the gram appears.
     * <p>
     * The returned list is the internal one, not a copy.
     *
     * @return all the components in which the gram appears.
     */
    @Override
    public List<DocumentComponent> getAppaerances() {
        return appareances;
    }
    // </editor-fold>
}