/* * File: TermLengthFilter.java * Authors: Justin Basilico * Company: Sandia National Laboratories * Project: Cognitive Foundry * * Copyright July 27, 2009, Sandia Corporation. * Under the terms of Contract DE-AC04-94AL85000, there is a non-exclusive * license for use of this work by or on behalf of the U.S. Government. Export * of this program may require a license from the United States Government. * See CopyrightHistory.txt for complete details. * */ package gov.sandia.cognition.text.term.filter; import gov.sandia.cognition.annotation.PublicationReference; import gov.sandia.cognition.annotation.PublicationType; import gov.sandia.cognition.text.term.TermOccurrence; /** * Implements a filter based on the length of a term. The length is computed * from the name of the term. * * @author Justin Basilico * @since 3.0 */ public class TermLengthFilter extends AbstractSingleTermFilter { /** The default minimum length is {@value}. */ public static final int DEFAULT_MINIMUM_LENGTH = 3; /** The default maximum length is {@value}. Based on the maximum known * length of non-technical and non-coined English words. */ @PublicationReference( author="Wikipedia", title="Longest word in English", year=2009, type=PublicationType.WebPage, url="http://en.wikipedia.org/wiki/Longest_word_in_English") public static final int DEFAULT_MAXIMUM_LENGTH = 28; /** The minimum allowed length. Inclusive. A null value indicates no * minimum. Must be non-negative. */ protected Integer minimumLength; /** The maximum allowed length. Inclusive. A null value indicates no * maximum. Must be non-negative. */ protected Integer maximumLength; /** * Creates a new {@code TermLengthFilter} with default minimum and * maximum values. */ public TermLengthFilter() { this(DEFAULT_MINIMUM_LENGTH, DEFAULT_MAXIMUM_LENGTH); } /** * Creates a new {@code TermLengthFilter} with given minimum and maximum * values. * * @param minimumLength * The minimum allowed term length. * @param maximumLength * The maximum allowed term length. */ public TermLengthFilter( final Integer minimumLength, final Integer maximumLength) { super(); this.setMinimumLength(minimumLength); this.setMaximumLength(maximumLength); } public TermOccurrence filterTerm( final TermOccurrence occurrence) { // Get the length of the name of the term. final int length = occurrence.getTerm().getName().toString().length(); if (this.minimumLength != null && length < this.minimumLength) { // Smaller than the minimum length. return null; } else if (this.maximumLength != null && length > this.maximumLength) { // Larger than the maximum length. return null; } else { // Within the term bounds. return occurrence; } } /** * Gets the minimum length allowed for a term (inclusive). A null value * means no minimum. * * @return * The minimum length allowed for a term. */ public Integer getMinimumLength() { return this.minimumLength; } /** * Gets the minimum length allowed for a term (inclusive). A null value * means no minimum. * * @param minimumLength * The minimum length allowed for a term. Must be non-negative. */ public void setMinimumLength( final Integer minimumLength) { if (minimumLength != null && minimumLength < 0) { throw new IllegalArgumentException( "minimumLength must be non-negative"); } this.minimumLength = minimumLength; } /** * Gets the maximum length allowed for a term (inclusive). A null value * means no maximum. * * @return * The maximum length allowed for a term. */ public Integer getMaximumLength() { return this.maximumLength; } /** * Gets the maximum length allowed for a term (inclusive). A null value * means no maximum. * * @param maximumLength * The maximum length allowed for a term. Must be positive. */ public void setMaximumLength( final Integer maximumLength) { if (maximumLength != null && maximumLength <= 0) { throw new IllegalArgumentException( "maximumLength must be non-negative"); } this.maximumLength = maximumLength; } }