/**
* AnalyzerBeans
* Copyright (C) 2014 Neopost - Customer Information Management
*
* This copyrighted material is made available to anyone wishing to use, modify,
* copy, or redistribute it subject to the terms and conditions of the GNU
* Lesser General Public License, as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
* or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License
* for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this distribution; if not, write to:
* Free Software Foundation, Inc.
* 51 Franklin Street, Fifth Floor
* Boston, MA 02110-1301 USA
*/
package org.eobjects.analyzer.beans.stringpattern;
import java.io.Serializable;
import java.text.DecimalFormatSymbols;
import java.util.EnumMap;
import java.util.EnumSet;
import java.util.LinkedList;
import java.util.List;
public class TokenizerConfiguration implements Serializable {
private static final long serialVersionUID = 1L;
private EnumSet<TokenType> _tokenTypes;
private EnumMap<TokenType, Boolean> _discriminateTokenLength;
private boolean _discriminateTextCase;
private boolean _discriminateWhiteSpaces;
private boolean _discriminateDecimalNumbers;
private boolean _discriminateNegativeNumbers;
private boolean _upperCaseExpandable;
private boolean _lowerCaseExpandable;
private Character _thousandsSeparator;
private Character _decimalSeparator;
private Character _minusSign;
private List<PredefinedTokenDefinition> _predefinedTokens = new LinkedList<PredefinedTokenDefinition>();
public TokenizerConfiguration() {
this(true);
}
public TokenizerConfiguration(boolean enableMixedTokens) {
this(enableMixedTokens, DecimalFormatSymbols.getInstance().getDecimalSeparator(), DecimalFormatSymbols.getInstance()
.getGroupingSeparator(), DecimalFormatSymbols.getInstance().getMinusSign());
}
public TokenizerConfiguration(boolean enableMixed, Character decimalSeparator, Character thousandsSeparator,
Character minusSign) {
_tokenTypes = EnumSet.allOf(TokenType.class);
if (!enableMixed) {
_tokenTypes.remove(TokenType.MIXED);
}
// set default values;
_discriminateTokenLength = new EnumMap<TokenType, Boolean>(TokenType.class);
_discriminateTokenLength.put(TokenType.TEXT, false);
_discriminateTokenLength.put(TokenType.NUMBER, false);
_discriminateTokenLength.put(TokenType.MIXED, false);
_discriminateTokenLength.put(TokenType.PREDEFINED, false);
_discriminateTokenLength.put(TokenType.WHITESPACE, true);
_discriminateTokenLength.put(TokenType.DELIM, true);
_discriminateTextCase = true;
_discriminateWhiteSpaces = true;
_discriminateDecimalNumbers = true;
_discriminateNegativeNumbers = false;
_upperCaseExpandable = false;
_lowerCaseExpandable = true;
_decimalSeparator = decimalSeparator;
_thousandsSeparator = thousandsSeparator;
_minusSign = minusSign;
}
/**
* Sets which token types are enabled
*/
public void setTokenTypes(EnumSet<TokenType> tokenTypes) {
_tokenTypes = tokenTypes;
}
/**
* Which token types are enabled
*/
public EnumSet<TokenType> getTokenTypes() {
return _tokenTypes;
}
/**
* Should tokens be discriminated (when matching) based on length. For
* example, if "hello" and "hi" should be matched, then length
* discrimination should be false. If only "hello" and "world", but not "hi"
* should be matched then length discrimination should be true.
*/
public EnumMap<TokenType, Boolean> getDiscriminateTokenLength() {
return _discriminateTokenLength;
}
/**
* Should tokens be discriminated (when matching) based on length. For
* example, if "hello" and "hi" should be matched, then length
* discrimination should be false. If only "hello" and "world", but not "hi"
* should be matched then length discrimination should be true.
*/
public boolean isDistriminateTokenLength(TokenType tokenType) {
Boolean discriminateTokenLength = _discriminateTokenLength.get(tokenType);
if (discriminateTokenLength == null) {
return false;
}
return discriminateTokenLength.booleanValue();
}
/**
* Sets which tokens should be discriminated (when matching) based on
* length. For example, if "hello" and "hi" should be matched, then length
* discrimination should be false. If only "hello" and "world", but not "hi"
* should be matched then length discrimination should be true.
*/
public void setDistriminateTokenLength(EnumMap<TokenType, Boolean> discriminateTokenLength) {
_discriminateTokenLength = discriminateTokenLength;
}
/**
* Sets which tokens should be discriminated (when matching) based on
* length. For example, if "hello" and "hi" should be matched, then length
* discrimination should be false. If only "hello" and "world", but not "hi"
* should be matched then length discrimination should be true.
*/
public void setDistriminateTokenLength(TokenType tokenType, boolean discriminateTokenLength) {
_discriminateTokenLength.put(tokenType, Boolean.valueOf(discriminateTokenLength));
}
/**
* Discriminate the case of characters in TEXT tokens
*/
public boolean isDiscriminateTextCase() {
return _discriminateTextCase;
}
/**
* Sets whether to discriminate the case of characters in TEXT tokens
*/
public void setDiscriminateTextCase(boolean discriminateTextCase) {
_discriminateTextCase = discriminateTextCase;
}
/**
* Discriminate the type of whitespaces (space, tab etc.)
*/
public boolean isDiscriminateWhiteSpaces() {
return _discriminateWhiteSpaces;
}
/**
* Sets whether to discriminate the type of whitespaces (space, tab etc.)
*/
public void setDiscriminateWhiteSpaces(boolean discriminateWhiteSpaces) {
_discriminateWhiteSpaces = discriminateWhiteSpaces;
}
public List<PredefinedTokenDefinition> getPredefinedTokens() {
return _predefinedTokens;
}
public void setPredefinedTokens(List<PredefinedTokenDefinition> predefinedTokens) {
_predefinedTokens = predefinedTokens;
}
/**
* Discriminate decimal numbers from integers when matching
*/
public boolean isDiscriminateDecimalNumbers() {
return _discriminateDecimalNumbers;
}
/**
* Sets whether to discriminate decimal numbers from integers when matching
*/
public void setDiscriminateDecimalNumbers(boolean discriminateDecimalNumbers) {
_discriminateDecimalNumbers = discriminateDecimalNumbers;
}
/**
* Characters to use for thousands separator in numbers (typically ',')
*/
public Character getThousandsSeparator() {
return _thousandsSeparator;
}
/**
* Sets the characters to use for thousands separator in numbers (typically
* ',')
*/
public void setThousandsSeparator(Character thousandSeparator) {
_thousandsSeparator = thousandSeparator;
}
/**
* Characters to use for decimal separation in numbers (typically '.')
*/
public Character getDecimalSeparator() {
return _decimalSeparator;
}
/**
* Sets the characters to use for decimal separation in numbers (typically
* '.')
*/
public void setDecimalSeparator(Character decimalSeparator) {
_decimalSeparator = decimalSeparator;
}
/**
* Character to use for minus sign in numbers (typically '-')
*/
public Character getMinusSign() {
return _minusSign;
}
/**
* Sets the character to use for minus sign in numbers (typically '-')
*/
public void setMinusSign(Character minusSign) {
_minusSign = minusSign;
}
/**
* Discriminate negative numbers from positive numbers
*/
public boolean isDiscriminateNegativeNumbers() {
return _discriminateNegativeNumbers;
}
/**
* Sets whether to discriminate negative numbers from positive numbers
*/
public void setDiscriminateNegativeNumbers(boolean discriminateNegativeNumbers) {
_discriminateNegativeNumbers = discriminateNegativeNumbers;
}
/**
* Are upper case TEXT tokens expandable (ie. "ABC" and "ABCD" is treated as
* a single "AAAA" pattern) or not
*/
public boolean isUpperCaseExpandable() {
return _upperCaseExpandable;
}
/**
* Sets whether or not to make upper case TEXT tokens expandable
*
* @param upperCaseExpandable
*/
public void setUpperCaseExpandable(boolean upperCaseExpandable) {
_upperCaseExpandable = upperCaseExpandable;
}
/**
* Are lower case TEXT tokens expandable (ie. "hello" and "hi" is treated as
* a single "aaaaa" pattern) or not
*/
public boolean isLowerCaseExpandable() {
return _lowerCaseExpandable;
}
/**
* Sets whether or not to make lower case TEXT tokens expandable
*
* @param lowerCaseExpandable
*/
public void setLowerCaseExpandable(boolean lowerCaseExpandable) {
_lowerCaseExpandable = lowerCaseExpandable;
}
public boolean isTokenTypeEnabled(TokenType tokenType) {
return _tokenTypes.contains(tokenType);
}
}