/**
* AnalyzerBeans
* Copyright (C) 2014 Neopost - Customer Information Management
*
* This copyrighted material is made available to anyone wishing to use, modify,
* copy, or redistribute it subject to the terms and conditions of the GNU
* Lesser General Public License, as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
* or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License
* for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this distribution; if not, write to:
* Free Software Foundation, Inc.
* 51 Franklin Street, Fifth Floor
* Boston, MA 02110-1301 USA
*/
package org.eobjects.analyzer.beans.stringpattern;
import java.util.Arrays;
import java.util.List;
import java.util.ListIterator;
import java.util.StringTokenizer;
/**
 * {@link Tokenizer} that "reverse engineers" a pattern string back into a list
 * of tokens.
 *
 * The pattern is first tokenized by a {@link DefaultTokenizer} built from the
 * same configuration. When the MIXED token type is enabled, every resulting
 * DELIM token whose text contains "??" is then replaced in place by a sequence
 * of MIXED tokens (runs of '?') and DELIM tokens (everything else).
 */
public class ReverseTokenizer implements Tokenizer {

    private final TokenizerConfiguration _configuration;

    // String forms of the two special tokens, looked up once per instance.
    private final String nullTokenString = NullToken.INSTANCE.getString();
    private final String blankTokenString = BlankToken.INSTANCE.getString();

    /**
     * @param configuration
     *            the configuration handed to the delegate tokenizer and
     *            consulted to decide whether MIXED tokens are enabled
     */
    public ReverseTokenizer(TokenizerConfiguration configuration) {
        _configuration = configuration;
    }

    /**
     * Converts a pattern string into tokens.
     *
     * @param pattern
     *            the pattern string, may be null
     * @return a single-element list holding {@link NullToken#INSTANCE} when the
     *         pattern is null or equals the null token's string; a
     *         single-element list holding {@link BlankToken#INSTANCE} when it
     *         equals the blank token's string; otherwise the delegate's token
     *         list, with "??"-containing DELIM tokens split up if MIXED tokens
     *         are enabled
     */
    @Override
    public List<Token> tokenize(final String pattern) {
        if (pattern == null || nullTokenString.equals(pattern)) {
            return Arrays.asList(NullToken.INSTANCE);
        }
        if (blankTokenString.equals(pattern)) {
            return Arrays.asList(BlankToken.INSTANCE);
        }

        final List<Token> result = new DefaultTokenizer(_configuration).tokenize(pattern);
        if (!_configuration.isTokenTypeEnabled(TokenType.MIXED)) {
            return result;
        }

        final ListIterator<Token> cursor = result.listIterator();
        while (cursor.hasNext()) {
            final Token candidate = cursor.next();
            if (candidate.getType() != TokenType.DELIM) {
                continue;
            }
            final String text = candidate.getString();
            if (!text.contains("??")) {
                continue;
            }
            // Swap the combined token for its parts; the parts are inserted at
            // the cursor, so iteration resumes after them.
            cursor.remove();
            insertSplitTokens(text, cursor);
        }
        return result;
    }

    /**
     * Splits the given text on '?' and inserts the pieces at the cursor:
     * consecutive '?' characters are merged into one MIXED token, any other
     * piece becomes a DELIM token. Note that every '?' in the text ends up in
     * a MIXED token, including an isolated one.
     */
    private static void insertSplitTokens(final String text, final ListIterator<Token> cursor) {
        // returnDelims = true: each '?' is handed back as its own one-char piece
        final StringTokenizer pieces = new StringTokenizer(text, "?", true);
        final StringBuilder pendingMixed = new StringBuilder();
        while (pieces.hasMoreTokens()) {
            final String piece = pieces.nextToken();
            if (piece.startsWith("?")) {
                pendingMixed.append(piece);
                continue;
            }
            flushMixed(pendingMixed, cursor);
            cursor.add(new SimpleToken(TokenType.DELIM, piece));
        }
        flushMixed(pendingMixed, cursor);
    }

    /**
     * Inserts the accumulated '?' run as a MIXED token and clears the buffer;
     * does nothing when the buffer is empty.
     */
    private static void flushMixed(final StringBuilder pendingMixed, final ListIterator<Token> cursor) {
        if (pendingMixed.length() == 0) {
            return;
        }
        cursor.add(new SimpleToken(TokenType.MIXED, pendingMixed.toString()));
        pendingMixed.setLength(0);
    }
}