/*
* Copyright 2006-2012 the original author or authors.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.springframework.batch.item.file.transform;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.springframework.util.Assert;
/**
* Line-tokenizer using a regular expression to filter out data (by using matching and non-matching groups).
* Consider the following regex which picks only the first and last name (notice the non-matching group in the middle):
* <pre>
* (.*?)(?: .*)* (.*)
* </pre>
* For the names:
* <ul>
* <li>"Graham James Edward Miller"</li>
* <li>"Andrew Gregory Macintyre"</li>
* <li>"No MiddleName"</li>
* </ul>
*
* the output will be:
* <ul>
* <li>"Miller", "Graham"</li>
* <li>"Macintyre", "Andrew"</li>
* <li>"MiddleName", "No"</li>
* </ul>
*
* An empty list is returned, in case of a non-match.
*
* @see Matcher#group(int)
* @author Costin Leau
*/
public class RegexLineTokenizer extends AbstractLineTokenizer {
private Pattern pattern;
@Override
protected List<String> doTokenize(String line) {
Matcher matcher = pattern.matcher(line);
boolean matchFound = matcher.find();
if (matchFound) {
List<String> tokens = new ArrayList<String>(matcher.groupCount());
for (int i = 1; i <= matcher.groupCount(); i++) {
tokens.add(matcher.group(i));
}
return tokens;
}
return Collections.emptyList();
}
/**
* Sets the regex pattern to use.
*
* @param pattern Regular Expression pattern
*/
public void setPattern(Pattern pattern) {
Assert.notNull(pattern, "a non-null pattern is required");
this.pattern = pattern;
}
/**
* Sets the regular expression to use.
*
* @param regex regular expression (as a String)
*/
public void setRegex(String regex) {
Assert.hasText(regex, "a valid regex is required");
this.pattern = Pattern.compile(regex);
}
}