package eu.dnetlib.iis.wf.affmatching.match.voter;

import java.io.Serializable;
import java.util.List;

import org.apache.commons.lang3.StringUtils;

/**
 * Class containing helper methods for filtering strings.
 *
 * @author madryk
 */
class StringFilter implements Serializable {

    private static final long serialVersionUID = 1L;


    //------------------------ LOGIC --------------------------

    /**
     * Returns the value with all charsToFilter removed and, afterwards, with all
     * words of length less than or equal to wordToRemoveMaxLength removed.
     * <p>
     * Internally uses {@link #filterChars(String, List)} followed by
     * {@link #filterShortWords(String, int)}.
     *
     * @param value string to filter; a null value yields null
     * @param charsToFilter characters to remove from the value
     * @param wordToRemoveMaxLength maximal length (inclusive) of the words to remove;
     *      zero or a negative number disables the removal of words
     * @return the filtered value
     */
    public String filterCharsAndShortWords(String value, List<Character> charsToFilter, int wordToRemoveMaxLength) {

        String withoutChars = filterChars(value, charsToFilter);

        return filterShortWords(withoutChars, wordToRemoveMaxLength);
    }

    /**
     * Returns the value with every occurrence of each of the charsToFilter removed.
     *
     * @param value string to filter; a null value yields null
     * @param charsToFilter characters to remove from the value
     * @return the value without any of the charsToFilter
     */
    public String filterChars(String value, List<Character> charsToFilter) {

        String filteredValue = value;

        for (Character charToFilter : charsToFilter) {
            filteredValue = StringUtils.remove(filteredValue, charToFilter);
        }

        return filteredValue;
    }

    /**
     * Returns the value without words whose length is less than or equal to
     * wordToRemoveMaxLength. A word is a run of regex word characters
     * (<code>\w</code>) enclosed by word boundaries (<code>\b</code>).
     * <p>
     * After the words are removed, the result is trimmed and every run of spaces
     * is collapsed into a single space.
     * <p>
     * When wordToRemoveMaxLength is zero or negative, or when the value is null,
     * the value is returned unchanged (no trimming or collapsing of spaces is performed).
     *
     * @param value string to filter; a null value yields null
     * @param wordToRemoveMaxLength maximal length (inclusive) of the words to remove
     * @return the filtered value
     */
    public String filterShortWords(String value, int wordToRemoveMaxLength) {

        // No word has a non-positive length; a negative bound would also produce
        // an invalid regex quantifier, so there is nothing to do in that case.
        if (value == null || wordToRemoveMaxLength <= 0) {
            return value;
        }

        String shortWordRegex = "\\b\\w{1," + wordToRemoveMaxLength + "}\\b";

        String withoutShortWords = StringUtils.removePattern(value, shortWordRegex);

        // Removed words leave their surrounding spaces behind - normalize them.
        return withoutShortWords.trim().replaceAll(" +", " ");
    }

}