/**
 * AnalyzerBeans
 * Copyright (C) 2014 Neopost - Customer Information Management
 *
 * This copyrighted material is made available to anyone wishing to use, modify,
 * copy, or redistribute it subject to the terms and conditions of the GNU
 * Lesser General Public License, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
 * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License
 * for more details.
 *
 * You should have received a copy of the GNU Lesser General Public License
 * along with this distribution; if not, write to:
 * Free Software Foundation, Inc.
 * 51 Franklin Street, Fifth Floor
 * Boston, MA 02110-1301 USA
 */
package org.eobjects.analyzer.util;

import java.util.ArrayList;
import java.util.Collections;
import java.util.EnumMap;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Set;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * Provides a useful abstraction over regular expressions where the groups are
 * named and not necessarily ordered in the way specified by the enum that holds
 * the group names.
 *
 * <p>
 * A pattern such as <code>"LASTNAME, FIRSTNAME"</code> is turned into a regular
 * expression by substituting every group token (by default the enum constant
 * name) with that group's literal. Parentheses and square brackets in the
 * supplied pattern are escaped so that they match literally; all other regular
 * expression syntax in the pattern is passed on unchanged.
 * </p>
 *
 * @param <E>
 *            the enum type that defines the available group names
 */
public class NamedPattern<E extends Enum<E>> {

    private static final Logger logger = LoggerFactory.getLogger(NamedPattern.class);

    /**
     * Defines a default group literal which resolves to a single word with any
     * kind of letter (including diacritics)
     */
    public static final String DEFAULT_GROUP_LITERAL = "([\\p{Lu}\\p{Ll}]+)";

    /** Maps each used group to the index of its (first) capturing group. */
    private final EnumMap<E, Integer> groupIndexes;
    private final Pattern pattern;
    private final Class<E> groupEnum;

    /**
     * Builds a named pattern.
     *
     * <p>
     * The pattern is scanned once from left to right. At every position the
     * longest group token starting there is substituted by the group's
     * literal, so a token that is a prefix of another token (for example
     * <code>NAME</code> and <code>NAMESUFFIX</code>) cannot corrupt the longer
     * one. Capturing group numbers are assigned in the order the literals are
     * emitted; when a token occurs more than once, the group is mapped to its
     * first occurrence. Each group literal is assumed to contribute exactly
     * one capturing group.
     * </p>
     *
     * @param pattern
     *            the pattern text containing one or more group tokens
     * @param groupEnum
     *            the enum class that defines the available groups
     * @throws IllegalArgumentException
     *             if an argument is null, or if none of the groups occur in
     *             the pattern
     */
    public NamedPattern(String pattern, Class<E> groupEnum) {
        if (pattern == null) {
            throw new IllegalArgumentException("pattern cannot be null");
        }
        if (groupEnum == null) {
            throw new IllegalArgumentException("groupEnum cannot be null");
        }

        // brackets and parentheses in the user's pattern are meant literally
        pattern = pattern.replace("(", "\\(");
        pattern = pattern.replace(")", "\\)");
        pattern = pattern.replace("[", "\\[");
        pattern = pattern.replace("]", "\\]");

        this.groupEnum = groupEnum;
        this.groupIndexes = new EnumMap<E, Integer>(groupEnum);

        final E[] availableGroups = groupEnum.getEnumConstants();
        final List<String> groupTokens = new ArrayList<String>(availableGroups.length);
        for (int i = 0; i < availableGroups.length; i++) {
            groupTokens.add(getGroupToken(availableGroups[i]));
        }

        final StringBuilder regex = new StringBuilder();
        int nextGroupIndex = 1;
        int position = 0;
        while (position < pattern.length()) {
            final int tokenIndex = indexOfLongestTokenAt(pattern, position, groupTokens);
            if (tokenIndex == -1) {
                // plain pattern text, copied as-is
                regex.append(pattern.charAt(position));
                position++;
                continue;
            }

            final E group = availableGroups[tokenIndex];
            if (!groupIndexes.containsKey(group)) {
                // a repeated token keeps pointing at its first capturing group
                groupIndexes.put(group, nextGroupIndex);
            }
            regex.append(getGroupLiteral(group));
            nextGroupIndex++;
            position += groupTokens.get(tokenIndex).length();
        }

        if (groupIndexes.isEmpty()) {
            throw new IllegalArgumentException("None of the groups defined in " + groupEnum.getSimpleName()
                    + " where found in the pattern: " + pattern);
        }

        final String expression = regex.toString();
        logger.info("compiling pattern: {}", expression);
        this.pattern = Pattern.compile(expression);
    }

    /**
     * Finds the longest token that occurs in the text at exactly the given
     * offset.
     *
     * @param text
     *            the text to inspect
     * @param offset
     *            the position in the text where the token has to start
     * @param tokens
     *            the candidate tokens; null and empty tokens are ignored
     * @return the index (within the token list) of the longest matching token,
     *         or -1 if no token starts at the offset
     */
    private static int indexOfLongestTokenAt(String text, int offset, List<String> tokens) {
        int bestIndex = -1;
        int bestLength = 0;
        for (int i = 0; i < tokens.size(); i++) {
            final String token = tokens.get(i);
            if (token == null || token.length() <= bestLength) {
                // unusable, or not longer than what was already found
                continue;
            }
            if (text.startsWith(token, offset)) {
                bestIndex = i;
                bestLength = token.length();
            }
        }
        return bestIndex;
    }

    /**
     * Gets the token that represents a group inside the pattern text. The
     * default implementation uses the name of the enum constant.
     *
     * @param group
     *            the group to get the token for
     * @return the token to look for in the pattern text
     */
    protected String getGroupToken(E group) {
        return group.name();
    }

    /**
     * Gets the regular expression fragment that a group token is substituted
     * with. Groups that implement {@link HasGroupLiteral} and return a
     * non-null literal provide their own fragment; every other group gets
     * {@link #DEFAULT_GROUP_LITERAL}.
     *
     * @param group
     *            the group to get the literal for
     * @return the regular expression fragment for the group, never null
     */
    protected String getGroupLiteral(E group) {
        if (group instanceof HasGroupLiteral) {
            final String groupLiteral = ((HasGroupLiteral) group).getGroupLiteral();
            if (groupLiteral != null) {
                return groupLiteral;
            }
        }
        return DEFAULT_GROUP_LITERAL;
    }

    /**
     * Matches a string against this named pattern. The complete string has to
     * match the pattern; a match on only a part of the string does not count.
     *
     * @param string
     *            the string to match
     * @return a match object, or null if there was no match (which includes
     *         the case where the string is null)
     */
    public NamedPatternMatch<E> match(String string) {
        if (string == null) {
            return null;
        }

        final Matcher matcher = pattern.matcher(string);
        if (!matcher.matches()) {
            return null;
        }

        final Map<E, String> resultMap = new EnumMap<E, String>(groupEnum);
        for (Entry<E, Integer> entry : groupIndexes.entrySet()) {
            resultMap.put(entry.getKey(), matcher.group(entry.getValue()));
        }
        return new NamedPatternMatch<E>(resultMap);
    }

    /**
     * Gets the compiled regular expression that backs this named pattern.
     *
     * @return the compiled pattern
     */
    public Pattern getPattern() {
        return pattern;
    }

    /**
     * Gets the groups whose tokens were found in the pattern text.
     *
     * @return an unmodifiable set of the groups used by this pattern
     */
    public Set<E> getUsedGroups() {
        return Collections.unmodifiableSet(groupIndexes.keySet());
    }
}