package org.jabref.logic.formatter.bibtexfields; import java.util.Objects; import java.util.regex.Matcher; import java.util.regex.Pattern; import org.jabref.logic.l10n.Localization; import org.jabref.model.cleanup.Formatter; import com.google.common.base.Strings; /** * This class includes sensible defaults for consistent formatting of BibTeX page numbers. * * From BibTeX manual: * One or more page numbers or range of numbers, such as 42--111 or 7,41,73--97 or 43+ * (the '+' in this last example indicates pages following that don't form a simple range). * To make it easier to maintain Scribe-compatible databases, the standard styles convert * a single dash (as in 7-33) to the double dash used in TEX to denote number ranges (as in 7--33). */ public class NormalizePagesFormatter implements Formatter { // "startpage" and "endpage" are named groups. See http://stackoverflow.com/a/415635/873282 for a documentation private static final Pattern PAGES_DETECT_PATTERN = Pattern.compile("\\A(?<startpage>(\\d+:)?\\d+)(?:-{1,2}(?<endpage>(\\d+:)?\\d+))?\\Z"); private static final String REJECT_LITERALS = "[^a-zA-Z0-9,\\-\\+,:]"; private static final String PAGES_REPLACE_PATTERN = "${startpage}--${endpage}"; private static final String SINGLE_PAGE_REPLACE_PATTERN = "$1"; @Override public String getName() { return Localization.lang("Normalize page numbers"); } @Override public String getKey() { return "normalize_page_numbers"; } /** * Format page numbers, separated either by commas or double-hyphens. * Converts the range number format of the <code>pages</code> field to page_number--page_number. * Removes unwanted literals except letters, numbers and -+ signs. * Keeps the existing String if the resulting field does not match the expected Regex. * * <example> * 1-2 -> 1--2 * 1,2,3 -> 1,2,3 * {1}-{2} -> 1--2 * 43+ -> 43+ * Invalid -> Invalid * </example> */ @Override public String format(String value) { Objects.requireNonNull(value); if (value.isEmpty()) { // nothing to do return value; } // Remove pages prefix String cleanValue = value.replace("pp.", "").replace("p.",""); // remove unwanted literals incl. whitespace cleanValue = cleanValue.replaceAll("\u2013|\u2014", "-").replaceAll(REJECT_LITERALS, ""); // try to find pages pattern Matcher matcher = PAGES_DETECT_PATTERN.matcher(cleanValue); if (matcher.matches()) { // replace if (Strings.isNullOrEmpty(matcher.group("endpage"))) { return matcher.replaceFirst(SINGLE_PAGE_REPLACE_PATTERN); } else { return matcher.replaceFirst(PAGES_REPLACE_PATTERN); } } // no replacement return value; } @Override public String getDescription() { return Localization.lang("Normalize pages to BibTeX standard."); } @Override public String getExampleInput() { return "1 - 2"; } @Override public int hashCode() { return defaultHashCode(); } @Override public boolean equals(Object obj) { return defaultEquals(obj); } }