/* * Copyright (C) 2013 Alex Kuiper * * This file is part of PageTurner * * PageTurner is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * PageTurner is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with PageTurner. If not, see <http://www.gnu.org/licenses/>.* */ package net.nightwhistler.pageturner; import jedi.functional.FunctionalPrimitives; import java.util.List; import java.util.regex.Matcher; import java.util.regex.Pattern; import static java.util.Arrays.asList; import static jedi.functional.FunctionalPrimitives.select; public class TextUtil { private static final Pattern PUNCTUATION = Pattern.compile("\\.( ?\\.)*[\"'”’]?|[\\?!] ?[\"'”’]?|, ?[\"'”’]|”"); /* These are titles like Mr. Mrs., etc that will often cause incorrect breaks in English text. We filter them out. */ private static final String[] TITLES = { "mr", "mrs", "dr", "ms", "st" }; private TextUtil() {} /** * Processes an input string and enters a newline after full stops, * question marks, etc. * * @param input * @return */ public static List<String> splitOnPunctuation(String input) { StringBuffer stringBuffer = new StringBuffer(); Matcher matcher = PUNCTUATION.matcher(input); int previousMatch = 0; while (matcher.find()) { String match = matcher.group(); int startIndex = matcher.start(); String subString = input.substring(previousMatch, startIndex ); boolean shouldReplace = true; for ( String title: TITLES ) { if ( subString.toLowerCase().endsWith(title)) { shouldReplace = false; } } if ( subString.trim().length() == 1 ) { shouldReplace = false; } String replacement; if ( shouldReplace ) { replacement = match + "\n"; } else { replacement = match; } matcher.appendReplacement(stringBuffer, replacement); previousMatch = startIndex; } matcher.appendTail(stringBuffer); return select(asList(stringBuffer.toString().split("\n")), s -> s.length() > 0 ); } public static String shortenText( String original ) { String text = original; if ( text.length() > 40 ) { text = text.substring(0, 40) + "…"; } return text; } }