/** * Copyright 2008 Google Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. * */ package org.waveprotocol.wave.model.util; @Deprecated public class TitleExtractor { /** * Extracts a title from an XML string. * The string may be either an entire document (that contains a title * element) or the inner XML of a title element). * * @param rich an XML string * @return a title extracted from {@code rich}. The string is element-free, * escaped XML. */ public static String extractTitle(String rich) { return firstPhrase(rich); } /** * Tests if an XML string contains only a title. * Essentially, this just tests if the text view of the XML string contains * only a single sentence. * * @param rich an XML string * @return true if {@code rich} contains just a single sentence. */ public static boolean isOnlyTitle(String rich) { String titleIsh = stripWhite(processForFirstPhrase(rich)); String title = extractTitle(rich); return titleIsh.equals(title); } private static String firstPhrase(String rich) { String processed = processForFirstPhrase(rich); int stop = processed.indexOf('\n'); return stripWhite(stop != -1 ? processed.substring(0, stop) : processed); } private static String processForFirstPhrase(String rich) { // Place a \n at first <br> String text = rich.replaceFirst("<br.*?>", "\n") // Place a \n at first closing p .replaceFirst("</p>", "\n") // Place a \n at first stop char, followed by whitespace .replaceFirst("((\\.|\\?|!)+(\\s))", "$1\n"); return stripTags(text); } /** * Remove opening and closing tags. */ private static String stripTags(String rich) { // This was done via a rich.replaceAll("<(.|\\n)+?>", ""); // which would cause a StackOverflowError in the indexer when a root blip contained a large // tag e.g. a gadget. StringBuilder b = new StringBuilder(); int start = 0; int open = rich.indexOf('<'); if (open < 0) { return rich; } while(start >= 0 && start < rich.length()) { if (open < 0) { // Append all the rest. b.append(rich.substring(start)); break; } if (open > start) { // Append the chars between the start and the open. b.append(rich.substring(start, open)); } // jump to the next start = rich.indexOf('>', open); if (start > 0) { // skip the '>' ++start; } open = rich.indexOf('<', start); } return b.toString(); } /** * Strips leading and trailing whitespace. */ private static String stripWhite(String text) { return text.replaceAll("^(\\s|\u00a0)+|\\s+$", ""); } }