/*
 * aitools utilities
 * Copyright (C) 2006 Noel Bush
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

package org.aitools.util.xml;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import org.jdom.Text;

/**
 * Utilities specific to (X)HTML handling.
 *
 * @author <a href="mailto:noel@aitools.org">Noel Bush</a>
 */
public class XHTML {

    /**
     * Separator used to split a message into lines: a {@code <br/>}, an opening {@code <p>} or a closing
     * {@code </p>} tag, together with any CR/LF characters directly before or after it.
     */
    private static final Pattern XHTML_BREAK_LINE_REGEX = Pattern
            .compile("[\r\n]*(<(.+?:)?br( .*?)?/>|<(.+?:)?p( .+?)?>|</(.+?:)?p>)[\r\n]*");

    /** Matches a piece that consists entirely of one {@code <pre>} element; group 1 is its content. */
    private static final Pattern PRE_REGEX = Pattern.compile("<(?:.+?:)?pre(?: .+?)?>(.+?)</(?:.+?:)?pre>",
            Pattern.DOTALL);

    /** Matches a run of one or more CR/LF characters. */
    private static final Pattern LINEFEED_REGEX = Pattern.compile("[\r\n]+");

    /**
     * Breaks a message into multiple lines at each (X)HTML {@code <br/>}, opening {@code <p>} or closing
     * {@code </p>}. Pieces that are empty after normalization are dropped, so a break at the very beginning of
     * the message does not produce a leading blank line. A piece that is exactly one {@code <pre>} element is
     * instead split at its literal line breaks, and those lines are kept without whitespace normalization.
     * <p>
     * The returned array is always a fresh instance that the caller may modify freely.
     *
     * @param input the string to break; may be {@code null}
     * @return one line per array item; a single-element array holding the empty string if {@code input} is
     *         {@code null}, blank, or contains nothing but line-break markup
     */
    public static String[] breakLines(String input) {
        // Null inputs yield the single-empty-line result.
        if (input == null) {
            return blankResult();
        }

        // Trim all whitespace at beginning and end.
        String _input = input.trim();

        // Inputs that are empty after trimming yield the single-empty-line result.
        if (_input.length() == 0) {
            return blankResult();
        }

        // Split into lines.
        List<String> rawLines = Arrays.asList(XHTML_BREAK_LINE_REGEX.split(_input));

        // Preserve actual line breaks within <pre/> sections.
        List<String> preservedLines = new ArrayList<String>(rawLines.size());
        for (String line : rawLines) {
            Matcher matcher = PRE_REGEX.matcher(line);
            if (matcher.matches()) {
                for (String preLine : LINEFEED_REGEX.split(matcher.group(1))) {
                    if (preLine.length() > 0) {
                        preservedLines.add(preLine);
                    }
                }
            } else {
                // Normalize the line and remove it if nothing is left.
                String normalizedLine = Text.normalizeString(line);
                if (normalizedLine.length() > 0) {
                    preservedLines.add(normalizedLine);
                }
            }
        }

        // Nothing but markup/whitespace: answer the same way as for blank input.
        if (preservedLines.isEmpty()) {
            return blankResult();
        }

        // Always copy into a new array. Passing a shared array to toArray() would let the list write into it
        // (and return it) whenever the result fits, leaking one call's lines into later calls.
        return preservedLines.toArray(new String[preservedLines.size()]);
    }

    /**
     * Creates the result used when there is no content: a new one-element array containing the empty string.
     * A new array is allocated on every call so that no caller can alter another caller's result.
     *
     * @return a fresh array holding a single empty string
     */
    private static String[] blankResult() {
        return new String[] { "" };
    }
}