/* * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. * The ASF licenses this file to You under the Apache License, Version 2.0 * (the "License"); you may not use this file except in compliance with * the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ /* $Id$ */ package org.apache.fop.text.linebreak; import org.junit.Test; import static org.junit.Assert.assertTrue; /** * JUnit test case for the LineBreakStatus class */ public class LineBreakStatusTestCase { /* * These symbols are used to indicate the break action returned * by the paragraph breaking. Their meaning is as per Unicode * <a href="http://unicode.org/reports/tr14/#PairBasedImplementation">technical * report #14</a>. */ private static final String BREAK_ACTION = "_%#@^!"; /** * Test of reset method, of class org.apache.commons.text.linebreak.LineBreakStatus. */ @Test public void testReset() { System.out.println("testReset"); // TODO } /** * Test of nextChar method, of class org.apache.commons.text.linebreak.LineBreakStatus. * Runs tests for most of the Line Breaking Properties defined in the Unicode standard. */ @Test public void testNextChar() { System.out.println("testNextChar"); // AL -- Ordinary Alphabetic and Symbol Characters (XP) assertTrue(testBreak( "Nobreak", "^^^^^^^" )); // BA -- Break Opportunity After (A) assertTrue(testBreak( "Thin Space" + "\u2009" + "break", "^^^^^%^^^^" + "^" + "_^^^^" )); assertTrue(testBreak( "Shy" + "\u00AD" + "break", "^^^" + "^" + "_^^^^" )); // BB -- Break opportunites before characters (B) assertTrue(testBreak( "Acute Accent" + "\u00B4" + "break", "^^^^^^%^^^^^" + "_" + "^^^^^" )); // B2 -- Break Opportunity Before and After (B/A/XP) assertTrue(testBreak( "Em Dash" + "\u2014" + "break", "^^^%^^^" + "_" + "_^^^^" )); assertTrue(testBreak( "Em Dash Dash" + "\u2014" + "\u2014" + "break", "^^^%^^^^%^^^" + "_" + "^" + "_^^^^" )); // BK Mandatory Break (A) -- normative assertTrue(testBreak( "Form Feed" + "\u000C" + "break", "^^^^^%^^^" + "^" + "!^^^^" )); assertTrue(testBreak( "Line Separator" + "\u2028" + "break", "^^^^^%^^^^^^^^" + "^" + "!^^^^" )); assertTrue(testBreak( "Paragraph Separator" + "\u2029" + "break", "^^^^^^^^^^%^^^^^^^^" + "^" + "!^^^^" )); // CB Contingent Break Opportunity (B/A) -- normative // TODO Don't know quite what to do here // CL -- Closing Punctuation (XB) assertTrue(testBreak( "Right Parenthesis ) break", "^^^^^^%^^^^^^^^^^^^^%^^^^" )); // CM -- Attached Characters and Combining Marks (XB) -- normative assertTrue(testBreak( "Grave Accent" + "\u0300" + " break", "^^^^^^%^^^^^" + "^" + "^%^^^^" )); // CR -- Carriage Return (A) -- normative assertTrue(testBreak( "CR" + "\r" + "break", "^^" + "^" + "!^^^^" )); assertTrue(testBreak( "CRLF" + "\r\n" + "break", "^^^^" + "^^" + "!^^^^" )); // EX -- Exclamation / interrogation (XB) assertTrue(testBreak( "EX CL ! ) break", "^^^%^^^^^^%^^^^" )); assertTrue(testBreak( "EX Wave Dash ! " + "\u301C" + " break", "^^^%^^^^%^^^^^^" + "%" + "^_^^^^" )); // GL -- Non-breaking ("Glue") (XB/XA) -- normative assertTrue(testBreak( "No" + "\u00a0" + "break", "^^" + "^" + "^^^^^" )); assertTrue(testBreak( "Non" + "\u2011" + " Hyphen", "^^^" + "^" + "^%^^^^^" )); // H2 -- Hangul LVT Syllable (B/A) // TODO // H3 -- Hangul LVT Syllable (B/A) // TODO // HY -- Hyphen Minus assertTrue(testBreak( "Normal-Hyphen", "^^^^^^^_^^^^^" )); assertTrue(testBreak( "Normal - Hyphen", "^^^^^^^%^_^^^^^" )); assertTrue(testBreak( "123-456", "^^^^^^^" )); assertTrue(testBreak( "123 - 456", "^^^^%^%^^" )); // ID -- Ideographic (B/A) assertTrue(testBreak( "\u4E00" + "\u3000" + "\u4E02", "^" + "_" + "_" )); // IN -- Inseperable characters (XP) assertTrue(testBreak( "IN " + "\u2024" + "\u2025" + "\u2026", "^^^" + "%" + "^" + "^" )); // IS -- Numeric Separator (Infix) (XB) assertTrue(testBreak( "123,456.00 12:59", "^^^^^^^^^^^%^^^^" )); // JL -- Hangul L Jamo (B) // TODO // JT -- Hangul T Jamo (A) // TODO // JV -- Hangul V Jamo (XA/XB) // TODO // LF -- Line Feed (A) -- normative assertTrue(testBreak( "Simple" + "\n" + "\n" + "break", "^^^^^^" + "^" + "!" + "!^^^^" )); // NL -- Next Line (A) -- normative assertTrue(testBreak( "NL" + "\u0085" + "break", "^^" + "^" + "!^^^^" )); // NS -- Non-starters (XB) // TODO // NU -- Numeric (XP) // Tested as part of IS // OP -- Opening Punctuation (XA) assertTrue(testBreak( "[ Bracket ( Parenthesis", "^^^^^^^^^^%^^^^^^^^^^^^" )); // PO -- Postfix (Numeric) (XB) assertTrue(testBreak( "(12.00)%", "^^^^^^^^" )); // PR -- Prefix (Numeric) (XA) assertTrue(testBreak( "$1000.00", "^^^^^^^^" )); // QU -- Ambiguous Quotation (XB/XA) assertTrue(testBreak( "'In Quotes'", "^^^^%^^^^^^" )); assertTrue(testBreak( "' (In Quotes) '", "^^^^^^%^^^^^^^%" )); // SA -- Complex-context Dependent Characters (South East Asian) (P) // TODO // SP -- Space (A) -- normative assertTrue(testBreak( "Simple break", "^^^^^^^%^^^^" )); assertTrue(testBreak( "Simple break2", "^^^^^^^^^^%^^^^^" )); // SY -- Symbols Allowing Break After (A) assertTrue(testBreak( "http://xmlgraphics.apache.org/fop", "^^^^^^^_^^^^^^^^^^^^^^^^^^^^^^_^^" )); assertTrue(testBreak( "1/2 31/10/2005", "^^^^%^^^^^^^^^" )); // WJ -- Word Joiner (XA/XB) -- (normative) assertTrue(testBreak( "http://" + "\u2060" + "xmlgraphics.apache.org/" + "\uFEFF" + "fop", "^^^^^^^" + "^" + "^^^^^^^^^^^^^^^^^^^^^^^" + "^" + "^^^" )); assertTrue(testBreak( "Simple " + "\u2060" + "break", "^^^^^^^" + "^" + "^^^^^" )); assertTrue(testBreak( "Simple" + "\u200B" + "\u2060" + "break", "^^^^^^" + "^" + "_" + "^^^^^" )); // XX -- Unknown (XP) // TODO // ZW -- Zero Width Space (A) -- (normative) assertTrue(testBreak( "Simple" + "\u200B" + "break", "^^^^^^" + "^" + "_^^^^" )); // Unassigned codepoint: should yield same result as AL assertTrue(testBreak( "No" + "\u1F7E" + "break", "^^" + "^" + "^^^^^" )); } /** * Tests the paragraph break status (break actions) returned from calling * LineBreakStatus.nextChar() on each character of paragraph against * the expected break actions. There must be a positional match between * the characters in paragraph and characters in breakAction. * @param paragraph The text to be analysed for line breaks * @param breakActions The symbolic representation of the break actions * expected to be returned. */ private boolean testBreak(String paragraph, String breakActions) { boolean result = true; int length = paragraph.length(); LineBreakStatus lbs = new LineBreakStatus(); for (int i = 0; i < length; i++) { byte breakAction = lbs.nextChar(paragraph.charAt(i)); if (BREAK_ACTION.charAt(breakAction) != breakActions.charAt(i)) { System.err.println(paragraph); System.err.println(breakActions); System.err.println("pos = " + i + " expected '" + breakActions.charAt(i) + "' got '" + BREAK_ACTION.charAt(breakAction) + "'"); result = false; } } return result; } }