/* * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. * The ASF licenses this file to You under the Apache License, Version 2.0 * (the "License"); you may not use this file except in compliance with * the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ /* $Id$ */ package org.apache.fop.text.linebreak; /** * This class is meant for supporting the Unicode line breaking algorithm. * See: <a href="http://unicode.org/reports/tr14/">UTR 14</a> * */ public class LineBreakStatus { /** Constant indicating a Direct Break */ public static final byte DIRECT_BREAK = LineBreakUtils.DIRECT_BREAK; /** Constant indicating an Indirect Break */ public static final byte INDIRECT_BREAK = LineBreakUtils.INDIRECT_BREAK; /** Constant indicating a Combining Indirect Break */ public static final byte COMBINING_INDIRECT_BREAK = LineBreakUtils.COMBINING_INDIRECT_BREAK; /** Constant indicating a Combining Prohibited Break */ public static final byte COMBINING_PROHIBITED_BREAK = LineBreakUtils.COMBINING_PROHIBITED_BREAK; /** Constant indicating a Prohibited Break */ public static final byte PROHIBITED_BREAK = LineBreakUtils.PROHIBITED_BREAK; /** Constant indicating a Explicit Break */ public static final byte EXPLICIT_BREAK = LineBreakUtils.EXPLICIT_BREAK; private byte leftClass; private boolean hadSpace; /** * Resets the class to the same state as if new LineBreakStatus() had just been called. */ public LineBreakStatus() { reset(); } /** * Reset the status. * This method will reset the status to the initial state. It is meant * for recycling objects. */ public void reset() { leftClass = -1; hadSpace = false; } /** * Check whether a line break may happen according to the rules described in * the <a href="http://unicode.org/reports/tr14/#Algorithm">Unicode Line Breaking * Algorithm</a>. The function returns the line breaking status of the point * <em>before</em> the given character. * The algorithm is the table-driven algorithm, as described in * <a href="http://unicode.org/reports/tr14/#PairBasedImplementation"> * Unicode Technical Report #14</a>. * The pair table is taken from {@link LineBreakUtils}. * * TODO: Better handling for AI, SA, SG and XX line break classes. * * @param c the character to check * @return the break action to be taken * one of: {@link #DIRECT_BREAK}, * {@link #INDIRECT_BREAK}, * {@link #COMBINING_INDIRECT_BREAK}, * {@link #COMBINING_PROHIBITED_BREAK}, * {@link #PROHIBITED_BREAK}, * {@link #EXPLICIT_BREAK} */ public byte nextChar(char c) { byte currentClass = LineBreakUtils.getLineBreakProperty(c); /* Initial conversions */ switch (currentClass) { case 0: // Unassigned codepoint: same treatment as AI case LineBreakUtils.LINE_BREAK_PROPERTY_AI: case LineBreakUtils.LINE_BREAK_PROPERTY_SG: case LineBreakUtils.LINE_BREAK_PROPERTY_XX: // LB 1: Resolve AI, ... SG and XX into other line breaking classes // depending on criteria outside the scope of this algorithm. // In the absence of such criteria, it is recommended that // classes AI, ... SG and XX be resolved to AL currentClass = LineBreakUtils.LINE_BREAK_PROPERTY_AL; break; case LineBreakUtils.LINE_BREAK_PROPERTY_SA: // LB 1: Resolve ... SA ... into other line breaking classes // depending on criteria outside the scope of this algorithm. // In the absence of such criteria, it is recommended that // ... SA be resolved to AL, except that characters of // class SA that have General_Category Mn or Mc be resolved to CM switch (Character.getType(c)) { case Character.COMBINING_SPACING_MARK: //General_Category "Mc" case Character.NON_SPACING_MARK: //General_Category "Mn" currentClass = LineBreakUtils.LINE_BREAK_PROPERTY_CM; break; default: currentClass = LineBreakUtils.LINE_BREAK_PROPERTY_AL; } default: //nop } /* Check 1: First character or initial character after a reset/mandatory break? */ switch (leftClass) { case -1: //first character or initial character after a reset() leftClass = currentClass; if (leftClass == LineBreakUtils.LINE_BREAK_PROPERTY_CM) { // LB 10: Treat any remaining combining marks as AL leftClass = LineBreakUtils.LINE_BREAK_PROPERTY_AL; } // LB 2: Never break at the start of text return PROHIBITED_BREAK; case LineBreakUtils.LINE_BREAK_PROPERTY_BK: case LineBreakUtils.LINE_BREAK_PROPERTY_LF: case LineBreakUtils.LINE_BREAK_PROPERTY_NL: //first character after mandatory break // LB 4: Always break after hard line breaks // LB 5: Treat ... LF and NL has hard line breaks reset(); leftClass = currentClass; return EXPLICIT_BREAK; case LineBreakUtils.LINE_BREAK_PROPERTY_CR: //first character after a carriage return: // LB 5: Treat CR followed by LF, as well as CR ... as hard line breaks // If current is LF, then fall through to Check 2 (see below), // and the hard break will be signaled for the character after LF (see above) if (currentClass != LineBreakUtils.LINE_BREAK_PROPERTY_LF) { reset(); leftClass = currentClass; return EXPLICIT_BREAK; } default: //nop } /* Check 2: current is a mandatory break or space? */ switch (currentClass) { case LineBreakUtils.LINE_BREAK_PROPERTY_BK: case LineBreakUtils.LINE_BREAK_PROPERTY_LF: case LineBreakUtils.LINE_BREAK_PROPERTY_NL: case LineBreakUtils.LINE_BREAK_PROPERTY_CR: // LB 6: Do not break before a hard break leftClass = currentClass; return PROHIBITED_BREAK; case LineBreakUtils.LINE_BREAK_PROPERTY_SP: // LB 7: Do not break before spaces ... // Zero-width spaces are in the pair-table (see below) hadSpace = true; return PROHIBITED_BREAK; default: //nop } /* Normal treatment, if the first two checks did not return */ boolean savedHadSpace = hadSpace; hadSpace = false; byte breakAction = LineBreakUtils.getLineBreakPairProperty(leftClass, currentClass); switch (breakAction) { case PROHIBITED_BREAK: case DIRECT_BREAK: leftClass = currentClass; return breakAction; case INDIRECT_BREAK: leftClass = currentClass; if (savedHadSpace) { return INDIRECT_BREAK; } else { return PROHIBITED_BREAK; } case COMBINING_INDIRECT_BREAK: if (savedHadSpace) { leftClass = currentClass; return COMBINING_INDIRECT_BREAK; } else { return PROHIBITED_BREAK; } case COMBINING_PROHIBITED_BREAK: if (savedHadSpace) { leftClass = currentClass; } return COMBINING_PROHIBITED_BREAK; default: assert false; return breakAction; } } /** * for debugging only */ /* public static void main(String args[]) { LineBreakStatus lbs = new LineBreakStatus(); lbs.nextChar('\n'); lbs.nextChar('\n'); lbs.nextChar('x'); } */ }