Rule.java example

Explorer
OmegaT-master
/**************************************************************************
 OmegaT - Computer Assisted Translation (CAT) tool
          with fuzzy matching, translation memory, keyword search,
          glossaries, and translation leveraging into updated projects.

 Copyright (C) 2000-2006 Keith Godfrey and Maxym Mykhalchuk
               Home page: http://www.omegat.org/
               Support center: http://groups.yahoo.com/group/OmegaT/

 This file is part of OmegaT.

 OmegaT is free software: you can redistribute it and/or modify
 it under the terms of the GNU General Public License as published by
 the Free Software Foundation, either version 3 of the License, or
 (at your option) any later version.

 OmegaT is distributed in the hope that it will be useful,
 but WITHOUT ANY WARRANTY; without even the implied warranty of
 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 GNU General Public License for more details.

 You should have received a copy of the GNU General Public License
 along with this program.  If not, see <http://www.gnu.org/licenses/>.
 **************************************************************************/

package org.omegat.core.segmentation;

import java.io.Serializable;
import java.util.regex.Pattern;
import java.util.regex.PatternSyntaxException;

/**
 * The class representing a single segmentation rule.
 *
 * @author Maxym Mykhalchuk
 */
public class Rule implements Serializable {

    private static final long serialVersionUID = 7645267236376489908L;

    /** Creates a new empty instance of segmentation rule */
    public Rule() {
    }

    /** Creates an initialized instance of segmentation rule */
    public Rule(boolean breakRule, String beforebreak, String afterbreak) {
        setBreakRule(breakRule);
        setBeforebreak(beforebreak);
        setAfterbreak(afterbreak);
    }

    public Rule copy() {
        Rule result = new Rule();
        result.breakRule = breakRule;
        result.beforebreak = beforebreak;
        result.afterbreak = afterbreak;
        return result;
    }

    /**
     * Holds value of property breakRule.
     * <p>
     * This property corresponds to 'break' attribute of SRX 'rule', meaning
     * whether this is a rule that determines a break or an exception.
     */
    private boolean breakRule;

    /**
     * Returns whether this is a rule that determines a break or an exception.
     *
     * @return true is this is a break rule.
     */
    public boolean isBreakRule() {
        return this.breakRule;
    }

    /**
     * Sets whether this is a rule that determines a break or an exception.
     *
     * @param breakRule
     *            New value -- true for a break rule, false for an exception.
     */
    public void setBreakRule(boolean breakRule) {
        this.breakRule = breakRule;
    }

    /**
     * A regular expression which represents the text that appears before a
     * segment break.
     */
    private Pattern beforebreak;

    /**
     * Returns a regular expression which represents the text that appears
     * before a segment break.
     *
     * @return regular expression of a text before break.
     */
    public String getBeforebreak() {
        if (beforebreak != null)
            return beforebreak.pattern();
        else
            return null;
    }

    /**
     * Returns a regular expression which represents the text that appears
     * before a segment break.
     *
     * @return regular expression of a text before break.
     */
    public Pattern getCompiledBeforebreak() {
        return beforebreak;
    }

    /**
     * Sets a regular expression which represents the text that appears before a
     * segment break.
     *
     * @param beforebreak
     *            Regular expression string of a text before break.
     */
    public void setBeforebreak(String beforebreak) throws PatternSyntaxException {
        this.beforebreak = compilePattern(beforebreak);
    }

    /**
     * A regular expression which represents the text that appears after a
     * segment break.
     */
    private Pattern afterbreak;

    /**
     * Returns a regular expression which represents the text that appears after
     * a segment break.
     *
     * @return regular expression of a text after break.
     */
    public String getAfterbreak() {
        if (afterbreak != null)
            return afterbreak.pattern();
        else
            return null;
    }

    /**
     * Returns a regular expression which represents the text that appears after
     * a segment break.
     *
     * @return regular expression of a text after break.
     */
    public Pattern getCompiledAfterbreak() {
        return afterbreak;
    }

    /**
     * Sets a regular expression which represents the text that appears after a
     * segment break.
     *
     * @param afterbreak
     *            Regular expression string of a text after break.
     */
    public void setAfterbreak(String afterbreak) throws PatternSyntaxException {
        this.afterbreak = compilePattern(afterbreak);
    }

    /**
     * Compiles the pattern and avoids two bugs:
     * <ul>
     * <li>#1385202 - "." does not match newline chars, and hence, OmegaT does
     * not segment on "<br>
     * \n". Fixed by adding Pattern.DOTALL flag.
     * <li>#1393484 - Case sensitivity for segmentation rules, e.g. exception
     * "M\." glues "them. All". Fixed by testing for case sensitivity, and
     * turning UNICODE_CASE flag on iff the case insensitivity is turned on too.
     * </ul>
     */
    private Pattern compilePattern(String pattern) {
        Pattern testFlags = Pattern.compile(pattern);
        if ((testFlags.flags() & Pattern.CASE_INSENSITIVE) == Pattern.CASE_INSENSITIVE)
            return Pattern.compile(pattern, Pattern.UNICODE_CASE | Pattern.DOTALL);
        else
            return Pattern.compile(pattern, Pattern.DOTALL);
    }

    /** Indicates whether some other Rule is "equal to" this one. */
    public boolean equals(Object obj) {
        if (obj == null || !(obj instanceof Rule)) {
            return false;
        }
        Rule that = (Rule) obj;
        return this.breakRule == that.breakRule && this.getBeforebreak().equals(that.getBeforebreak())
                && this.getAfterbreak().equals(that.getAfterbreak());
    }

    /** Returns a hash code value for the object. */
    public int hashCode() {
        return (this.isBreakRule() ? 1 : -1) + this.getBeforebreak().hashCode()
                - this.getAfterbreak().hashCode();
    }

    /** Returns a string representation of the Rule for debugging purposes. */
    public String toString() {
        return (isBreakRule() ? "Break " : "Exception ") + "Before: " + getBeforebreak() + "After: "
                + getAfterbreak();
    }
}