/**************************************************************************
OmegaT - Computer Assisted Translation (CAT) tool
with fuzzy matching, translation memory, keyword search,
glossaries, and translation leveraging into updated projects.
Copyright (C) 2000-2006 Keith Godfrey and Maxym Mykhalchuk
Home page: http://www.omegat.org/
Support center: http://groups.yahoo.com/group/OmegaT/
This file is part of OmegaT.
OmegaT is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
OmegaT is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
**************************************************************************/
package org.omegat.core.segmentation;
import java.io.Serializable;
import java.util.regex.Pattern;
import java.util.regex.PatternSyntaxException;
/**
* The class representing a single segmentation rule.
*
* @author Maxym Mykhalchuk
*/
public class Rule implements Serializable {
private static final long serialVersionUID = 7645267236376489908L;
/** Creates a new empty instance of segmentation rule */
public Rule() {
}
/** Creates an initialized instance of segmentation rule */
public Rule(boolean breakRule, String beforebreak, String afterbreak) {
setBreakRule(breakRule);
setBeforebreak(beforebreak);
setAfterbreak(afterbreak);
}
public Rule copy() {
Rule result = new Rule();
result.breakRule = breakRule;
result.beforebreak = beforebreak;
result.afterbreak = afterbreak;
return result;
}
/**
* Holds value of property breakRule.
* <p>
* This property corresponds to 'break' attribute of SRX 'rule', meaning
* whether this is a rule that determines a break or an exception.
*/
private boolean breakRule;
/**
* Returns whether this is a rule that determines a break or an exception.
*
* @return true is this is a break rule.
*/
public boolean isBreakRule() {
return this.breakRule;
}
/**
* Sets whether this is a rule that determines a break or an exception.
*
* @param breakRule
* New value -- true for a break rule, false for an exception.
*/
public void setBreakRule(boolean breakRule) {
this.breakRule = breakRule;
}
/**
* A regular expression which represents the text that appears before a
* segment break.
*/
private Pattern beforebreak;
/**
* Returns a regular expression which represents the text that appears
* before a segment break.
*
* @return regular expression of a text before break.
*/
public String getBeforebreak() {
if (beforebreak != null)
return beforebreak.pattern();
else
return null;
}
/**
* Returns a regular expression which represents the text that appears
* before a segment break.
*
* @return regular expression of a text before break.
*/
public Pattern getCompiledBeforebreak() {
return beforebreak;
}
/**
* Sets a regular expression which represents the text that appears before a
* segment break.
*
* @param beforebreak
* Regular expression string of a text before break.
*/
public void setBeforebreak(String beforebreak) throws PatternSyntaxException {
this.beforebreak = compilePattern(beforebreak);
}
/**
* A regular expression which represents the text that appears after a
* segment break.
*/
private Pattern afterbreak;
/**
* Returns a regular expression which represents the text that appears after
* a segment break.
*
* @return regular expression of a text after break.
*/
public String getAfterbreak() {
if (afterbreak != null)
return afterbreak.pattern();
else
return null;
}
/**
* Returns a regular expression which represents the text that appears after
* a segment break.
*
* @return regular expression of a text after break.
*/
public Pattern getCompiledAfterbreak() {
return afterbreak;
}
/**
* Sets a regular expression which represents the text that appears after a
* segment break.
*
* @param afterbreak
* Regular expression string of a text after break.
*/
public void setAfterbreak(String afterbreak) throws PatternSyntaxException {
this.afterbreak = compilePattern(afterbreak);
}
/**
* Compiles the pattern and avoids two bugs:
* <ul>
* <li>#1385202 - "." does not match newline chars, and hence, OmegaT does
* not segment on "<br>
* \n". Fixed by adding Pattern.DOTALL flag.
* <li>#1393484 - Case sensitivity for segmentation rules, e.g. exception
* "M\." glues "them. All". Fixed by testing for case sensitivity, and
* turning UNICODE_CASE flag on iff the case insensitivity is turned on too.
* </ul>
*/
private Pattern compilePattern(String pattern) {
Pattern testFlags = Pattern.compile(pattern);
if ((testFlags.flags() & Pattern.CASE_INSENSITIVE) == Pattern.CASE_INSENSITIVE)
return Pattern.compile(pattern, Pattern.UNICODE_CASE | Pattern.DOTALL);
else
return Pattern.compile(pattern, Pattern.DOTALL);
}
/** Indicates whether some other Rule is "equal to" this one. */
public boolean equals(Object obj) {
if (obj == null || !(obj instanceof Rule)) {
return false;
}
Rule that = (Rule) obj;
return this.breakRule == that.breakRule && this.getBeforebreak().equals(that.getBeforebreak())
&& this.getAfterbreak().equals(that.getAfterbreak());
}
/** Returns a hash code value for the object. */
public int hashCode() {
return (this.isBreakRule() ? 1 : -1) + this.getBeforebreak().hashCode()
- this.getAfterbreak().hashCode();
}
/** Returns a string representation of the Rule for debugging purposes. */
public String toString() {
return (isBreakRule() ? "Break " : "Exception ") + "Before: " + getBeforebreak() + "After: "
+ getAfterbreak();
}
}