/**************************************************************************
OmegaT - Computer Assisted Translation (CAT) tool
with fuzzy matching, translation memory, keyword search,
glossaries, and translation leveraging into updated projects.
Copyright (C) 2000-2006 Keith Godfrey and Maxym Mykhalchuk
2008 Alex Buloichik
Home page: http://www.omegat.org/
Support center: http://groups.yahoo.com/group/OmegaT/
This file is part of OmegaT.
OmegaT is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
OmegaT is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
**************************************************************************/
package org.omegat.core.segmentation;
import java.beans.ExceptionListener;
import java.beans.XMLDecoder;
import java.beans.XMLEncoder;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.Serializable;
import java.net.URL;
import java.util.ArrayList;
import java.util.List;
import javax.xml.bind.JAXBContext;
import javax.xml.bind.JAXBException;
import org.omegat.util.Language;
import org.omegat.util.Log;
import org.omegat.util.OStrings;
import gen.core.segmentation.Languagemap;
import gen.core.segmentation.Languagerule;
import gen.core.segmentation.Srx;
/**
* The class with all the segmentation data possible -- rules, languages, etc.
* It loads and saves its data from/to SRX file.
*
* @author Maxym Mykhalchuk
*/
public class SRX implements Serializable {
private static final long serialVersionUID = 2182125877925944613L;
public static final String CONF_SENTSEG = "segmentation.conf";
/** Context for JAXB rules processing. */
protected static final JAXBContext SRX_JAXB_CONTEXT;
static {
try {
SRX_JAXB_CONTEXT = JAXBContext.newInstance(Srx.class);
} catch (LinkageError ex) {
throw new ExceptionInInitializerError(OStrings.getString("STARTUP_JAXB_LINKAGE_ERROR"));
} catch (JAXBException ex) {
if (ex.getMessage() != null) {
throw new ExceptionInInitializerError(ex.getMessage());
}
if (ex.getCause() != null) {
throw new ExceptionInInitializerError(ex.getCause().getClass().getName() + ": "
+ ex.getCause().getMessage());
}
throw new ExceptionInInitializerError(ex.getClass().getName());
}
}
/**
* Initializes SRX rules to defaults.
*/
private void init() {
this.mappingRules = new ArrayList<MapRule>();
this.includeEndingTags=true;
this.segmentSubflows=true;
initDefaults();
}
/**
* Creates an empty SRX, without any rules.
* <p>
* Please do not call directly unless you know what you are doing.
*/
public SRX() {
}
public SRX copy() {
SRX result = new SRX();
result.mappingRules = new ArrayList<MapRule>(mappingRules.size());
for (MapRule rule : mappingRules) {
result.mappingRules.add(rule.copy());
}
return result;
}
/**
* Saves segmentation rules into specified file.
*/
public static void saveTo(SRX srx, File outFile) throws IOException {
if (srx == null) {
outFile.delete();
return;
}
try {
srx.setVersion(CURRENT_VERSION);
XMLEncoder xmlenc = new XMLEncoder(new FileOutputStream(outFile));
xmlenc.writeObject(srx);
xmlenc.close();
} catch (IOException ioe) {
Log.logErrorRB("CORE_SRX_ERROR_SAVING_SEGMENTATION_CONFIG");
Log.log(ioe);
throw ioe;
}
}
/**
* Loads segmentation rules from an XML file. If there's an error loading a
* file, it calls <code>initDefaults</code>.
* <p>
* Since 1.6.0 RC8 it also checks if the version of segmentation rules saved
* is older than that of the current OmegaT, and tries to merge the two sets
* of rules.
*/
public static SRX loadSRX(File configFile) {
if (!configFile.exists()) {
return null;
}
SRX res;
try {
MyExceptionListener myel = new MyExceptionListener();
XMLDecoder xmldec = new XMLDecoder(new FileInputStream(configFile), null, myel);
res = (SRX) xmldec.readObject();
xmldec.close();
if (myel.isExceptionOccured()) {
StringBuilder sb = new StringBuilder();
for (Exception ex : myel.getExceptionsList()) {
sb.append(" ");
sb.append(ex);
sb.append("\n");
}
Log.logErrorRB("CORE_SRX_EXC_LOADING_SEG_RULES", sb.toString());
res = new SRX();
res.initDefaults();
return res;
}
// checking the version
if (CURRENT_VERSION.compareTo(res.getVersion()) > 0) {
// yeap, the segmentation config file is of the older version
// initing defaults
SRX defaults = new SRX();
defaults.initDefaults();
// and merging them into loaded rules
res = merge(res, defaults);
}
} catch (Exception e) {
// silently ignoring FNF
if (!(e instanceof FileNotFoundException))
Log.log(e);
res = new SRX();
res.initDefaults();
}
return res;
}
/**
* Does a config file already exists for the project at the given location?
* @param configDir the project directory for storage of settings file
*/
public static boolean projectConfigFileExists(String configDir) {
File configFile = new File(configDir + CONF_SENTSEG);
return configFile.exists();
}
/** Merges two sets of segmentation rules together. */
private static SRX merge(SRX current, SRX defaults) {
current = upgrade(current, defaults);
int defaultMapRulesN = defaults.getMappingRules().size();
for (int i = 0; i < defaultMapRulesN; i++) {
MapRule dmaprule = defaults.getMappingRules().get(i);
String dcode = dmaprule.getLanguageCode();
// trying to find
boolean found = false;
int currentMapRulesN = current.getMappingRules().size();
MapRule cmaprule = null;
for (int j = 0; j < currentMapRulesN; j++) {
cmaprule = current.getMappingRules().get(j);
String ccode = cmaprule.getLanguageCode();
if (dcode.equals(ccode)) {
found = true;
break;
}
}
if (found) {
// merging -- adding those rules not there in current list
List<Rule> crules = cmaprule.getRules();
List<Rule> drules = dmaprule.getRules();
for (Rule drule : drules) {
if (!crules.contains(drule)) {
if (drule.isBreakRule()) {
// breaks go to the end
crules.add(drule);
} else {
// exceptions go before the first break rule
int currentRulesN = crules.size();
int firstBreakRuleN = currentRulesN;
for (int k = 0; k < currentRulesN; k++) {
Rule crule = crules.get(k);
if (crule.isBreakRule()) {
firstBreakRuleN = k;
break;
}
}
crules.add(firstBreakRuleN, drule);
}
}
}
} else {
// just adding before the default rules
int englishN = currentMapRulesN;
for (int j = 0; j < currentMapRulesN; j++) {
cmaprule = current.getMappingRules().get(j);
String cpattern = cmaprule.getPattern();
if (DEFAULT_RULES_PATTERN.equals(cpattern)) {
englishN = j;
break;
}
}
current.getMappingRules().add(englishN, dmaprule);
}
}
return current;
}
/** Implements some upgrade heuristics. */
private static SRX upgrade(SRX current, SRX defaults) {
// renaming "Default (English)" to "Default"
// and removing English/Text/HTML-specific rules from there
if (OT160RC9_VERSION.equals(CURRENT_VERSION)) {
String DEF = "Default (English)";
for (int i = 0; i < current.getMappingRules().size(); i++) {
MapRule maprule = current.getMappingRules().get(i);
if (DEF.equals(maprule.getLanguageCode())) {
maprule.setLanguage(LanguageCodes.DEFAULT_CODE);
maprule.getRules().removeAll(getRulesForLanguage(defaults, LanguageCodes.ENGLISH_CODE));
maprule.getRules().removeAll(getRulesForLanguage(defaults, LanguageCodes.F_TEXT_CODE));
maprule.getRules().removeAll(getRulesForLanguage(defaults, LanguageCodes.F_HTML_CODE));
}
}
}
return current;
}
/**
* Find rules for specific language.
*
* @param source
* rules list
* @param langName
* language name
* @return list of rules
*/
private static List<Rule> getRulesForLanguage(final SRX source, String langName) {
for (MapRule mr : source.getMappingRules()) {
if (langName.equals(mr.getLanguageCode())) {
return mr.getRules();
}
}
return null;
}
/**
* My Own Class to listen to exceptions, occured while loading filters
* configuration.
*/
static class MyExceptionListener implements ExceptionListener {
private List<Exception> exceptionsList = new ArrayList<Exception>();
private boolean exceptionOccured = false;
public void exceptionThrown(Exception e) {
exceptionOccured = true;
exceptionsList.add(e);
}
/**
* Returns whether any exceptions occured.
*/
public boolean isExceptionOccured() {
return exceptionOccured;
}
/**
* Returns the list of occured exceptions.
*/
public List<Exception> getExceptionsList() {
return exceptionsList;
}
}
// Patterns
private static final String DEFAULT_RULES_PATTERN = ".*";
/**
* Initializes default rules.
*/
private void initDefaults() {
try {
List<MapRule> newMap = new ArrayList<MapRule>();
URL rulesUrl = this.getClass().getResource("defaultRules.srx");
Srx data = (Srx) SRX_JAXB_CONTEXT.createUnmarshaller().unmarshal(rulesUrl);
for (Languagerule rules : data.getBody().getLanguagerules().getLanguagerule()) {
String lang = rules.getLanguagerulename();
String pattern = DEFAULT_RULES_PATTERN;
for (Languagemap lm : data.getBody().getMaprules().getLanguagemap()) {
if (lm.getLanguagerulename().equals(rules.getLanguagerulename())) {
pattern = lm.getLanguagepattern();
break;
}
}
List<Rule> rulesList = new ArrayList<Rule>(rules.getRule().size());
for (gen.core.segmentation.Rule r : rules.getRule()) {
boolean isBreak = "yes".equalsIgnoreCase(r.getBreak());
rulesList.add(new Rule(isBreak, r.getBeforebreak().getContent(), r.getAfterbreak()
.getContent()));
}
newMap.add(new MapRule(lang, pattern, rulesList));
}
// set rules only if no errors
getMappingRules().addAll(newMap);
} catch (Exception ex) {
ex.printStackTrace();
}
}
public static SRX getDefault() {
SRX srx = new SRX();
srx.init();
return srx;
}
/**
* Finds the rules for a certain language.
* <p>
* Usually (if the user didn't screw up the setup) there're a default
* segmentation rules, so it's a good idea to rely on this method always
* returning at least some rules.
* <p>
* Or in case of a completely screwed setup -- an empty list without any
* rules.
*/
public List<Rule> lookupRulesForLanguage(Language srclang) {
List<Rule> rules = new ArrayList<Rule>();
for (int i = 0; i < getMappingRules().size(); i++) {
MapRule maprule = getMappingRules().get(i);
if (maprule.getCompiledPattern().matcher(srclang.getLanguage()).matches())
rules.addAll(maprule.getRules());
}
return rules;
}
/**
* Holds value of property segmentSubflows.
*/
private boolean segmentSubflows = true;
/**
* Getter for property segmentSubflows.
*
* @return Value of property segmentSubflows.
*/
public boolean isSegmentSubflows() {
return this.segmentSubflows;
}
/**
* Setter for property segmentSubflows.
*
* @param segmentSubflows
* New value of property segmentSubflows.
*/
public void setSegmentSubflows(boolean segmentSubflows) {
this.segmentSubflows = segmentSubflows;
}
/**
* Holds value of property includeStartingTags.
*/
private boolean includeStartingTags;
/**
* Getter for property includeStartingTags.
*
* @return Value of property includeStartingTags.
*/
public boolean isIncludeStartingTags() {
return this.includeStartingTags;
}
/**
* Setter for property includeStartingTags.
*
* @param includeStartingTags
* New value of property includeStartingTags.
*/
public void setIncludeStartingTags(boolean includeStartingTags) {
this.includeStartingTags = includeStartingTags;
}
/**
* Holds value of property includeEndingTags.
*/
private boolean includeEndingTags = true;
/**
* Getter for property includeEndingTags.
*
* @return Value of property includeEndingTags.
*/
public boolean isIncludeEndingTags() {
return this.includeEndingTags;
}
/**
* Setter for property includeEndingTags.
*
* @param includeEndingTags
* New value of property includeEndingTags.
*/
public void setIncludeEndingTags(boolean includeEndingTags) {
this.includeEndingTags = includeEndingTags;
}
/**
* Holds value of property includeIsolatedTags.
*/
private boolean includeIsolatedTags;
/**
* Getter for property includeIsolatedTags.
*
* @return Value of property includeIsolatedTags.
*/
public boolean isIncludeIsolatedTags() {
return this.includeIsolatedTags;
}
/**
* Setter for property includeIsolatedTags.
*
* @param includeIsolatedTags
* New value of property includeIsolatedTags.
*/
public void setIncludeIsolatedTags(boolean includeIsolatedTags) {
this.includeIsolatedTags = includeIsolatedTags;
}
/**
* Correspondences between languages and their segmentation rules. Each
* element is of class {@link MapRule}.
*/
private List<MapRule> mappingRules = new ArrayList<MapRule>();
/**
* Returns all mapping rules (of class {@link MapRule}) at once:
* correspondences between languages and their segmentation rules.
*/
public List<MapRule> getMappingRules() {
return mappingRules;
}
/**
* Sets all mapping rules (of class {@link MapRule}) at once:
* correspondences between languages and their segmentation rules.
*/
public void setMappingRules(List<MapRule> rules) {
mappingRules = rules;
}
// ////////////////////////////////////////////////////////////////
// Versioning properties to detect version upgrades
// and possibly do something if required
/** Initial version of segmentation support (1.4.6 beta 4 -- 1.6.0 RC7). */
public static final String INITIAL_VERSION = "0.2";
/** Segmentation support of 1.6.0 RC8 (a bit more rules added). */
public static final String OT160RC8_VERSION = "0.2.1";
/** Segmentation support of 1.6.0 RC9 (rules separated). */
public static final String OT160RC9_VERSION = "0.2.2";
/** Currently supported segmentation support version. */
public static final String CURRENT_VERSION = OT160RC9_VERSION;
/** Version of OmegaT segmentation support. */
private String version;
/** Returns segmentation support version. */
public String getVersion() {
return version;
}
/** Sets segmentation support version. */
public void setVersion(String value) {
version = value;
}
@Override
public int hashCode() {
final int prime = 31;
int result = 1;
result = prime * result + (includeEndingTags ? 1231 : 1237);
result = prime * result + (includeIsolatedTags ? 1231 : 1237);
result = prime * result + (includeStartingTags ? 1231 : 1237);
result = prime * result + ((mappingRules == null) ? 0 : mappingRules.hashCode());
result = prime * result + (segmentSubflows ? 1231 : 1237);
result = prime * result + ((version == null) ? 0 : version.hashCode());
return result;
}
@Override
public boolean equals(Object obj) {
if (this == obj)
return true;
if (obj == null)
return false;
if (getClass() != obj.getClass())
return false;
SRX other = (SRX) obj;
if (includeEndingTags != other.includeEndingTags)
return false;
if (includeIsolatedTags != other.includeIsolatedTags)
return false;
if (includeStartingTags != other.includeStartingTags)
return false;
if (mappingRules == null) {
if (other.mappingRules != null)
return false;
} else if (!mappingRules.equals(other.mappingRules))
return false;
if (segmentSubflows != other.segmentSubflows)
return false;
if (version == null) {
if (other.version != null)
return false;
} else if (!version.equals(other.version))
return false;
return true;
}
}