/**************************************************************************
OmegaT - Computer Assisted Translation (CAT) tool
with fuzzy matching, translation memory, keyword search,
glossaries, and translation leveraging into updated projects.
Copyright (C) 2000-2006 Keith Godfrey and Maxym Mykhalchuk
2006 Thomas Huriaux
2008 Martin Fleurke
2009 Alex Buloichik
2011 Didier Briel
2013-2014 Alex Buloichik, Enrique Estevez
2017 Didier Briel
Home page: http://www.omegat.org/
Support center: http://groups.yahoo.com/group/OmegaT/
This file is part of OmegaT.
OmegaT is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
OmegaT is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
**************************************************************************/
package org.omegat.filters2.po;
import java.awt.Window;
import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.File;
import java.io.IOException;
import java.nio.charset.StandardCharsets;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import org.omegat.core.data.ProtectedPart;
import org.omegat.core.data.SegmentProperties;
import org.omegat.filters2.AbstractFilter;
import org.omegat.filters2.FilterContext;
import org.omegat.filters2.Instance;
import org.omegat.filters2.TranslationException;
import org.omegat.util.Language;
import org.omegat.util.Log;
import org.omegat.util.OStrings;
import org.omegat.util.PatternConsts;
import org.omegat.util.StringUtil;
import org.omegat.util.TagUtil;
/**
* Filter to support po files (in various encodings).
*
* Format described on http://www.gnu.org/software/hello/manual/gettext/PO-Files.html
*
* Filter is not thread-safe !
*
* Filter uses the msgctxt field as path, and the plural index as suffix of the path.
*
* @author Keith Godfrey
* @author Maxym Mykhalchuk
* @author Thomas Huriaux
* @author Martin Fleurke
* @author Alex Buloichik (alex73mail@gmail.com)
* @author Didier Briel
* @author Enrique Estevez
*/
public class PoFilter extends AbstractFilter {
/**
 * Option key controlling blank translations. Note that the stored key says "disallow" although the
 * value is read as "allow": a missing value or "true" (case-insensitive) lets an untranslated
 * segment be written with an empty msgstr.
 */
public static final String OPTION_ALLOW_BLANK = "disallowBlank";
/**
 * Option key controlling the editing of entries with an empty msgid. As above, the stored key says
 * "disallow" but a missing value or "true" (case-insensitive) enables the feature.
 */
public static final String OPTION_ALLOW_EDITING_BLANK_SEGMENT = "disallowEditingBlankSegment";
/** Option key: "true" (case-insensitive) skips the PO header; any other value, or none, keeps it. */
public static final String OPTION_SKIP_HEADER = "skipHeader";
/**
 * Option key: "true" (case-insensitive) replaces the Plural-Forms placeholder in the header with the
 * values predefined for the target language; any other value, or none, leaves the header as is.
 */
public static final String OPTION_AUTO_FILL_IN_PLURAL_STATEMENT = "autoFillInPluralStatement";
/**
 * Option key: "true" (case-insensitive) selects the monolingual layout, where msgid is an ID and
 * msgstr holds the original text; any other value, or none, selects the regular layout.
 */
public static final String OPTION_FORMAT_MONOLINGUAL = "monolingualFormat";
/**
 * Immutable pair describing a gettext plural rule: the number of plural forms and the expression
 * that selects a form for a given {@code n}.
 * <p>
 * Instances are shared through a static map, so the fields are final to guarantee nobody can alter
 * a rule after it has been registered.
 */
private static class PluralInfo {
    /** Number of plural forms, i.e. the value written after {@code nplurals=}. */
    public final int plurals;
    /** Selection expression, i.e. the value written after {@code plural=}. */
    public final String expression;

    /**
     * @param nrOfPlurals
     *            number of plural forms
     * @param pluralExpression
     *            expression selecting the plural form
     */
    PluralInfo(int nrOfPlurals, String pluralExpression) {
        plurals = nrOfPlurals;
        expression = pluralExpression;
    }
}
private static final Map<String, PluralInfo> PLURAL_INFOS;
static {
HashMap<String, PluralInfo> info = new HashMap<String, PluralInfo>();
// list taken from http://translate.sourceforge.net/wiki/l10n/pluralforms d.d. 14-09-2012
// See also http://unicode.org/repos/cldr-tmp/trunk/diff/supplemental/language_plural_rules.html
info.put("ach", new PluralInfo(2, "(n > 1)"));
info.put("af", new PluralInfo(2, "(n != 1)"));
info.put("ak", new PluralInfo(2, "(n > 1)"));
info.put("am", new PluralInfo(2, "(n > 1)"));
info.put("an", new PluralInfo(2, "(n != 1)"));
info.put("ar", new PluralInfo(6, " n==0 ? 0 : n==1 ? 1 : n==2 ? 2 : n%100>=3 && n%100<=10 ? 3 : n%100>=11 ? 4 : 5"));
info.put("arn", new PluralInfo(2, "(n > 1)"));
info.put("ast", new PluralInfo(2, "(n != 1)"));
info.put("ay", new PluralInfo(1, "0"));
info.put("az", new PluralInfo(2, "(n != 1) "));
info.put("be", new PluralInfo(3, "(n%10==1 && n%100!=11 ? 0 : n%10>=2 && n%10<=4 && (n%100<10 || n%100>=20) ? 1 : 2)"));
info.put("bg", new PluralInfo(2, "(n != 1)"));
info.put("bn", new PluralInfo(2, "(n != 1)"));
info.put("bo", new PluralInfo(1, "0"));
info.put("br", new PluralInfo(2, "(n > 1)"));
info.put("brx", new PluralInfo(2, "(n != 1)"));
info.put("bs", new PluralInfo(3, "(n%10==1 && n%100!=11 ? 0 : n%10>=2 && n%10<=4 && (n%100<10 || n%100>=20) ? 1 : 2) "));
info.put("ca", new PluralInfo(2, "(n != 1)"));
info.put("cgg", new PluralInfo(1, "0"));
info.put("cs", new PluralInfo(3, "(n==1) ? 0 : (n>=2 && n<=4) ? 1 : 2"));
info.put("csb", new PluralInfo(3, "n==1 ? 0 : n%10>=2 && n%10<=4 && (n%100<10 || n%100>=20) ? 1 : 2"));
info.put("cy", new PluralInfo(4, " (n==1) ? 0 : (n==2) ? 1 : (n != 8 && n != 11) ? 2 : 3"));
info.put("da", new PluralInfo(2, "(n != 1)"));
info.put("de", new PluralInfo(2, "(n != 1)"));
info.put("doi", new PluralInfo(2, "(n != 1)"));
info.put("dz", new PluralInfo(1, "0"));
info.put("el", new PluralInfo(2, "(n != 1)"));
info.put("en", new PluralInfo(2, "(n != 1)"));
info.put("eo", new PluralInfo(2, "(n != 1)"));
info.put("es", new PluralInfo(2, "(n != 1)"));
info.put("et", new PluralInfo(2, "(n != 1)"));
info.put("eu", new PluralInfo(2, "(n != 1)"));
info.put("fa", new PluralInfo(1, "0"));
info.put("ff", new PluralInfo(2, "(n != 1)"));
info.put("fi", new PluralInfo(2, "(n != 1)"));
info.put("fil", new PluralInfo(2, "n > 1"));
info.put("fo", new PluralInfo(2, "(n != 1)"));
info.put("fr", new PluralInfo(2, "(n > 1)"));
info.put("fur", new PluralInfo(2, "(n != 1)"));
info.put("fy", new PluralInfo(2, "(n != 1)"));
info.put("ga", new PluralInfo(5, "n==1 ? 0 : n==2 ? 1 : n<7 ? 2 : n<11 ? 3 : 4"));
info.put("gd", new PluralInfo(4, "(n==1 || n==11) ? 0 : (n==2 || n==12) ? 1 : (n > 2 && n < 20) ? 2 : 3"));
info.put("gl", new PluralInfo(2, "(n != 1)"));
info.put("gu", new PluralInfo(2, "(n != 1)"));
info.put("gun", new PluralInfo(2, "(n > 1)"));
info.put("ha", new PluralInfo(2, "(n != 1)"));
info.put("he", new PluralInfo(2, "(n != 1)"));
info.put("hi", new PluralInfo(2, "(n != 1)"));
info.put("hne", new PluralInfo(2, "(n != 1)"));
info.put("hy", new PluralInfo(2, "(n != 1)"));
info.put("hr", new PluralInfo(3, "(n%10==1 && n%100!=11 ? 0 : n%10>=2 && n%10<=4 && (n%100<10 || n%100>=20) ? 1 : 2)"));
info.put("hu", new PluralInfo(2, "(n != 1)"));
info.put("ia", new PluralInfo(2, "(n != 1)"));
info.put("id", new PluralInfo(1, "0"));
info.put("is", new PluralInfo(2, "(n%10!=1 || n%100==11)"));
info.put("it", new PluralInfo(2, "(n != 1)"));
info.put("ja", new PluralInfo(1, "0"));
info.put("jbo", new PluralInfo(1, "0"));
info.put("jv", new PluralInfo(2, "n!=0"));
info.put("ka", new PluralInfo(1, "0"));
info.put("kk", new PluralInfo(1, "0"));
info.put("km", new PluralInfo(1, "0"));
info.put("kn", new PluralInfo(2, "(n!=1)"));
info.put("ko", new PluralInfo(1, "0"));
info.put("ku", new PluralInfo(2, "(n!= 1)"));
info.put("kw", new PluralInfo(4, " (n==1) ? 0 : (n==2) ? 1 : (n == 3) ? 2 : 3"));
info.put("ky", new PluralInfo(1, "0"));
info.put("lb", new PluralInfo(2, "(n != 1)"));
info.put("ln", new PluralInfo(2, "n>1"));
info.put("lo", new PluralInfo(1, "0"));
info.put("lt", new PluralInfo(3, "(n%10==1 && n%100!=11 ? 0 : n%10>=2 && (n%100<10 or n%100>=20) ? 1 : 2)"));
info.put("lv", new PluralInfo(3, "(n%10==1 && n%100!=11 ? 0 : n != 0 ? 1 : 2)"));
info.put("mai", new PluralInfo(2, "(n != 1)"));
info.put("mfe", new PluralInfo(2, "(n > 1)"));
info.put("mg", new PluralInfo(2, "(n > 1)"));
info.put("mi", new PluralInfo(2, "(n > 1)"));
info.put("mk", new PluralInfo(2, " n==1 || n%10==1 ? 0 : 1"));
info.put("ml", new PluralInfo(2, "(n != 1)"));
info.put("mn", new PluralInfo(2, "(n != 1)"));
info.put("mni", new PluralInfo(2, "(n != 1)"));
info.put("mnk", new PluralInfo(3, "(n==0 ? 0 : n==1 ? 1 : 2"));
info.put("mr", new PluralInfo(2, "(n != 1)"));
info.put("ms", new PluralInfo(1, "0"));
info.put("mt", new PluralInfo(4, "(n==1 ? 0 : n==0 || ( n%100>1 && n%100<11) ? 1 : (n%100>10 && n%100<20 ) ? 2 : 3)"));
info.put("my", new PluralInfo(1, "0"));
info.put("nah", new PluralInfo(2, "(n != 1)"));
info.put("nap", new PluralInfo(2, "(n != 1)"));
info.put("nb", new PluralInfo(2, "(n != 1)"));
info.put("ne", new PluralInfo(2, "(n != 1)"));
info.put("nl", new PluralInfo(2, "(n != 1)"));
info.put("se", new PluralInfo(2, "(n != 1)"));
info.put("nn", new PluralInfo(2, "(n != 1)"));
info.put("no", new PluralInfo(2, "(n != 1)"));
info.put("nso", new PluralInfo(2, "(n != 1)"));
info.put("oc", new PluralInfo(2, "(n > 1)"));
info.put("or", new PluralInfo(2, "(n != 1)"));
info.put("ps", new PluralInfo(2, "(n != 1)"));
info.put("pa", new PluralInfo(2, "(n != 1)"));
info.put("pap", new PluralInfo(2, "(n != 1)"));
info.put("pl", new PluralInfo(3, "(n==1 ? 0 : n%10>=2 && n%10<=4 && (n%100<10 || n%100>=20) ? 1 : 2)"));
info.put("pms", new PluralInfo(2, "(n != 1)"));
info.put("pt", new PluralInfo(2, "(n != 1)"));
info.put("rm", new PluralInfo(2, "(n!=1)"));
info.put("ro", new PluralInfo(3, "(n==1 ? 0 : (n==0 || (n%100 > 0 && n%100 < 20)) ? 1 : 2)"));
info.put("ru", new PluralInfo(3, "(n%10==1 && n%100!=11 ? 0 : n%10>=2 && n%10<=4 && (n%100<10 || n%100>=20) ? 1 : 2)"));
info.put("rw", new PluralInfo(2, "(n != 1)"));
info.put("sah", new PluralInfo(1, "0"));
info.put("sat", new PluralInfo(2, "(n != 1)"));
info.put("sco", new PluralInfo(2, "(n != 1)"));
info.put("sd", new PluralInfo(2, "(n != 1)"));
info.put("si", new PluralInfo(2, "(n != 1)"));
info.put("sk", new PluralInfo(3, "(n==1) ? 0 : (n>=2 && n<=4) ? 1 : 2"));
info.put("sl", new PluralInfo(4, "(n%100==1 ? 1 : n%100==2 ? 2 : n%100==3 || n%100==4 ? 3 : 0)"));
info.put("so", new PluralInfo(2, "n != 1"));
info.put("son", new PluralInfo(2, "(n != 1)"));
info.put("sq", new PluralInfo(2, "(n != 1)"));
info.put("sr", new PluralInfo(3, "(n%10==1 && n%100!=11 ? 0 : n%10>=2 && n%10<=4 && (n%100<10 || n%100>=20) ? 1 : 2)"));
info.put("su", new PluralInfo(1, "0"));
info.put("sw", new PluralInfo(2, "(n != 1)"));
info.put("sv", new PluralInfo(2, "(n != 1)"));
info.put("ta", new PluralInfo(2, "(n != 1)"));
info.put("te", new PluralInfo(2, "(n != 1)"));
info.put("tg", new PluralInfo(2, "(n > 1)"));
info.put("ti", new PluralInfo(2, "n > 1"));
info.put("th", new PluralInfo(1, "0"));
info.put("tk", new PluralInfo(2, "(n != 1)"));
info.put("tr", new PluralInfo(2, "(n>1)"));
info.put("tt", new PluralInfo(1, "0"));
info.put("ug", new PluralInfo(1, "0"));
info.put("uk", new PluralInfo(3, "(n%10==1 && n%100!=11 ? 0 : n%10>=2 && n%10<=4 && (n%100<10 || n%100>=20) ? 1 : 2)"));
info.put("ur", new PluralInfo(2, "(n != 1)"));
info.put("uz", new PluralInfo(2, "(n > 1)"));
info.put("vi", new PluralInfo(1, "0"));
info.put("wa", new PluralInfo(2, "(n > 1)"));
info.put("wo", new PluralInfo(1, "0"));
info.put("yo", new PluralInfo(2, "(n != 1)"));
info.put("zh", new PluralInfo(1, "0 "));
PLURAL_INFOS = Collections.unmodifiableMap(info);
}
/**
 * If true, a segment without translation is written with an empty msgstr; if false, the source text
 * is written into msgstr instead.
 */
private boolean allowBlank = false;
/**
 * If true, an entry whose msgid is empty but which has reference comments gets those references and
 * its extracted comments copied into the source text, so that it can be edited. If false, such
 * blank source segments are skipped (not shown in editor).
 */
private boolean allowEditingBlankSegment = false;
/**
 * If true, the header will be skipped (not shown in editor); when writing, it is emitted without
 * asking for a translation.
 */
private boolean skipHeader = false;
/**
 * If true, wrong but widely used format support, where msgid contains ID, msgstr contains original text.
 */
private boolean formatMonolingual = false;
/**
 * If true, the literal placeholder "Plural-Forms: nplurals=INTEGER; plural=EXPRESSION;"
 * in the header will be replaced with the INTEGER and EXPRESSION predefined
 * for the chosen target language, when that language is known
 */
private boolean autoFillInPluralStatement = false;
/** Flag comment consisting of the fuzzy flag only. */
protected static final Pattern COMMENT_FUZZY = Pattern.compile("#, fuzzy");
/** Flag comment that contains the fuzzy flag together with other text. */
protected static final Pattern COMMENT_FUZZY_OTHER = Pattern.compile("#,.* fuzzy.*");
/** "#|" comment carrying a msgid; group 1 is the quoted text. */
protected static final Pattern COMMENT_FUZZY_MSGID = Pattern.compile("#\\|.* msgid.*\"(.*)\"");
/** Flag comment that contains the no-wrap flag. */
protected static final Pattern COMMENT_NOWRAP = Pattern.compile("#,.* no-wrap.*");
/** Translator comment ("# text"); group 1 is the text. */
protected static final Pattern COMMENT_TRANSLATOR = Pattern.compile("# (.*)");
/** Extracted comment ("#. text"); group 1 is the text. */
protected static final Pattern COMMENT_EXTRACTED = Pattern.compile("#\\. (.*)");
/** Reference comment ("#: text"); group 1 is the text. */
protected static final Pattern COMMENT_REFERENCE = Pattern.compile("#: (.*)");
/** msgid or msgid_plural line; group 1 is "_plural" when present, group 2 is the quoted text. */
protected static final Pattern MSG_ID = Pattern.compile("msgid(_plural)?\\s+\"(.*)\"");
/** msgstr or msgstr[N] line; group 2 is N when present, group 3 is the quoted text. */
protected static final Pattern MSG_STR = Pattern.compile("msgstr(\\[([0-9]+)\\])?\\s+\"(.*)\"");
/** msgctxt line; group 1 is the quoted text. */
protected static final Pattern MSG_CTX = Pattern.compile("msgctxt\\s+\"(.*)\"");
/** Line holding only a quoted string, i.e. the continuation of a previous keyword; group 1 is the text. */
protected static final Pattern MSG_OTHER = Pattern.compile("\"(.*)\"");
/** Start of the Plural-Forms header statement (case-insensitive); group 1 is the nplurals number. */
protected static final Pattern PLURAL_FORMS = Pattern.compile("Plural-Forms: *nplurals= *([0-9]+) *; *plural",
Pattern.CASE_INSENSITIVE);
/** "#|" comment holding only a quoted string; group 1 is the text. */
protected static final Pattern MSG_FUZZY = Pattern.compile("#\\|\\s\"(.*)\"");
/**
 * Keyword the parser read most recently. It decides which buffer receives the text of a following
 * line that holds only a quoted string.
 */
enum MODE {
    /** A {@code msgid} line. */
    MSGID,
    /** A {@code msgstr} line without index. */
    MSGSTR,
    /** A {@code msgid_plural} line. */
    MSGID_PLURAL,
    /** A {@code msgstr[N]} line. */
    MSGSTR_PLURAL,
    /** A {@code msgctxt} line. */
    MSGCTX
}
/**
 * Text collected for the entry being read: sources[0] is the msgid, sources[1] the msgid_plural;
 * targets[0] is the msgstr (or msgstr[0]) and targets[N] the msgstr[N].
 */
private StringBuilder[] sources, targets;
/**
 * Comment text collected for the entry being read: translator comments, extracted comments,
 * references, and the text taken from "#|" comments.
 */
private StringBuilder translatorComments, extractedComments, references, sourceFuzzyTrue;
/** Number of plural forms; replaced when the header is flushed. */
private int plurals = 2;
/** Text of the msgctxt line of the entry being read, or the empty string. */
private String path;
/** Set when a no-wrap flag, a fuzzy flag, or a "#|" msgid comment has been read, respectively. */
private boolean nowrap, fuzzy, fuzzyTrue;
/** Writer for the translated file; null when the file is only parsed or aligned. */
private BufferedWriter out;
/**
 * Returns the name of this file format, looked up in the application strings under the
 * POFILTER_FILTER_NAME key.
 */
@Override
public String getFileFormatName() {
    final String formatName = OStrings.getString("POFILTER_FILTER_NAME");
    return formatName;
}
/**
 * Returns the default filter instances: one for "*.po" and one for "*.pot", both using UTF-8 for
 * the two encoding arguments.
 */
@Override
public Instance[] getDefaultInstances() {
    Instance poFiles = new Instance("*.po", StandardCharsets.UTF_8.name(), StandardCharsets.UTF_8.name());
    Instance potFiles = new Instance("*.pot", StandardCharsets.UTF_8.name(), StandardCharsets.UTF_8.name());
    return new Instance[] { poFiles, potFiles };
}
/**
 * @return always {@code true} (presumably meaning the source encoding may be chosen per file;
 *         confirm against the contract in AbstractFilter)
 */
@Override
public boolean isSourceEncodingVariable() {
return true;
}
/**
 * @return always {@code true} (presumably meaning the target encoding may be chosen per file;
 *         confirm against the contract in AbstractFilter)
 */
@Override
public boolean isTargetEncodingVariable() {
return true;
}
/**
 * @return the constant mark "PO-fuzzy"
 */
@Override
public String getFuzzyMark() {
return "PO-fuzzy";
}
/**
 * Reads the filter options, opens the input (and, when requested, the output) and processes the file.
 * <p>
 * The reader and writer are managed by try-with-resources, so both are closed even when processing
 * fails, and a failure while closing no longer hides the exception that caused processing to stop
 * (it is attached to it as a suppressed exception instead).
 *
 * @param inFile
 *            PO file to read
 * @param outFile
 *            file to write the translated PO to, or null when nothing has to be written
 * @param fc
 *            filter context providing the input and output encodings
 * @throws IOException
 *             if reading, writing or closing a file fails
 * @throws TranslationException
 *             declared by the overridden method
 */
@Override
public void processFile(File inFile, File outFile, FilterContext fc) throws IOException,
        TranslationException {
    // These two options are enabled when absent or "true"; note that their stored keys say "disallow".
    String allowBlankValue = processOptions.get(OPTION_ALLOW_BLANK);
    allowBlank = allowBlankValue == null || allowBlankValue.equalsIgnoreCase("true");
    String allowEditingBlankValue = processOptions.get(OPTION_ALLOW_EDITING_BLANK_SEGMENT);
    allowEditingBlankSegment = allowEditingBlankValue == null
            || allowEditingBlankValue.equalsIgnoreCase("true");

    // The remaining options are enabled only by an explicit "true".
    skipHeader = "true".equalsIgnoreCase(processOptions.get(OPTION_SKIP_HEADER));
    autoFillInPluralStatement = "true"
            .equalsIgnoreCase(processOptions.get(OPTION_AUTO_FILL_IN_PLURAL_STATEMENT));
    formatMonolingual = "true".equalsIgnoreCase(processOptions.get(OPTION_FORMAT_MONOLINGUAL));

    inEncodingLastParsedFile = fc.getInEncoding();

    // A null resource is permitted by try-with-resources and is simply not closed.
    try (BufferedReader reader = createReader(inFile, inEncodingLastParsedFile);
            BufferedWriter writer = outFile != null ? createWriter(outFile, fc.getOutEncoding()) : null) {
        processFile(reader, writer, fc);
    }
}
/**
 * Processes the translated file without producing output. The source file reader is not read here.
 *
 * @param sourceFile
 *            reader of the source file (unused)
 * @param translatedFile
 *            reader of the translated PO file
 * @param fc
 *            filter context
 */
@Override
protected void alignFile(BufferedReader sourceFile, BufferedReader translatedFile, FilterContext fc) throws Exception {
    // Look at the first character: a byte order mark stays consumed, anything else is put back.
    translatedFile.mark(1);
    final int firstChar = translatedFile.read();
    final boolean startsWithBom = firstChar == 0xFEFF;
    if (!startsWithBom) {
        translatedFile.reset();
    }

    // No writer: entries are handed over instead of being written.
    out = null;
    processPoFile(translatedFile, fc);
}
/**
 * Processes a PO file from an open reader, writing to the given writer when it is not null.
 *
 * @param in
 *            reader of the PO file
 * @param out
 *            writer for the resulting PO file, or null when nothing has to be written
 * @param fc
 *            filter context
 * @throws IOException
 *             if reading or writing fails
 */
@Override
public void processFile(BufferedReader in, BufferedWriter out, FilterContext fc) throws IOException {
    // Look at the first character: a byte order mark stays consumed, anything else is put back.
    in.mark(1);
    final int firstChar = in.read();
    final boolean startsWithBom = firstChar == 0xFEFF;
    if (!startsWithBom) {
        in.reset();
    }

    this.out = out;
    processPoFile(in, fc);
}
/**
 * Reads a PO file line by line and collects its entries.
 * <p>
 * Every line is trimmed and matched against the line patterns of this class. Keyword lines and
 * quoted continuation lines fill the source, target, context and comment buffers; lines that are
 * not msgstr text are copied to the output (when there is one); a line that matches nothing, and
 * the end of the file, flush the collected entry.
 * <p>
 * All per-file state is reset on entry, including the plural count: the target buffers start with
 * two slots, so a count left over from a previously processed file must not survive. A quoted
 * continuation line after a {@code msgstr[N]} whose index is out of range is dropped, exactly like
 * the {@code msgstr[N]} line itself.
 *
 * @param in
 *            reader of the PO file
 * @param fc
 *            filter context, handed to the flush step
 * @throws IOException
 *             if reading or writing fails, or if a quoted continuation line appears before any
 *             keyword line
 */
private void processPoFile(BufferedReader in, FilterContext fc) throws IOException {
    fuzzy = false;
    fuzzyTrue = false;
    nowrap = false;
    // Keep the plural count in step with the two target slots allocated below.
    plurals = 2;
    MODE currentMode = null;
    int currentPlural = 0;

    sources = new StringBuilder[2];
    sources[0] = new StringBuilder();
    sources[1] = new StringBuilder();

    // can be overridden when header has been read and the number of plurals is different.
    targets = new StringBuilder[2];
    targets[0] = new StringBuilder();
    targets[1] = new StringBuilder();

    translatorComments = new StringBuilder();
    extractedComments = new StringBuilder();
    references = new StringBuilder();
    sourceFuzzyTrue = new StringBuilder();
    path = "";

    String s;
    while ((s = in.readLine()) != null) {
        // We trim trailing spaces, otherwise the regexps could fail, thus making some segments
        // invisible to OmegaT
        s = s.trim();

        // We have a real fuzzy
        Matcher mTrueFuzzy = COMMENT_FUZZY_MSGID.matcher(s);
        if (mTrueFuzzy.matches()) {
            fuzzyTrue = true;
            sourceFuzzyTrue.append(mTrueFuzzy.group(1));
            continue;
        }

        /*
         * Removing the fuzzy markers, as it has no meanings after being processed by omegat
         */
        if (COMMENT_FUZZY.matcher(s).matches()) {
            currentPlural = 0;
            fuzzy = true;
            flushTranslation(currentMode, fc);
            continue;
        } else if (COMMENT_FUZZY_OTHER.matcher(s).matches()) {
            currentPlural = 0;
            fuzzy = true;
            flushTranslation(currentMode, fc);
            // Keep the other flags; the line goes on through the checks below.
            s = s.replaceAll("(.*), fuzzy(.*)", "$1$2");
        }

        // FSM for po files
        if (COMMENT_NOWRAP.matcher(s).matches()) {
            currentPlural = 0;
            flushTranslation(currentMode, fc);
            /*
             * Read the no-wrap comment, indicating that the creator of the po-file did not want long
             * messages to be wrapped on multiple lines. See 5.6.2 no-wrap of http://docs.oasis-open
             * .org/xliff/v1.2/xliff-profile-po/xliff -profile-po-1.2-cd02.html for an example.
             */
            nowrap = true;
            eol(s);
            continue;
        }

        Matcher mId = MSG_ID.matcher(s);
        if (mId.matches()) { // msg_id(_plural)
            currentPlural = 0;
            String text = mId.group(2);
            if (mId.group(1) == null) {
                // non-plural ID ('msg_id')
                // we can start a new translation. Flush current translation.
                // This has not happened when no empty lines are in between 'segments'.
                if (sources[0].length() > 0) {
                    flushTranslation(currentMode, fc);
                }
                currentMode = MODE.MSGID;
                sources[0].append(text);
            } else {
                // plural ID ('msg_id_plural')
                currentMode = MODE.MSGID_PLURAL;
                sources[1].append(text);
            }
            eol(s);
            continue;
        }

        Matcher mStr = MSG_STR.matcher(s);
        if (mStr.matches()) {
            // Hack to be able to translate empty segments
            // If the source segment is empty and there is a reference then
            // it copies the reference of the segment and the localization note into the source segment
            if (allowEditingBlankSegment && sources[0].length() == 0 && references.length() > 0) {
                String aux = references.toString() + extractedComments.toString();
                sources[0].append(aux);
            }
            String text = mStr.group(3);
            if (mStr.group(1) == null) {
                // non-plural lines
                currentMode = MODE.MSGSTR;
                targets[0].append(text);
                currentPlural = 0;
            } else {
                currentMode = MODE.MSGSTR_PLURAL;
                // plurals, i.e. msgstr[N] lines
                currentPlural = Integer.parseInt(mStr.group(2));
                if (currentPlural < targets.length) {
                    targets[currentPlural].append(text);
                }
            }
            // msgstr lines are not copied to the output: the flush step writes them.
            continue;
        }

        Matcher mCtx = MSG_CTX.matcher(s);
        if (mCtx.matches()) {
            currentMode = MODE.MSGCTX;
            currentPlural = 0;
            path = mCtx.group(1);
            eol(s);
            continue;
        }

        Matcher mReference = COMMENT_REFERENCE.matcher(s);
        if (mReference.matches()) {
            currentPlural = 0;
            references.append(mReference.group(1));
            references.append("\n");
            eol(s);
            continue;
        }

        Matcher mExtracted = COMMENT_EXTRACTED.matcher(s);
        if (mExtracted.matches()) {
            currentPlural = 0;
            extractedComments.append(mExtracted.group(1));
            extractedComments.append("\n");
            eol(s);
            continue;
        }

        Matcher mTranslator = COMMENT_TRANSLATOR.matcher(s);
        if (mTranslator.matches()) {
            currentPlural = 0;
            translatorComments.append(mTranslator.group(1));
            translatorComments.append("\n");
            eol(s);
            continue;
        }

        // True fuzzy
        Matcher mMsgFuzzy = MSG_FUZZY.matcher(s);
        if (mMsgFuzzy.matches()) {
            sourceFuzzyTrue.append(mMsgFuzzy.group(1));
            continue;
        }

        Matcher mOther = MSG_OTHER.matcher(s);
        if (mOther.matches()) {
            String text = mOther.group(1);
            if (currentMode == null) {
                throw new IOException(OStrings.getString("POFILTER_INVALID_FORMAT"));
            }
            switch (currentMode) {
            case MSGID:
                sources[0].append(text);
                eol(s);
                break;
            case MSGID_PLURAL:
                sources[1].append(text);
                eol(s);
                break;
            case MSGSTR:
                targets[0].append(text);
                break;
            case MSGSTR_PLURAL:
                // Same range check as for the msgstr[N] line itself: text for an index that has
                // no target slot is dropped instead of overrunning the array.
                if (currentPlural < targets.length) {
                    targets[currentPlural].append(text);
                }
                break;
            case MSGCTX:
                eol(s);
                break;
            }
            continue;
        }

        // Anything else (e.g. an empty line) ends the current entry.
        flushTranslation(currentMode, fc);
        eol(s);
    }
    flushTranslation(currentMode, fc);
}
/**
 * Copies a line to the output, followed by a line feed. Does nothing when there is no output.
 *
 * @param s
 *            line to write, without line terminator
 * @throws IOException
 *             if writing fails
 */
protected void eol(String s) throws IOException {
    if (out == null) {
        return;
    }
    out.write(s);
    out.write('\n');
}
/**
 * Hands over one form of the collected entry, together with a comment text built from the collected
 * comments.
 *
 * @param pair
 *            index of the target form; 0 pairs the msgid with the first target, a higher value pairs
 *            the msgid_plural with the target of that index
 */
protected void align(int pair) {
    StringBuilder notes = new StringBuilder();
    String sourceText;
    String suffix;
    if (pair > 0) {
        sourceText = unescape(sources[1].toString());
        suffix = "[" + pair + "]";
        notes.append(StringUtil.format(OStrings.getString("POFILTER_PLURAL_FORM_COMMENT"), pair));
        notes.append("\n");
    } else {
        sourceText = unescape(sources[pair].toString());
        suffix = "";
    }
    String targetText = unescape(targets[pair].toString());

    // Each non-empty comment kind contributes a title line followed by its text.
    if (translatorComments.length() > 0) {
        notes.append(OStrings.getString("POFILTER_TRANSLATOR_COMMENTS")).append("\n");
        notes.append(unescape(translatorComments.toString() + "\n"));
    }
    if (extractedComments.length() > 0) {
        notes.append(OStrings.getString("POFILTER_EXTRACTED_COMMENTS")).append("\n");
        notes.append(unescape(extractedComments.toString() + "\n"));
    }
    if (references.length() > 0) {
        notes.append(OStrings.getString("POFILTER_REFERENCES")).append("\n");
        notes.append(unescape(references.toString() + "\n"));
    }

    String comment = notes.length() == 0 ? null : notes.toString();
    align(sourceText, targetText, comment, suffix);
}
/**
 * Passes one source/translation pair to the parse callback or, when there is none, to the align
 * callback. An empty translation is passed on as null.
 * <p>
 * With the parse callback and the regular layout, an additional reference entry is added when a
 * "#|" msgid comment was read for this entry.
 *
 * @param source
 *            unescaped source text (the ID in the monolingual layout)
 * @param translation
 *            unescaped translation; must not be null, may be empty
 * @param comments
 *            comment text for the entry, or null
 * @param pathSuffix
 *            suffix for path to distinguish plural forms. It will be empty for first one, and [1],[2],...
 *            for next
 */
protected void align(String source, String translation, String comments, String pathSuffix) {
    final String target = translation.isEmpty() ? null : translation;
    final String entryPath = path + pathSuffix;

    if (entryParseCallback == null) {
        if (entryAlignCallback != null) {
            entryAlignCallback.addTranslation(null, source, target, fuzzy, entryPath, this);
        }
        return;
    }

    if (formatMonolingual) {
        // msgid is the ID and msgstr the text to translate.
        List<ProtectedPart> parts = TagUtil.applyCustomProtectedParts(target, PatternConsts.PRINTF_VARS,
                null);
        entryParseCallback.addEntry(source, target, null, fuzzy, comments, entryPath, this, parts);
        return;
    }

    List<ProtectedPart> parts = TagUtil.applyCustomProtectedParts(source, PatternConsts.PRINTF_VARS, null);
    entryParseCallback.addEntry(null, source, target, fuzzy, comments, entryPath, this, parts);
    if (fuzzyTrue) {
        // We add a reference entry
        String[] props = { SegmentProperties.COMMENT, comments, SegmentProperties.REFERENCE, "true" };
        entryParseCallback.addEntryWithProperties(null, sourceFuzzyTrue.toString(), target, false, props,
                entryPath, this, null);
        fuzzyTrue = false;
    }
}
/**
 * Passes the PO header to the parse callback as an entry without translation. Nothing happens when
 * there is no parse callback or when the header is to be skipped.
 *
 * @param header
 *            header text as read from the file (still escaped)
 * @param fc
 *            filter context, used when the plural statement is filled in
 */
protected void alignHeader(String header, FilterContext fc) {
    if (entryParseCallback == null || skipHeader) {
        return;
    }
    String headerText = unescape(autoFillInPluralStatement(header, fc));
    List<ProtectedPart> parts = TagUtil.applyCustomProtectedParts(headerText, PatternConsts.PRINTF_VARS,
            null);
    entryParseCallback.addEntry(null, headerText, null, false, null, path, this, parts);
}
/**
 * Finishes the entry collected so far and clears the entry buffers.
 * <p>
 * An entry with an empty source and no context is the header when it has target text; otherwise
 * there is nothing to flush yet and the buffers are left untouched. The header determines the number
 * of plural forms, taken from its Plural-Forms statement or else from the predefined rule of the
 * target language, and the target buffers are resized to that number. Any other entry is written
 * (when there is an output) or handed over, once per plural form.
 * <p>
 * Fixes over the previous version:
 * <ul>
 * <li>a Plural-Forms count below 1 is treated as 1, so that the first target slot always exists;</li>
 * <li>the target buffers are cleared by array length, not by the plural count;</li>
 * <li>the language code is lower-cased independently of the default locale;</li>
 * <li>the no-wrap flag is cleared together with the fuzzy flag, so that it applies to its own entry
 * only instead of to every following entry.</li>
 * </ul>
 *
 * @param currentMode
 *            keyword mode of the parser; not used here
 * @param fc
 *            filter context providing the target language
 * @throws IOException
 *             if writing fails
 */
protected void flushTranslation(MODE currentMode, FilterContext fc) throws IOException {
    if (sources[0].length() == 0 && path.isEmpty()) {
        if (targets[0].length() == 0) {
            // there is no text to translate yet
            return;
        }
        // header
        // check existing plural statement. If it contains the number of plurals, then use it!
        StringBuilder headerTarget = targets[0];
        String header = headerTarget.toString();
        Matcher pluralMatcher = PLURAL_FORMS.matcher(header);
        if (pluralMatcher.find()) {
            // At least one form is needed to hold msgstr / msgstr[0].
            plurals = Math.max(1, Integer.parseInt(pluralMatcher.group(1)));
        } else {
            // else use predefined number of plurals, if it exists
            Language targetLang = fc.getTargetLang();
            String lang = targetLang.getLanguageCode().toLowerCase(Locale.ROOT);
            PluralInfo pluralInfo = PLURAL_INFOS.get(lang);
            if (pluralInfo != null) {
                plurals = pluralInfo.plurals;
            }
        }
        // update the number of targets according to new plural number
        targets = new StringBuilder[plurals];
        targets[0] = headerTarget;
        for (int i = 1; i < plurals; i++) {
            targets[i] = new StringBuilder();
        }

        if (out != null) {
            // Header is always written
            out.write("msgstr " + getTranslation(null, targets[0], false, true, fc, 0) + "\n");
        } else {
            alignHeader(targets[0].toString(), fc);
        }
    } else if (sources[1].length() == 0) {
        // source exists, non-plurals
        if (out == null) {
            align(0);
        } else if (formatMonolingual) {
            out.write("msgstr " + getTranslation(sources[0].toString(), targets[0], allowBlank, false, fc, 0)
                    + "\n");
        } else {
            out.write("msgstr " + getTranslation(null, sources[0], allowBlank, false, fc, 0) + "\n");
        }
    } else {
        // source exists, plurals
        if (out != null) {
            out.write("msgstr[0] " + getTranslation(null, sources[0], allowBlank, false, fc, 0) + "\n");
            for (int i = 1; i < plurals; i++) {
                out.write("msgstr[" + i + "] " + getTranslation(null, sources[1], allowBlank, false, fc, i)
                        + "\n");
            }
        } else {
            align(0);
            for (int i = 1; i < plurals; i++) {
                align(i);
            }
        }
    }

    // Flags belong to the entry just flushed. A flag comment flushes before it sets its flag, so
    // the flag of the next entry is not lost here.
    fuzzy = false;
    nowrap = false;

    sources[0].setLength(0);
    sources[1].setLength(0);
    for (StringBuilder target : targets) {
        target.setLength(0);
    }
    path = "";
    translatorComments.setLength(0);
    extractedComments.setLength(0);
    references.setLength(0);
    sourceFuzzyTrue.setLength(0);
}
/** Escaped quote (backslash + quote) preceded by an even number of backslashes; group 1 is those backslashes. */
protected static final Pattern R1 = Pattern.compile("(?<!\\\\)((\\\\\\\\)*)\\\\\"");
/** Escaped newline (backslash + n) preceded by an even number of backslashes; group 1 is those backslashes. */
protected static final Pattern R2 = Pattern.compile("(?<!\\\\)((\\\\\\\\)*)\\\\n");
/** Escaped tab (backslash + t) preceded by an even number of backslashes; group 1 is those backslashes. */
protected static final Pattern R3 = Pattern.compile("(?<!\\\\)((\\\\\\\\)*)\\\\t");
/** Escaped newline (backslash + n) at the very beginning of the text. */
protected static final Pattern R4 = Pattern.compile("^\\\\n");
/**
 * Produces the text to write after a msgstr keyword.<br>
 * The given text is unescaped (escaped quotes, '\n' and '\t' are interpreted), translated through the
 * translate callback, and escaped again into PO notation, surrounded by double quotes.<br>
 * The header is not sent to the callback when headers are skipped; it is written as read, after the
 * plural statement has been filled in when that option is on.<br>
 * When there is no translation, the result is either an empty quoted string or the source text,
 * depending on {@code allowNull}.
 *
 * @param id
 *            ID passed to the translate callback; may be null
 * @param en
 *            The entire source text
 * @param allowNull
 *            Allow to output a blank translation in msgstr
 * @param isHeader
 *            is the given string the PO-header string?
 * @param fc
 *            The FilterContext, for targetLanguage
 * @param plural
 *            if the source text is a plural, which plural number / variant are we on? 0 = no plural, 1.. are the
 *            plurals for the given target language.
 * @return The translated entry, within double quotes on each line (thus ready to be printed to target file
 *         immediately)
 **/
private String getTranslation(String id, StringBuilder en, boolean allowNull, boolean isHeader, FilterContext fc,
        int plural) {
    String entry = unescape(en.toString());
    if (isHeader) {
        entry = autoFillInPluralStatement(entry, fc);
    }

    final String suffix = plural > 0 ? "[" + plural + "]" : "";

    // Do real translation
    String translation;
    if (isHeader && skipHeader) {
        translation = entry;
    } else {
        translation = entryTranslateCallback.getTranslation(id, entry, path + suffix);
    }

    if (translation == null) {
        if (allowNull) {
            return "\"\"";
        }
        // We write the source in translation
        translation = entry;
    }
    return "\"" + escape(translation) + "\"";
}
/**
 * Replaces the placeholder "Plural-Forms: nplurals=INTEGER; plural=EXPRESSION;" with the predefined
 * plural rule of the target language, when the option is selected and a rule is known.
 * <p>
 * The language code is lower-cased with {@link Locale#ROOT}, so that the lookup does not depend on
 * the default locale (for example, the Turkish lower-casing of "I"). The replacement is a literal
 * one: neither the placeholder nor the rule is interpreted as a regular expression.
 *
 * @param header
 *            The header text that contains the Plural-forms line.
 * @param fc
 *            filter context providing the target language
 * @return Header with the correct plural forms line according to target language, or the header
 *         unchanged.
 */
private String autoFillInPluralStatement(String header, FilterContext fc) {
    if (!autoFillInPluralStatement) {
        return header;
    }
    Language targetLang = fc.getTargetLang();
    String lang = targetLang.getLanguageCode().toLowerCase(Locale.ROOT);
    PluralInfo pluralInfo = PLURAL_INFOS.get(lang);
    if (pluralInfo == null) {
        return header;
    }
    return header.replace("Plural-Forms: nplurals=INTEGER; plural=EXPRESSION;",
            "Plural-Forms: nplurals=" + pluralInfo.plurals + "; plural=" + pluralInfo.expression + ";");
}
/**
 * Unescape text from .po format.
 * <p>
 * The steps run in a fixed order: escaped quotes, escaped newlines, escaped tabs, an escaped newline
 * at the start of the text, and finally escaped backslashes. In the first three steps an escape
 * only counts when it is preceded by an even number of backslashes; those backslashes are kept and
 * reduced by the last step.
 *
 * @param entry
 *            text in PO notation, without the surrounding quotes
 * @return the interpreted text
 */
private String unescape(String entry) {
    // \" becomes ", unless the backslash is itself escaped
    String text = R1.matcher(entry).replaceAll("$1\"");
    // \n becomes a line feed, unless the backslash is itself escaped
    text = R2.matcher(text).replaceAll("$1\n");
    // \t becomes a tab, unless the backslash is itself escaped
    text = R3.matcher(text).replaceAll("$1\t");
    // \n at the beginning of the text becomes a line feed
    text = R4.matcher(text).replaceAll("\\\n");
    // \\ becomes \
    return text.replace("\\\\", "\\");
}
/**
 * Escape text to .po format.
 * <p>
 * Backslashes and quotes are escaped, every line feed becomes '\n' followed by the start of a new
 * quoted line, and tabs become '\t'. Lines are split at line feeds only; long lines are never
 * wrapped. When the no-wrap flag is set and the result spans several lines, it starts with an empty
 * quoted line so that all lines of the text are aligned.
 *
 * @param translation
 *            text to escape
 * @return the text in PO notation, without the outermost quotes
 */
private String escape(String translation) {
    // Quote close + line feed + quote open: the break between two quoted lines.
    final String lineBreak = "\"\n\"";

    // Order matters: backslashes first, so that the escapes added afterwards stay single.
    String escaped = translation.replace("\\", "\\\\");
    escaped = escaped.replace("\"", "\\\"");
    // 'blah<br>blah' becomes 'blah\n"<br>"blah'
    escaped = escaped.replace("\n", "\\n" + lineBreak);

    // don't make empty new line at the end (in case the text ended with a line feed)
    if (escaped.endsWith(lineBreak)) {
        escaped = escaped.substring(0, escaped.length() - 3);
    }
    if (nowrap && escaped.contains("\n")) {
        // start with empty string, to align all lines of translation
        escaped = lineBreak + escaped;
    }

    // 'blah<tab>blah' becomes 'blah\tblah'
    return escaped.replace("\t", "\\t");
}
/**
 * Shows the PO options dialog and returns the options chosen by the user.
 *
 * @param parent
 *            parent window of the dialog
 * @param config
 *            current options
 * @return the options from the dialog when it returned RET_OK; null otherwise, or when an exception
 *         occurred (the exception is logged)
 */
@Override
public Map<String, String> changeOptions(Window parent, Map<String, String> config) {
    try {
        PoOptionsDialog dialog = new PoOptionsDialog(parent, config);
        dialog.setVisible(true);
        if (PoOptionsDialog.RET_OK != dialog.getReturnStatus()) {
            return null;
        }
        return dialog.getOptions();
    } catch (Exception e) {
        Log.log(OStrings.getString("POFILTER_EXCEPTION"));
        Log.log(e);
        return null;
    }
}
/**
 * Returns true to indicate that the PO filter has options.
 *
 * @return True, because the PO filter has options.
 */
@Override
public boolean hasOptions() {
return true;
}
/**
 * @return always {@code true} (a PO entry holds both a msgid and a msgstr; presumably this marks
 *         the format as carrying source and translation together, confirm against AbstractFilter)
 */
@Override
public boolean isBilingual() {
return true;
}
}