PoFilter.java example

Explorer
OmegaT-master
/**************************************************************************
 OmegaT - Computer Assisted Translation (CAT) tool
          with fuzzy matching, translation memory, keyword search,
          glossaries, and translation leveraging into updated projects.

 Copyright (C) 2000-2006 Keith Godfrey and Maxym Mykhalchuk
               2006 Thomas Huriaux
               2008 Martin Fleurke
               2009 Alex Buloichik
               2011 Didier Briel
               2013-1014 Alex Buloichik, Enrique Estevez
               2017 Didier Briel
               Home page: http://www.omegat.org/
               Support center: http://groups.yahoo.com/group/OmegaT/

 This file is part of OmegaT.

 OmegaT is free software: you can redistribute it and/or modify
 it under the terms of the GNU General Public License as published by
 the Free Software Foundation, either version 3 of the License, or
 (at your option) any later version.

 OmegaT is distributed in the hope that it will be useful,
 but WITHOUT ANY WARRANTY; without even the implied warranty of
 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 GNU General Public License for more details.

 You should have received a copy of the GNU General Public License
 along with this program.  If not, see <http://www.gnu.org/licenses/>.
 **************************************************************************/

package org.omegat.filters2.po;

import java.awt.Window;
import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.File;
import java.io.IOException;
import java.nio.charset.StandardCharsets;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import org.omegat.core.data.ProtectedPart;
import org.omegat.core.data.SegmentProperties;
import org.omegat.filters2.AbstractFilter;
import org.omegat.filters2.FilterContext;
import org.omegat.filters2.Instance;
import org.omegat.filters2.TranslationException;
import org.omegat.util.Language;
import org.omegat.util.Log;
import org.omegat.util.OStrings;
import org.omegat.util.PatternConsts;
import org.omegat.util.StringUtil;
import org.omegat.util.TagUtil;

/**
 * Filter to support po files (in various encodings).
 *
 * Format described on http://www.gnu.org/software/hello/manual/gettext/PO-Files.html
 *
 * Filter is not thread-safe !
 *
 * Filter uses msgctx field as path, and plural index as suffix of path.
 *
 * @author Keith Godfrey
 * @author Maxym Mykhalchuk
 * @author Thomas Huriaux
 * @author Martin Fleurke
 * @author Alex Buloichik (alex73mail@gmail.com)
 * @author Didier Briel
 * @author Enrique Estevez
 */
public class PoFilter extends AbstractFilter {

    public static final String OPTION_ALLOW_BLANK = "disallowBlank";
    public static final String OPTION_ALLOW_EDITING_BLANK_SEGMENT = "disallowEditingBlankSegment";
    public static final String OPTION_SKIP_HEADER = "skipHeader";
    public static final String OPTION_AUTO_FILL_IN_PLURAL_STATEMENT = "autoFillInPluralStatement";
    public static final String OPTION_FORMAT_MONOLINGUAL = "monolingualFormat";

    private static class PluralInfo {
        public int plurals;
        public String  expression;

        PluralInfo(int nrOfPlurals, String pluralExpression) {
            plurals = nrOfPlurals;
            expression = pluralExpression;
        }
    }

    private static final Map<String, PluralInfo> PLURAL_INFOS;
    static {
        HashMap<String, PluralInfo> info = new HashMap<String, PluralInfo>();
        // list taken from http://translate.sourceforge.net/wiki/l10n/pluralforms d.d. 14-09-2012
        // See also http://unicode.org/repos/cldr-tmp/trunk/diff/supplemental/language_plural_rules.html
        info.put("ach", new PluralInfo(2, "(n > 1)"));
        info.put("af", new PluralInfo(2, "(n != 1)"));
        info.put("ak", new PluralInfo(2, "(n > 1)"));
        info.put("am", new PluralInfo(2, "(n > 1)"));
        info.put("an", new PluralInfo(2, "(n != 1)"));
        info.put("ar", new PluralInfo(6, " n==0 ? 0 : n==1 ? 1 : n==2 ? 2 : n%100>=3 && n%100<=10 ? 3 : n%100>=11 ? 4 : 5"));
        info.put("arn", new PluralInfo(2, "(n > 1)"));
        info.put("ast", new PluralInfo(2, "(n != 1)"));
        info.put("ay", new PluralInfo(1, "0"));
        info.put("az", new PluralInfo(2, "(n != 1) "));
        info.put("be", new PluralInfo(3, "(n%10==1 && n%100!=11 ? 0 : n%10>=2 && n%10<=4 && (n%100<10 || n%100>=20) ? 1 : 2)"));
        info.put("bg", new PluralInfo(2, "(n != 1)"));
        info.put("bn", new PluralInfo(2, "(n != 1)"));
        info.put("bo", new PluralInfo(1, "0"));
        info.put("br", new PluralInfo(2, "(n > 1)"));
        info.put("brx", new PluralInfo(2, "(n != 1)"));
        info.put("bs", new PluralInfo(3, "(n%10==1 && n%100!=11 ? 0 : n%10>=2 && n%10<=4 && (n%100<10 || n%100>=20) ? 1 : 2) "));
        info.put("ca", new PluralInfo(2, "(n != 1)"));
        info.put("cgg", new PluralInfo(1, "0"));
        info.put("cs", new PluralInfo(3, "(n==1) ? 0 : (n>=2 && n<=4) ? 1 : 2"));
        info.put("csb", new PluralInfo(3, "n==1 ? 0 : n%10>=2 && n%10<=4 && (n%100<10 || n%100>=20) ? 1 : 2"));
        info.put("cy", new PluralInfo(4, " (n==1) ? 0 : (n==2) ? 1 : (n != 8 && n != 11) ? 2 : 3"));
        info.put("da", new PluralInfo(2, "(n != 1)"));
        info.put("de", new PluralInfo(2, "(n != 1)"));
        info.put("doi", new PluralInfo(2, "(n != 1)"));
        info.put("dz", new PluralInfo(1, "0"));
        info.put("el", new PluralInfo(2, "(n != 1)"));
        info.put("en", new PluralInfo(2, "(n != 1)"));
        info.put("eo", new PluralInfo(2, "(n != 1)"));
        info.put("es", new PluralInfo(2, "(n != 1)"));
        info.put("et", new PluralInfo(2, "(n != 1)"));
        info.put("eu", new PluralInfo(2, "(n != 1)"));
        info.put("fa", new PluralInfo(1, "0"));
        info.put("ff", new PluralInfo(2, "(n != 1)"));
        info.put("fi", new PluralInfo(2, "(n != 1)"));
        info.put("fil", new PluralInfo(2, "n > 1"));
        info.put("fo", new PluralInfo(2, "(n != 1)"));
        info.put("fr", new PluralInfo(2, "(n > 1)"));
        info.put("fur", new PluralInfo(2, "(n != 1)"));
        info.put("fy", new PluralInfo(2, "(n != 1)"));
        info.put("ga", new PluralInfo(5, "n==1 ? 0 : n==2 ? 1 : n<7 ? 2 : n<11 ? 3 : 4"));
        info.put("gd", new PluralInfo(4, "(n==1 || n==11) ? 0 : (n==2 || n==12) ? 1 : (n > 2 && n < 20) ? 2 : 3"));
        info.put("gl", new PluralInfo(2, "(n != 1)"));
        info.put("gu", new PluralInfo(2, "(n != 1)"));
        info.put("gun", new PluralInfo(2, "(n > 1)"));
        info.put("ha", new PluralInfo(2, "(n != 1)"));
        info.put("he", new PluralInfo(2, "(n != 1)"));
        info.put("hi", new PluralInfo(2, "(n != 1)"));
        info.put("hne", new PluralInfo(2, "(n != 1)"));
        info.put("hy", new PluralInfo(2, "(n != 1)"));
        info.put("hr", new PluralInfo(3, "(n%10==1 && n%100!=11 ? 0 : n%10>=2 && n%10<=4 && (n%100<10 || n%100>=20) ? 1 : 2)"));
        info.put("hu", new PluralInfo(2, "(n != 1)"));
        info.put("ia", new PluralInfo(2, "(n != 1)"));
        info.put("id", new PluralInfo(1, "0"));
        info.put("is", new PluralInfo(2, "(n%10!=1 || n%100==11)"));
        info.put("it", new PluralInfo(2, "(n != 1)"));
        info.put("ja", new PluralInfo(1, "0"));
        info.put("jbo", new PluralInfo(1, "0"));
        info.put("jv", new PluralInfo(2, "n!=0"));
        info.put("ka", new PluralInfo(1, "0"));
        info.put("kk", new PluralInfo(1, "0"));
        info.put("km", new PluralInfo(1, "0"));
        info.put("kn", new PluralInfo(2, "(n!=1)"));
        info.put("ko", new PluralInfo(1, "0"));
        info.put("ku", new PluralInfo(2, "(n!= 1)"));
        info.put("kw", new PluralInfo(4, " (n==1) ? 0 : (n==2) ? 1 : (n == 3) ? 2 : 3"));
        info.put("ky", new PluralInfo(1, "0"));
        info.put("lb", new PluralInfo(2, "(n != 1)"));
        info.put("ln", new PluralInfo(2, "n>1"));
        info.put("lo", new PluralInfo(1, "0"));
        info.put("lt", new PluralInfo(3, "(n%10==1 && n%100!=11 ? 0 : n%10>=2 && (n%100<10 or n%100>=20) ? 1 : 2)"));
        info.put("lv", new PluralInfo(3, "(n%10==1 && n%100!=11 ? 0 : n != 0 ? 1 : 2)"));
        info.put("mai", new PluralInfo(2, "(n != 1)"));
        info.put("mfe", new PluralInfo(2, "(n > 1)"));
        info.put("mg", new PluralInfo(2, "(n > 1)"));
        info.put("mi", new PluralInfo(2, "(n > 1)"));
        info.put("mk", new PluralInfo(2, " n==1 || n%10==1 ? 0 : 1"));
        info.put("ml", new PluralInfo(2, "(n != 1)"));
        info.put("mn", new PluralInfo(2, "(n != 1)"));
        info.put("mni", new PluralInfo(2, "(n != 1)"));
        info.put("mnk", new PluralInfo(3, "(n==0 ? 0 : n==1 ? 1 : 2"));
        info.put("mr", new PluralInfo(2, "(n != 1)"));
        info.put("ms", new PluralInfo(1, "0"));
        info.put("mt", new PluralInfo(4, "(n==1 ? 0 : n==0 || ( n%100>1 && n%100<11) ? 1 : (n%100>10 && n%100<20 ) ? 2 : 3)"));
        info.put("my", new PluralInfo(1, "0"));
        info.put("nah", new PluralInfo(2, "(n != 1)"));
        info.put("nap", new PluralInfo(2, "(n != 1)"));
        info.put("nb", new PluralInfo(2, "(n != 1)"));
        info.put("ne", new PluralInfo(2, "(n != 1)"));
        info.put("nl", new PluralInfo(2, "(n != 1)"));
        info.put("se", new PluralInfo(2, "(n != 1)"));
        info.put("nn", new PluralInfo(2, "(n != 1)"));
        info.put("no", new PluralInfo(2, "(n != 1)"));
        info.put("nso", new PluralInfo(2, "(n != 1)"));
        info.put("oc", new PluralInfo(2, "(n > 1)"));
        info.put("or", new PluralInfo(2, "(n != 1)"));
        info.put("ps", new PluralInfo(2, "(n != 1)"));
        info.put("pa", new PluralInfo(2, "(n != 1)"));
        info.put("pap", new PluralInfo(2, "(n != 1)"));
        info.put("pl", new PluralInfo(3, "(n==1 ? 0 : n%10>=2 && n%10<=4 && (n%100<10 || n%100>=20) ? 1 : 2)"));
        info.put("pms", new PluralInfo(2, "(n != 1)"));
        info.put("pt", new PluralInfo(2, "(n != 1)"));
        info.put("rm", new PluralInfo(2, "(n!=1)"));
        info.put("ro", new PluralInfo(3, "(n==1 ? 0 : (n==0 || (n%100 > 0 && n%100 < 20)) ? 1 : 2)"));
        info.put("ru", new PluralInfo(3, "(n%10==1 && n%100!=11 ? 0 : n%10>=2 && n%10<=4 && (n%100<10 || n%100>=20) ? 1 : 2)"));
        info.put("rw", new PluralInfo(2, "(n != 1)"));
        info.put("sah", new PluralInfo(1, "0"));
        info.put("sat", new PluralInfo(2, "(n != 1)"));
        info.put("sco", new PluralInfo(2, "(n != 1)"));
        info.put("sd", new PluralInfo(2, "(n != 1)"));
        info.put("si", new PluralInfo(2, "(n != 1)"));
        info.put("sk", new PluralInfo(3, "(n==1) ? 0 : (n>=2 && n<=4) ? 1 : 2"));
        info.put("sl", new PluralInfo(4, "(n%100==1 ? 1 : n%100==2 ? 2 : n%100==3 || n%100==4 ? 3 : 0)"));
        info.put("so", new PluralInfo(2, "n != 1"));
        info.put("son", new PluralInfo(2, "(n != 1)"));
        info.put("sq", new PluralInfo(2, "(n != 1)"));
        info.put("sr", new PluralInfo(3, "(n%10==1 && n%100!=11 ? 0 : n%10>=2 && n%10<=4 && (n%100<10 || n%100>=20) ? 1 : 2)"));
        info.put("su", new PluralInfo(1, "0"));
        info.put("sw", new PluralInfo(2, "(n != 1)"));
        info.put("sv", new PluralInfo(2, "(n != 1)"));
        info.put("ta", new PluralInfo(2, "(n != 1)"));
        info.put("te", new PluralInfo(2, "(n != 1)"));
        info.put("tg", new PluralInfo(2, "(n > 1)"));
        info.put("ti", new PluralInfo(2, "n > 1"));
        info.put("th", new PluralInfo(1, "0"));
        info.put("tk", new PluralInfo(2, "(n != 1)"));
        info.put("tr", new PluralInfo(2, "(n>1)"));
        info.put("tt", new PluralInfo(1, "0"));
        info.put("ug", new PluralInfo(1, "0"));
        info.put("uk", new PluralInfo(3, "(n%10==1 && n%100!=11 ? 0 : n%10>=2 && n%10<=4 && (n%100<10 || n%100>=20) ? 1 : 2)"));
        info.put("ur", new PluralInfo(2, "(n != 1)"));
        info.put("uz", new PluralInfo(2, "(n > 1)"));
        info.put("vi", new PluralInfo(1, "0"));
        info.put("wa", new PluralInfo(2, "(n > 1)"));
        info.put("wo", new PluralInfo(1, "0"));
        info.put("yo", new PluralInfo(2, "(n != 1)"));
        info.put("zh", new PluralInfo(1, "0 "));
        PLURAL_INFOS = Collections.unmodifiableMap(info);
    }

    /**
     * If true, non-translated segments will contain the source text in ms
     */
    private boolean allowBlank = false;
    /**
     * If false, the blank source segments will be skipped (not shown in editor)
     */
    private boolean allowEditingBlankSegment = false;
    /**
     * If true, the header will be skipped (not shown in editor)
     */
    private boolean skipHeader = false;
    /**
     * If true, wrong but widely used format support, where msgid contains ID, msgstr contains original text.
     */
    private boolean formatMonolingual = false;

    /**
     * If true, the "Plural-Forms: nplurals=INTEGER; plural=EXPRESSION;" section
     * in the header will be updated with the correct INTEGER and EXPRESSION
     * based on the chosen targetLanguage
     */
    private boolean autoFillInPluralStatement = false;

    protected static final Pattern COMMENT_FUZZY = Pattern.compile("#, fuzzy");
    protected static final Pattern COMMENT_FUZZY_OTHER = Pattern.compile("#,.* fuzzy.*");
    protected static final Pattern COMMENT_FUZZY_MSGID = Pattern.compile("#\\|.* msgid.*\"(.*)\"");
    protected static final Pattern COMMENT_NOWRAP = Pattern.compile("#,.* no-wrap.*");
    protected static final Pattern COMMENT_TRANSLATOR = Pattern.compile("# (.*)");
    protected static final Pattern COMMENT_EXTRACTED = Pattern.compile("#\\. (.*)");
    protected static final Pattern COMMENT_REFERENCE = Pattern.compile("#: (.*)");
    protected static final Pattern MSG_ID = Pattern.compile("msgid(_plural)?\\s+\"(.*)\"");
    protected static final Pattern MSG_STR = Pattern.compile("msgstr(\\[([0-9]+)\\])?\\s+\"(.*)\"");
    protected static final Pattern MSG_CTX = Pattern.compile("msgctxt\\s+\"(.*)\"");
    protected static final Pattern MSG_OTHER = Pattern.compile("\"(.*)\"");
    protected static final Pattern PLURAL_FORMS = Pattern.compile("Plural-Forms: *nplurals= *([0-9]+) *; *plural",
            Pattern.CASE_INSENSITIVE);
    protected static final Pattern MSG_FUZZY = Pattern.compile("#\\|\\s\"(.*)\"");

    enum MODE {
        MSGID, MSGSTR, MSGID_PLURAL, MSGSTR_PLURAL, MSGCTX
    };

    private StringBuilder[] sources, targets;
    private StringBuilder translatorComments, extractedComments, references, sourceFuzzyTrue;
    private int plurals = 2;
    private String path;
    private boolean nowrap, fuzzy, fuzzyTrue;

    private BufferedWriter out;

    @Override
    public String getFileFormatName() {
        return OStrings.getString("POFILTER_FILTER_NAME");
    }

    @Override
    public Instance[] getDefaultInstances() {
        return new Instance[]
        { new Instance("*.po", StandardCharsets.UTF_8.name(), StandardCharsets.UTF_8.name()),
                new Instance("*.pot", StandardCharsets.UTF_8.name(), StandardCharsets.UTF_8.name()) };
    }

    @Override
    public boolean isSourceEncodingVariable() {
        return true;
    }

    @Override
    public boolean isTargetEncodingVariable() {
        return true;
    }

    @Override
    public String getFuzzyMark() {
        return "PO-fuzzy";
    }

    @Override
    public void processFile(File inFile, File outFile, FilterContext fc) throws IOException,
            TranslationException {

        String disallowBlankStr = processOptions.get(OPTION_ALLOW_BLANK);
        allowBlank = disallowBlankStr == null || disallowBlankStr.equalsIgnoreCase("true");

        String disallowEditingBlankSegmentStr = processOptions.get(OPTION_ALLOW_EDITING_BLANK_SEGMENT);
        allowEditingBlankSegment = disallowEditingBlankSegmentStr == null
                || disallowEditingBlankSegmentStr.equalsIgnoreCase("true");

        String skipHeaderStr = processOptions.get(OPTION_SKIP_HEADER);
        skipHeader = "true".equalsIgnoreCase(skipHeaderStr);

        String autoFillInPluralStatementStr = processOptions.get(OPTION_AUTO_FILL_IN_PLURAL_STATEMENT);
        autoFillInPluralStatement = "true".equalsIgnoreCase(autoFillInPluralStatementStr);

        String formatMonolingualStr = processOptions.get(OPTION_FORMAT_MONOLINGUAL);
        formatMonolingual = "true".equalsIgnoreCase(formatMonolingualStr);

        inEncodingLastParsedFile = fc.getInEncoding();
        BufferedReader reader = createReader(inFile, inEncodingLastParsedFile);
        try {
            BufferedWriter writer;

            if (outFile != null) {
                writer = createWriter(outFile, fc.getOutEncoding());
            } else {
                writer = null;
            }

            try {
                processFile(reader, writer, fc);
            } finally {
                if (writer != null) {
                    writer.close();
                }
            }
        } finally {
            reader.close();
        }
    }

    @Override
    protected void alignFile(BufferedReader sourceFile, BufferedReader translatedFile, FilterContext fc) throws Exception {
        // BOM (byte order mark) bugfix
        translatedFile.mark(1);
        int ch = translatedFile.read();
        if (ch != 0xFEFF) {
            translatedFile.reset();
        }
        this.out = null;
        processPoFile(translatedFile, fc);
    }

    @Override
    public void processFile(BufferedReader in, BufferedWriter out, FilterContext fc) throws IOException {
        // BOM (byte order mark) bugfix
        in.mark(1);
        int ch = in.read();
        if (ch != 0xFEFF) {
            in.reset();
        }
        this.out = out;
        processPoFile(in, fc);
    }

    private void processPoFile(BufferedReader in, FilterContext fc) throws IOException {
        fuzzy = false;
        fuzzyTrue = false;
        nowrap = false;
        MODE currentMode = null;
        int currentPlural = 0;

        sources = new StringBuilder[2];
        sources[0] = new StringBuilder();
        sources[1] = new StringBuilder();
        // can be overridden when header has been read and the number of plurals is different.
        targets = new StringBuilder[2];
        targets[0] = new StringBuilder();
        targets[1] = new StringBuilder();

        translatorComments = new StringBuilder();
        extractedComments = new StringBuilder();
        references = new StringBuilder();
        sourceFuzzyTrue = new StringBuilder();
        path = "";

        String s;
        while ((s = in.readLine()) != null) {

            // We trim trailing spaces, otherwise the regexps could fail, thus making some segments
            // invisible to OmegaT
            s = s.trim();

            // We have a real fuzzy
            Matcher mTrueFuzzy = COMMENT_FUZZY_MSGID.matcher(s);
            if (mTrueFuzzy.matches()) {
                fuzzyTrue = true;
                sourceFuzzyTrue.append(mTrueFuzzy.group(1));
                continue;
            }

            /*
             * Removing the fuzzy markers, as it has no meanings after being processed by omegat
             */
            if (COMMENT_FUZZY.matcher(s).matches()) {
                currentPlural = 0;
                fuzzy = true;
                flushTranslation(currentMode, fc);
                continue;
            } else if (COMMENT_FUZZY_OTHER.matcher(s).matches()) {
                currentPlural = 0;
                fuzzy = true;
                flushTranslation(currentMode, fc);
                s = s.replaceAll("(.*), fuzzy(.*)", "$1$2");
            }

            // FSM for po files
            if (COMMENT_NOWRAP.matcher(s).matches()) {
                currentPlural = 0;
                flushTranslation(currentMode, fc);
                /*
                 * Read the no-wrap comment, indicating that the creator of the po-file did not want long
                 * messages to be wrapped on multiple lines. See 5.6.2 no-wrap of http://docs.oasis-open
                 * .org/xliff/v1.2/xliff-profile-po/xliff -profile-po-1.2-cd02.html for an example.
                 */
                nowrap = true;
                eol(s);
                continue;
            }

            Matcher mId = MSG_ID.matcher(s);
            if (mId.matches()) { // msg_id(_plural)
                currentPlural = 0;
                String text = mId.group(2);
                if (mId.group(1) == null) {
                    // non-plural ID ('msg_id')
                    // we can start a new translation. Flush current translation.
                    // This has not happened when no empty lines are in between 'segments'.
                    if (sources[0].length() > 0) {
                        flushTranslation(currentMode, fc);
                    }
                    currentMode = MODE.MSGID;
                    sources[0].append(text);
                } else {
                    // plural ID ('msg_id_plural')
                    currentMode = MODE.MSGID_PLURAL;
                    sources[1].append(text);
                }
                eol(s);

                continue;
            }

            Matcher mStr = MSG_STR.matcher(s);
            if (mStr.matches()) {

                // Hack to be able to translate empty segments
                // If the source segment is empty and there is a reference then
                // it copies the reference of the segment and the localization note into the source segment
                if (allowEditingBlankSegment && sources[0].length() == 0 && references.length() > 0) {
                    String aux = references.toString() + extractedComments.toString();
                    sources[0].append(aux);
                }

                String text = mStr.group(3);
                if (mStr.group(1) == null) {
                    // non-plural lines
                    currentMode = MODE.MSGSTR;
                    targets[0].append(text);
                    currentPlural = 0;
                } else {
                    currentMode = MODE.MSGSTR_PLURAL;
                    // plurals, i.e. msgstr[N] lines
                    currentPlural = Integer.parseInt(mStr.group(2));
                    if (currentPlural < plurals) {
                        targets[currentPlural].append(text);
                    }
                }
                continue;
            }

            Matcher mCtx = MSG_CTX.matcher(s);
            if (mCtx.matches()) {
                currentMode = MODE.MSGCTX;
                currentPlural = 0;
                path = mCtx.group(1);
                eol(s);

                continue;
            }

            Matcher mReference = COMMENT_REFERENCE.matcher(s);
            if (mReference.matches()) {
                currentPlural = 0;
                references.append(mReference.group(1));
                references.append("\n");
                eol(s);

                continue;
            }
            Matcher mExtracted = COMMENT_EXTRACTED.matcher(s);
            if (mExtracted.matches()) {
                currentPlural = 0;
                extractedComments.append(mExtracted.group(1));
                extractedComments.append("\n");
                eol(s);

                continue;
            }
            Matcher mTranslator = COMMENT_TRANSLATOR.matcher(s);
            if (mTranslator.matches()) {
                currentPlural = 0;
                translatorComments.append(mTranslator.group(1));
                translatorComments.append("\n");
                eol(s);

                continue;
            }

            // True fuzzy
            Matcher mMsgFuzzy = MSG_FUZZY.matcher(s);
            if (mMsgFuzzy.matches()) {
                sourceFuzzyTrue.append(mMsgFuzzy.group(1));
                continue;
            }

            Matcher mOther = MSG_OTHER.matcher(s);
            if (mOther.matches()) {
                String text = mOther.group(1);
                if (currentMode == null) {
                    throw new IOException(OStrings.getString("POFILTER_INVALID_FORMAT"));
                }
                switch (currentMode) {
                case MSGID:
                    sources[0].append(text);
                    eol(s);
                    break;
                case MSGID_PLURAL:
                    sources[1].append(text);
                    eol(s);
                    break;
                case MSGSTR:
                    targets[0].append(text);
                    break;
                case MSGSTR_PLURAL:
                    targets[currentPlural].append(text);
                    break;
                case MSGCTX:
                    eol(s);
                    break;
                }
                continue;
            }

            flushTranslation(currentMode, fc);
            eol(s);
        }
        flushTranslation(currentMode, fc);
    }

    protected void eol(String s) throws IOException {
        if (out != null) {
            out.write(s);
            out.write('\n');
        }
    }

    protected void align(int pair) {
        String pathSuffix;
        String s;
        String c = "";
        if (pair > 0) {
            s = unescape(sources[1].toString());
            pathSuffix = "[" + pair + "]";
            c += StringUtil.format(OStrings.getString("POFILTER_PLURAL_FORM_COMMENT"), pair) + "\n";
        } else {
            s = unescape(sources[pair].toString());
            pathSuffix = "";
        }
        String t = unescape(targets[pair].toString());

        if (translatorComments.length() > 0) {
            c += OStrings.getString("POFILTER_TRANSLATOR_COMMENTS") + "\n"
                    + unescape(translatorComments.toString() + "\n");
        }
        if (extractedComments.length() > 0) {
            c += OStrings.getString("POFILTER_EXTRACTED_COMMENTS") + "\n"
                    + unescape(extractedComments.toString() + "\n");
        }
        if (references.length() > 0) {
            c += OStrings.getString("POFILTER_REFERENCES") + "\n" + unescape(references.toString() + "\n");
        }
        if (c.length() == 0) {
            c = null;
        }
        align(s, t, c, pathSuffix);
    }

    /**
     *
     * @param source
     * @param translation
     * @param comments
     * @param pathSuffix
     *            suffix for path to distinguish plural forms. It will be empty for first one, and [1],[2],...
     *            for next
     */
    protected void align(String source, String translation, String comments, String pathSuffix) {
        if (translation.isEmpty()) {
            translation = null;
        }
        if (entryParseCallback != null) {
            if (formatMonolingual) {
                List<ProtectedPart> protectedParts = TagUtil.applyCustomProtectedParts(translation,
                        PatternConsts.PRINTF_VARS, null);
                entryParseCallback.addEntry(source, translation, null, fuzzy, comments, path + pathSuffix,
                        this, protectedParts);
            } else {
                List<ProtectedPart> protectedParts = TagUtil.applyCustomProtectedParts(source,
                        PatternConsts.PRINTF_VARS, null);
                entryParseCallback.addEntry(null, source, translation, fuzzy, comments, path + pathSuffix, this,
                        protectedParts);
                if (fuzzyTrue) { // We add a reference entry
                    String[] props = { SegmentProperties.COMMENT, comments, SegmentProperties.REFERENCE, "true" };
                    entryParseCallback.addEntryWithProperties(null, sourceFuzzyTrue.toString(), translation, false,
                            props, path + pathSuffix, this, null);
                    fuzzyTrue = false;
                }
            }
        } else if (entryAlignCallback != null) {
            entryAlignCallback.addTranslation(null, source, translation, fuzzy, path + pathSuffix, this);
        }
    }

    protected void alignHeader(String header, FilterContext fc) {
        if (entryParseCallback != null && !skipHeader) {
            header = unescape(autoFillInPluralStatement(header, fc));
            List<ProtectedPart> protectedParts = TagUtil.applyCustomProtectedParts(header,
                    PatternConsts.PRINTF_VARS, null);
            entryParseCallback.addEntry(null, header, null, false, null, path, this, protectedParts);
        }
    }

    protected void flushTranslation(MODE currentMode, FilterContext fc) throws IOException {
        if (sources[0].length() == 0 && path.isEmpty()) {
            if (targets[0].length() == 0) {
                // there is no text to translate yet
                return;
            } else {
                // header

                // check existing plural statement. If it contains the number of plurals, then use it!
                StringBuilder targets0 = targets[0];
                String header = targets[0].toString();
                Matcher pluralMatcher = PLURAL_FORMS.matcher(header);
                if (pluralMatcher.find()) {
                    String nrOfPluralsString = header.substring(pluralMatcher.start(1), pluralMatcher.end(1));
                    plurals = Integer.parseInt(nrOfPluralsString);
                } else {
                    //else use predefined number of plurals, if it exists
                    Language targetLang = fc.getTargetLang();
                    String lang = targetLang.getLanguageCode().toLowerCase();
                    PluralInfo pluralInfo = PLURAL_INFOS.get(lang);
                    if (pluralInfo != null) {
                        plurals = pluralInfo.plurals;
                    }
                }
                //update the number of targets according to new plural number
                targets = new StringBuilder[plurals];
                targets[0] = targets0;
                for (int i = 1; i < plurals; i++) {
                    targets[i] = new StringBuilder();
                }

                if (out != null) {
                    // Header is always written
                    out.write("msgstr " + getTranslation(null, targets[0], false, true, fc, 0) + "\n");
                } else {
                    alignHeader(targets[0].toString(), fc);
                }
            }
            fuzzy = false;
        } else {
            // source exist
            if (sources[1].length() == 0) {
                // non-plurals
                if (out != null) {
                    if (formatMonolingual) {
                        out.write("msgstr "
                                + getTranslation(sources[0].toString(), targets[0], allowBlank, false, fc, 0)
                                + "\n");
                    } else {
                        out.write("msgstr " + getTranslation(null, sources[0], allowBlank, false, fc, 0)
                                + "\n");
                    }
                } else {
                    align(0);
                }
            } else {
                // plurals
                if (out != null) {
                    out.write("msgstr[0] " + getTranslation(null, sources[0], allowBlank, false, fc, 0) + "\n");
                    for (int i = 1; i < plurals; i++) {
                        out.write("msgstr[" + i + "] " + getTranslation(null, sources[1], allowBlank, false, fc, i)
                                + "\n");
                    }
                } else {
                    align(0);
                    for (int i = 1; i < plurals; i++) {
                        align(i);
                    }
                }
            }
            fuzzy = false;
        }
        sources[0].setLength(0);
        sources[1].setLength(0);
        for (int i = 0; i < plurals; i++) {
            targets[i].setLength(0);
        }
        path = "";
        translatorComments.setLength(0);
        extractedComments.setLength(0);
        references.setLength(0);
        sourceFuzzyTrue.setLength(0);
    }

    protected static final Pattern R1 = Pattern.compile("(?<!\\\\)((\\\\\\\\)*)\\\\\"");
    protected static final Pattern R2 = Pattern.compile("(?<!\\\\)((\\\\\\\\)*)\\\\n");
    protected static final Pattern R3 = Pattern.compile("(?<!\\\\)((\\\\\\\\)*)\\\\t");
    protected static final Pattern R4 = Pattern.compile("^\\\\n");

    /**
     * Private processEntry to do pre- and postprocessing.<br>
     * The given entry is interpreted to a string (e.g. escaped quotes are unescaped, '\n' is translated into newline
     * character, '\t' into tab character.) then translated and then returned as a PO-string-notation (e.g. double
     * quotes escaped, newline characters represented as '\n' and surrounded by double quotes, possibly split up over
     * multiple lines)<Br>
     * Long translations are not split up over multiple lines as some PO editors do, but when there are newline
     * characters in a translation, it is split up at the newline markers.<Br>
     * If the nowrap parameter is true, a translation that exists of multiple lines starts with an empty string-line to
     * left-align all lines. [With nowrap set to true, long lines are also never wrapped (except for at newline
     * characters), but that was already not done without nowrap.] [ 1869069 ] Escape support for PO
     *
     * @param en
     *            The entire source text
     * @param allowNull
     *            Allow to output a blank translation in msgstr
     * @param isHeader
     *            is the given string the PO-header string?
     * @param fc
     *            The FilterContext, for targetLanguage
     * @param plural
     *            if the source text is a plural, which plural number / variant are we on? 0 = no plural, 1.. are the
     *            plurals for the given target language.
     * @return The translated entry, within double quotes on each line (thus ready to be printed to target file
     *         immediately)
     **/
    private String getTranslation(String id, StringBuilder en, boolean allowNull, boolean isHeader, FilterContext fc,
            int plural) {
        String entry = unescape(en.toString());

        String pathSuffix;
        if (plural > 0) {
            pathSuffix = "[" + plural + "]";
        } else {
            pathSuffix = "";
        }

        // Do real translation
        String translation = null;
        if (isHeader) {
            entry = autoFillInPluralStatement(entry, fc);
        }
        if (isHeader && skipHeader) {
            translation = entry;
        } else {
            translation = entryTranslateCallback.getTranslation(id, entry, path + pathSuffix);
        }

        if (translation == null && !allowNull) { // We write the source in translation
            translation = entry;
        }

        if (translation != null) {
            return "\"" + escape(translation) + "\"";
        } else {
            return "\"\"";
        }
    }

    /**
     * Replaces Plural-Forms: nplurals=INTEGER; plural=EXPRESSION; when selected
     * @param header The header text that contains the Plural-forms line.
     * @return Header with the correct plural forms line according to target language.
     */
    private String autoFillInPluralStatement(String header, FilterContext fc) {
        if (autoFillInPluralStatement) {
            Language targetLang = fc.getTargetLang();
            String lang = targetLang.getLanguageCode().toLowerCase();
            PluralInfo pluralInfo = PLURAL_INFOS.get(lang);
            if (pluralInfo != null) {
                return header.replaceAll("Plural-Forms: nplurals=INTEGER; plural=EXPRESSION;",
                        "Plural-Forms: nplurals=" + pluralInfo.plurals + "; plural=" + pluralInfo.expression + ";");
            }
        }
        return header;
    }

    /**
     * Unescape text from .po format.
     */
    private String unescape(String entry) {
        // Removes escapes from quotes. ( \" becomes " unless the \
        // was escaped itself.) The number of preceding slashes before \"
        // should not be odd, else the \ is escaped and not part of \".
        // The regex is: no backslash before an optional even number
        // of backslashes before \". Replace only the \" with " and keep the
        // other escaped backslashes )
        entry = R1.matcher(entry).replaceAll("$1\"");
        // Interprets newline sequence, except when preceded by \
        // \n becomes Linefeed, unless the \ was escaped itself.
        // The number of preceding slashes before \n should not be odd,
        // else the \ is escaped and not part of \n.
        // The regex is: no backslash before an optional even number of
        // backslashes before \n. Replace only the \n with <newline> and
        // keep
        // the other escaped backslashes.
        entry = R2.matcher(entry).replaceAll("$1\n");
        // same for \t, the tab character
        entry = R3.matcher(entry).replaceAll("$1\t");
        // Interprets newline sequence at the beginning of a line
        entry = R4.matcher(entry).replaceAll("\\\n");
        // Removes escape from backslash
        entry = entry.replace("\\\\", "\\");

        return entry;
    }

    /**
     * Escape text to .po format.
     */
    private String escape(String translation) {
        // Escapes backslash
        translation = translation.replace("\\", "\\\\");
        // Adds escapes to quotes. ( " becomes \" )
        translation = translation.replace("\"", "\\\"");

        /*
         * Normally, long lines are wrapped at 'output page width', which defaults to ?76?, and always at
         * newlines. IF the no-wrap indicator is present, long lines should not be wrapped, except on newline
         * characters, in which case the first line should be empty, so that the different lines are aligned
         * the same. OmegaT < 2.0 has never wrapped any line, and it is quite useless when the po-file is not
         * edited with a plain-text-editor. But it is simple to wrap at least at newline characters (which is
         * necessary for the translation of the po-header anyway) We can also honor the no-wrap instruction at
         * least by letting the first line of a multi-line translation not be on the same line as 'msgstr'.
         */
        // Interprets newline chars. 'blah<br>blah' becomes
        // 'blah\n"<br>"blah'
        translation = translation.replace("\n", "\\n\"\n\"");
        // don't make empty new line at the end (in case the last 'blah' is
        // empty string)
        if (translation.endsWith("\"\n\"")) {
            translation = translation.substring(0, translation.length() - 3);
        }
        if (nowrap && translation.contains("\n")) {
            // start with empty string, to align all lines of translation
            translation = "\"\n\"" + translation;
        }
        // Interprets tab chars. 'blah<tab>blah' becomes 'blah\tblah'
        // (<tab> representing the tab character '\u0009')
        translation = translation.replace("\t", "\\t");

        return translation;
    }

    @Override
    public Map<String, String> changeOptions(Window parent, Map<String, String> config) {
        try {
            PoOptionsDialog dialog = new PoOptionsDialog(parent, config);
            dialog.setVisible(true);
            if (PoOptionsDialog.RET_OK == dialog.getReturnStatus()) {
                return dialog.getOptions();
            } else {
                return null;
            }
        } catch (Exception e) {
            Log.log(OStrings.getString("POFILTER_EXCEPTION"));
            Log.log(e);
            return null;
        }
    }

    /**
     * Returns true to indicate that Text filter has options.
     *
     * @return True, because the PO filter has options.
     */
    @Override
    public boolean hasOptions() {
        return true;
    }

    @Override
    public boolean isBilingual() {
        return true;
    }
}