ILIASFilter.java example

Explorer
OmegaT-master
/**************************************************************************
 OmegaT - Computer Assisted Translation (CAT) tool
          with fuzzy matching, translation memory, keyword search,
          glossaries, and translation leveraging into updated projects.

 Copyright (C) 2000-2006 Keith Godfrey and Maxym Mykhalchuk
               2011-2014 Michael Zakharov
               Home page: http://www.omegat.org/
               Support center: http://groups.yahoo.com/group/OmegaT/

 This file is part of OmegaT.

 OmegaT is free software: you can redistribute it and/or modify
 it under the terms of the GNU General Public License as published by
 the Free Software Foundation, either version 3 of the License, or
 (at your option) any later version.

 OmegaT is distributed in the hope that it will be useful,
 but WITHOUT ANY WARRANTY; without even the implied warranty of
 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 GNU General Public License for more details.

 You should have received a copy of the GNU General Public License
 along with this program.  If not, see <http://www.gnu.org/licenses/>.
 **************************************************************************/

package org.omegat.filters2.text.ilias;

import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.IOException;
import java.util.HashMap;
import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import org.omegat.filters2.AbstractFilter;
import org.omegat.filters2.FilterContext;
import org.omegat.filters2.Instance;
import org.omegat.util.LinebreakPreservingReader;
import org.omegat.util.NullBufferedWriter;
import org.omegat.util.OStrings;
import org.omegat.util.StringUtil;

/**
 * Filter to support language files for ILIAS. The files are a kind of UTF8
 * encoded text where the lines look like module_name#:#identifier#:#string to
 * translate where neither module_name, nor identifier can be modified but must
 * be copied into the translated version as they are including #:# separators.
 * The file contains a header that should be copied into the translated version.
 * The translated strings should not contain any \n or \r characters but may
 * include simple HTML tags such as {@code <p> ... </p>} and {@code <br />}
 *
 * @see <a href=
 *      "http://www.ilias.de/docu/ilias.php?ref_id=37&from_page=129&obj_id=133&obj_type=PageObject&cmd=layout&cmdClass=illmpresentationgui&cmdNode=ih&baseClass=ilLMPresentationGUI">
 *      docs</a>
 *
 * @author Michael Zakharov <trapman.hunt@gmail.com>
 */
public class ILIASFilter extends AbstractFilter {
    /** Separator between module name, identifier and text of an ILIAS entry. */
    private static final String SEPARATOR = "#:#";

    /** Marker line that must be seen before any translatable entry. */
    private static final Pattern PATTERN_MARK = Pattern.compile("<!-- language file start -->");

    /**
     * An entry line: module_name#:#identifier#:#text. The first two groups
     * are reluctant so that the FIRST two separators delimit the fields; a
     * separator occurring inside the text stays part of the text instead of
     * being absorbed into the key.
     */
    private static final Pattern PATTERN_TEXT = Pattern.compile("^(\\S+?)#:#(\\S+?)#:#(.+)$");

    /** Number of leading lines inspected by {@link #isFileSupported(BufferedReader)}. */
    private static final int MAX_LINES_TO_CHECK = 128;

    /** Collects key/text pairs of the file currently processed in align mode. */
    protected Map<String, String> align;

    @Override
    public String getFileFormatName() {
        return OStrings.getString("ILIASFILTER_FILTER_NAME");
    }

    @Override
    public boolean isSourceEncodingVariable() {
        return false;
    }

    @Override
    public boolean isTargetEncodingVariable() {
        return false;
    }

    @Override
    public Instance[] getDefaultInstances() {
        String targetFile = "ilias_" + AbstractFilter.TFP_TARGET_LANG_CODE + "." + TFP_EXTENSION;
        return new Instance[] {
            new Instance("*.lang", null, "UTF-8", targetFile),
            new Instance("*.lang.local", null, "UTF-8", targetFile),
        };
    }

    /**
     * Copies the file to {@code outfile} line by line. Lines that are not
     * entries of the form module_name#:#identifier#:#text (header, blank
     * lines, entries without text) are written unchanged; for entry lines
     * the key part is written as is and the text part is replaced by the
     * result of {@link #process(String, String)}.
     *
     * @param reader
     *            source of the ILIAS language file; closed when done
     * @param outfile
     *            receiver of the processed lines
     * @param fc
     *            filter context (not used by this filter)
     * @throws IOException
     *             if reading or writing fails
     */
    @Override
    public void processFile(BufferedReader reader, BufferedWriter outfile, FilterContext fc) throws IOException {
        // LinebreakPreservingReader: fix for bug 1462566
        LinebreakPreservingReader lbpr = new LinebreakPreservingReader(reader);
        try {
            String line;
            while ((line = lbpr.readLine()) != null) {
                Matcher mat = PATTERN_TEXT.matcher(line);
                if (!mat.matches()) {
                    // Blank lines, header lines and entries with empty text
                    // never match (the text group requires one character),
                    // so they are passed through untouched.
                    outfile.write(line + lbpr.getLinebreak());
                    continue;
                }

                String key = mat.group(1) + SEPARATOR + mat.group(2);

                // writing out: "module_name#:#identifier#:#"
                outfile.write(key + SEPARATOR);
                outfile.write(process(key, mat.group(3))); // Translation
                outfile.write(lbpr.getLinebreak());
            }
        } finally {
            // Release the reader even when reading or writing failed.
            lbpr.close();
        }
    }

    /**
     * Accepts a file when, within its first {@value #MAX_LINES_TO_CHECK}
     * lines, the start marker is found and no entry line precedes it.
     *
     * @param reader
     *            source of the candidate file; closed when done
     * @return {@code true} if the file looks like an ILIAS language file
     */
    @Override
    protected boolean isFileSupported(BufferedReader reader) {
        LinebreakPreservingReader lbpr = new LinebreakPreservingReader(reader);
        try {
            String line;
            int checked = 0;
            while (checked < MAX_LINES_TO_CHECK && (line = lbpr.readLine()) != null) {
                checked++;
                String trimmed = line.trim();
                if (trimmed.isEmpty()) {
                    continue;
                }
                if (PATTERN_MARK.matcher(trimmed).matches()) {
                    // Marker reached and no entry line was seen before it.
                    return true;
                }
                if (PATTERN_TEXT.matcher(trimmed).matches()) {
                    // An entry before the marker: reject. The result must
                    // not depend on what later lines look like.
                    return false;
                }
            }
            return false;
        } catch (IOException e) {
            // An unreadable file is simply not supported.
            return false;
        } finally {
            try {
                lbpr.close();
            } catch (IOException ignored) {
                // Nothing useful can be done if closing fails here.
            }
        }
    }

    /**
     * Aligns a source file with its translation: both files are parsed into
     * key/text maps, then every source entry whose key has a non-empty
     * counterpart in the translated file is reported to the align callback.
     *
     * @param sourceFile
     *            reader of the source language file
     * @param translatedFile
     *            reader of the translated language file
     * @param fc
     *            filter context, passed on to
     *            {@link #processFile(BufferedReader, BufferedWriter, FilterContext)}
     */
    @Override
    protected void alignFile(BufferedReader sourceFile, BufferedReader translatedFile, FilterContext fc) throws Exception {
        Map<String, String> source = new HashMap<String, String>();
        Map<String, String> translated = new HashMap<String, String>();

        // process() stores into whichever map "align" currently points to.
        align = source;
        processFile(sourceFile, new NullBufferedWriter(), fc);
        align = translated;
        processFile(translatedFile, new NullBufferedWriter(), fc);

        for (Map.Entry<String, String> en : source.entrySet()) {
            String tr = translated.get(en.getKey());
            if (!StringUtil.isEmpty(tr)) {
                entryAlignCallback.addTranslation(en.getKey(), en.getValue(), tr, false, null, this);
            }
        }
    }

    /**
     * Hands one entry to whichever callback is active. The parse callback
     * takes precedence over the translate callback, which takes precedence
     * over the align callback.
     *
     * @param key
     *            entry key, "module_name#:#identifier"
     * @param value
     *            entry text
     * @return the translation when the translate callback supplies one,
     *         otherwise {@code value} itself
     */
    private String process(String key, String value) {
        if (entryParseCallback != null) {
            entryParseCallback.addEntry(key, value, null, false, null, this);
            return value;
        } else if (entryTranslateCallback != null) {
            String trans = entryTranslateCallback.getTranslation(key, value);
            return trans != null ? trans : value;
        } else if (entryAlignCallback != null) {
            align.put(key, value);
        }
        return value;
    }

}