/**************************************************************************
OmegaT - Computer Assisted Translation (CAT) tool
with fuzzy matching, translation memory, keyword search,
glossaries, and translation leveraging into updated projects.
Copyright (C) 2000-2006 Keith Godfrey and Maxym Mykhalchuk
2011-2014 Michael Zakharov
Home page: http://www.omegat.org/
Support center: http://groups.yahoo.com/group/OmegaT/
This file is part of OmegaT.
OmegaT is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
OmegaT is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
**************************************************************************/
package org.omegat.filters2.text.ilias;
import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.IOException;
import java.util.HashMap;
import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.omegat.filters2.AbstractFilter;
import org.omegat.filters2.FilterContext;
import org.omegat.filters2.Instance;
import org.omegat.util.LinebreakPreservingReader;
import org.omegat.util.NullBufferedWriter;
import org.omegat.util.OStrings;
import org.omegat.util.StringUtil;
/**
 * Filter to support language files for ILIAS. The files are a kind of UTF-8
 * encoded text where the lines look like
 * {@code module_name#:#identifier#:#string to translate}. Neither the module
 * name nor the identifier may be modified; both must be copied into the
 * translated version as they are, including the {@code #:#} separators.
 * The file contains a header that should be copied into the translated version.
 * The translated strings should not contain any \n\r symbols but may include
 * simple HTML elements such as {@code <p> ... </p>} and {@code <br />}.
 *
 * @see <a href=
 * "http://www.ilias.de/docu/ilias.php?ref_id=37&from_page=129&obj_id=133&obj_type=PageObject&cmd=layout&cmdClass=illmpresentationgui&cmdNode=ih&baseClass=ilLMPresentationGUI">
 * docs</a>
 *
 * @author Michael Zakharov <trapman.hunt@gmail.com>
 */
public class ILIASFilter extends AbstractFilter {

    /** Separator between module name, identifier and text in an entry line. */
    private static final String SEPARATOR = "#:#";

    /** Marker line that must appear before any entry line for a file to be accepted. */
    private static final Pattern PATTERN_MARK = Pattern.compile("<!-- language file start -->");

    /**
     * Entry line: group 1 is the module name, group 2 the identifier and
     * group 3 the (never empty) text.
     */
    private static final Pattern PATTERN_TEXT = Pattern.compile("^(\\S+)#:#(\\S+)#:#(.+)$");

    /** Number of leading lines that {@link #isFileSupported(BufferedReader)} examines. */
    private static final int MAX_LINES_TO_CHECK = 128;

    /**
     * Map that {@link #process(String, String)} fills with key/text pairs
     * while the filter runs in alignment mode. It is pointed at the map for
     * the file currently being read by
     * {@link #alignFile(BufferedReader, BufferedReader, FilterContext)}.
     */
    protected Map<String, String> align;

    /**
     * @return the localized display name of this filter, looked up under the
     *         key {@code ILIASFILTER_FILTER_NAME}
     */
    @Override
    public String getFileFormatName() {
        return OStrings.getString("ILIASFILTER_FILTER_NAME");
    }

    /**
     * @return always {@code false}; the default instances fix the source
     *         encoding to UTF-8
     */
    @Override
    public boolean isSourceEncodingVariable() {
        return false;
    }

    /**
     * @return always {@code false}; the default instances fix the target
     *         encoding to UTF-8
     */
    @Override
    public boolean isTargetEncodingVariable() {
        return false;
    }

    /**
     * Declares the file masks handled by default: {@code *.lang} and
     * {@code *.lang.local}, both without a source encoding, with UTF-8 as
     * target encoding and a target file name built from the target language
     * code and extension placeholders.
     *
     * @return the two default filter instances
     */
    @Override
    public Instance[] getDefaultInstances() {
        String targetFile = "ilias_" + AbstractFilter.TFP_TARGET_LANG_CODE + "." + TFP_EXTENSION;
        return new Instance[] {
            new Instance("*.lang", null, "UTF-8", targetFile),
            new Instance("*.lang.local", null, "UTF-8", targetFile),
        };
    }

    /**
     * Copies the file line by line. Lines that do not match the entry pattern
     * (header, marker, blank lines, entries without text) are written out
     * unchanged. For entry lines the {@code module_name#:#identifier#:#}
     * prefix is written verbatim and the text is replaced by the result of
     * {@link #process(String, String)}.
     *
     * @param reader
     *            source of the ILIAS language file; it is closed when this
     *            method returns or throws
     * @param outfile
     *            destination for the processed lines
     * @param fc
     *            filter context (not used by this filter)
     * @throws IOException
     *             if reading or writing fails
     */
    @Override
    public void processFile(BufferedReader reader, BufferedWriter outfile, FilterContext fc) throws IOException {
        // fix for bug 1462566
        LinebreakPreservingReader lbpr = new LinebreakPreservingReader(reader);
        try {
            String line;
            while ((line = lbpr.readLine()) != null) {
                Matcher mat = PATTERN_TEXT.matcher(line);
                if (!mat.matches()) {
                    // Anything that is not an entry is copied as it is. This
                    // covers blank lines and entries with no text, because the
                    // pattern needs at least one character after the second
                    // separator.
                    outfile.write(line + lbpr.getLinebreak());
                    continue;
                }
                String key = mat.group(1) + SEPARATOR + mat.group(2);
                String value = mat.group(3);
                // writing out: "module_name#:#identifier#:#"
                outfile.write(key + SEPARATOR);
                outfile.write(process(key, value)); // Translation
                outfile.write(lbpr.getLinebreak());
            }
        } finally {
            // Close on the failure path as well, as isFileSupported() does.
            lbpr.close();
        }
    }

    /**
     * Checks the beginning of the file for the ILIAS layout: the
     * {@code <!-- language file start -->} marker must occur within the first
     * {@value #MAX_LINES_TO_CHECK} lines, and no entry line may occur before
     * it. Lines are trimmed before matching and blank lines are skipped.
     *
     * @param reader
     *            source of the candidate file; it is closed before this
     *            method returns
     * @return {@code true} if the marker was found before any entry line,
     *         {@code false} otherwise or if reading fails
     */
    @Override
    protected boolean isFileSupported(BufferedReader reader) {
        LinebreakPreservingReader lbpr = new LinebreakPreservingReader(reader);
        try {
            for (int checked = 0; checked < MAX_LINES_TO_CHECK; checked++) {
                String line = lbpr.readLine();
                if (line == null) {
                    break;
                }
                String trimmed = line.trim();
                if (trimmed.isEmpty()) {
                    continue;
                }
                if (PATTERN_MARK.matcher(trimmed).matches()) {
                    return true;
                }
                if (PATTERN_TEXT.matcher(trimmed).matches()) {
                    // An entry ahead of the marker disqualifies the file, no
                    // matter what follows it.
                    return false;
                }
            }
            return false;
        } catch (IOException e) {
            return false;
        } finally {
            try {
                lbpr.close();
            } catch (IOException ignored) {
                // Nothing useful can be done if closing fails here.
            }
        }
    }

    /**
     * Reads the source and the translated file with
     * {@link #processFile(BufferedReader, BufferedWriter, FilterContext)},
     * collecting the entries of each into its own map, and reports every
     * source entry whose key has a non-empty counterpart in the translated
     * file to {@code entryAlignCallback}.
     *
     * @param sourceFile
     *            reader for the source language file
     * @param translatedFile
     *            reader for the translated language file
     * @param fc
     *            filter context, passed through to {@code processFile}
     * @throws Exception
     *             if processing either file fails
     */
    @Override
    protected void alignFile(BufferedReader sourceFile, BufferedReader translatedFile, FilterContext fc)
            throws Exception {
        Map<String, String> source = new HashMap<String, String>();
        Map<String, String> translated = new HashMap<String, String>();

        // process() stores entries into whichever map "align" points at.
        align = source;
        processFile(sourceFile, new NullBufferedWriter(), fc);
        align = translated;
        processFile(translatedFile, new NullBufferedWriter(), fc);

        for (Map.Entry<String, String> en : source.entrySet()) {
            String tr = translated.get(en.getKey());
            if (!StringUtil.isEmpty(tr)) {
                entryAlignCallback.addTranslation(en.getKey(), en.getValue(), tr, false, null, this);
            }
        }
    }

    /**
     * Hands one entry to whichever callback is set, checked in this order:
     * parse callback (registers the entry), translate callback (looks up the
     * translation), align callback (stores the entry in {@link #align}).
     *
     * @param key
     *            entry key in the form {@code module_name#:#identifier}
     * @param value
     *            text of the entry
     * @return the translation when the translate callback is active and
     *         returns a non-null one; otherwise {@code value} unchanged
     */
    private String process(String key, String value) {
        if (entryParseCallback != null) {
            entryParseCallback.addEntry(key, value, null, false, null, this);
        } else if (entryTranslateCallback != null) {
            String trans = entryTranslateCallback.getTranslation(key, value);
            if (trans != null) {
                return trans;
            }
        } else if (entryAlignCallback != null) {
            align.put(key, value);
        }
        return value;
    }
}