/**************************************************************************
OmegaT - Computer Assisted Translation (CAT) tool
with fuzzy matching, translation memory, keyword search,
glossaries, and translation leveraging into updated projects.
Copyright (C) 2000-2006 Keith Godfrey and Maxym Mykhalchuk
2009 Alex Buloichik
2011 Martin Fleurke
2013-2014 Enrique Estevez, Didier Briel
2015 Aaron Madlon-Kay, Enrique Estevez
2016 Aaron Madlon-Kay
Home page: http://www.omegat.org/
Support center: http://groups.yahoo.com/group/OmegaT/
This file is part of OmegaT.
OmegaT is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
OmegaT is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
**************************************************************************/
package org.omegat.filters2.text.bundles;
import java.awt.Window;
import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.OutputStreamWriter;
import java.io.UnsupportedEncodingException;
import java.nio.charset.Charset;
import java.nio.charset.CharsetEncoder;
import java.nio.charset.StandardCharsets;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import org.omegat.core.data.ProtectedPart;
import org.omegat.filters2.AbstractFilter;
import org.omegat.filters2.FilterContext;
import org.omegat.filters2.Instance;
import org.omegat.filters2.TranslationException;
import org.omegat.util.LinebreakPreservingReader;
import org.omegat.util.Log;
import org.omegat.util.NullBufferedWriter;
import org.omegat.util.OStrings;
import org.omegat.util.PatternConsts;
import org.omegat.util.StringUtil;
import org.omegat.util.TagUtil;
/**
* Filter to support Java Resource Bundles - the files that are used to
* internationalize (i18n) Java applications.
*
* @author Maxym Mykhalchuk
* @author Keith Godfrey
* @author Alex Buloichik (alex73mail@gmail.com)
* @author Martin Fleurke
* @author Enrique Estevez (keko.gl@gmail.com)
* @author Didier Briel
* @author Aaron Madlon-Kay
*
* Option to remove untranslated segments in the target files
* Code adapted from the file: MozillaDTDFilter.java
* Support for encodings other than ASCII. Managing the encoding is up to the user:
* the user has to choose the encoding of both the source and the target file.
* The default is ASCII, which corresponds to the standard behaviour: in that case, any character above 127 is encoded
* according to the specifications of the bundle files. If another character set is chosen, no encoding takes place
* and it's up to the user to select a charset compatible with the characters used.
* "auto" for the target encoding is considered as being ASCII.
*
* Support for showing comments (localization notes) in the Comments panel.
* Optionally can leave Unicode literals (\\uXXXX) unescaped.
*/
public class ResourceBundleFilter extends AbstractFilter {
/**
* Key=value pairs with a preceding comment containing this string are not
* translated, and are output verbatim.
* <p>
* TODO: Make this optional
*/
public static final String DO_NOT_TRANSLATE_COMMENT = "NOI18N";
/**
* Key looked up in the filter options by processFile; the value "true"
* (case-insensitive) turns on removal of untranslated segments.
* <p>
* NOTE(review): the literal starts with "un" although the constant name does
* not; presumably historical. Changing it would stop matching options stored
* under the current key - confirm before touching.
*/
public static final String OPTION_REMOVE_STRINGS_UNTRANSLATED = "unremoveStringsUntranslated";
/**
* Key looked up in the filter options by processFile; the value "true"
* (case-insensitive) keeps \\uXXXX literals as written.
*/
public static final String OPTION_DONT_UNESCAPE_U_LITERALS = "dontUnescapeULiterals";
/**
* Charset name used for the target file when none is specified: US-ASCII.
*/
public static final String DEFAULT_TARGET_ENCODING = StandardCharsets.US_ASCII.name();
/**
* Map that process() fills with key/value pairs when an align callback is
* set. alignFile points it first at the source map, then at the translated
* map.
*/
protected Map<String, String> align;
/**
* Name of the charset used by createWriter for the output file and by
* toAscii to decide which characters have to be written as \\uXXXX.
*/
private String targetEncoding = DEFAULT_TARGET_ENCODING;
/**
* If true, will remove non-translated segments in the target files
*/
private boolean removeStringsUntranslated = false;
/**
* If true, \\uXXXX literals found in the input are kept as written instead
* of being converted to the characters they denote, and the backslash of
* such a literal is not doubled or removed when the text is written back.
*/
private boolean dontUnescapeULiterals = false;
/**
 * Returns the display name of this file format.
 *
 * @return the string registered under the key {@code RBFILTER_FILTER_NAME}
 */
@Override
public String getFileFormatName() {
    final String formatName = OStrings.getString("RBFILTER_FILTER_NAME");
    return formatName;
}
/**
 * Tells whether the encoding of the source file can be chosen.
 *
 * @return always {@code true}: the source encoding of a resource bundle is
 *         configurable
 */
@Override
public boolean isSourceEncodingVariable() {
    return true;
}
/**
 * Tells whether the encoding of the target file can be chosen.
 *
 * @return always {@code true}: the target encoding of a resource bundle is
 *         configurable
 */
@Override
public boolean isTargetEncodingVariable() {
    return true;
}
/**
 * Returns the default filter instance: files matching {@code *.properties},
 * read and written as US-ASCII, with the target file named
 * {@code <name>_<target locale>.<extension>}.
 */
@Override
public Instance[] getDefaultInstances() {
    final String ascii = StandardCharsets.US_ASCII.name();
    final String targetNamePattern = TFP_NAMEONLY + "_" + TFP_TARGET_LOCALE + "." + TFP_EXTENSION;
    final Instance propertiesInstance = new Instance("*.properties", ascii, ascii, targetNamePattern);
    return new Instance[] { propertiesInstance };
}
/**
 * Creates the writer used to save a localized resource bundle.
 * <p>
 * NOTE: the name of the localized resource bundle is different from the
 * name of the original one, e.g. "Bundle.properties" -> Russian =
 * "Bundle_ru.properties".
 * <p>
 * The encoding that was actually used is remembered in
 * {@code targetEncoding}, but only once the writer has been created
 * successfully, so a rejected encoding name does not linger in the filter.
 *
 * @param outfile
 *            file to write to
 * @param encoding
 *            charset name for the output; when {@code null} the current
 *            {@code targetEncoding} is used
 * @return a buffered writer on {@code outfile}
 * @throws UnsupportedEncodingException
 *             if the charset name is not supported
 * @throws IOException
 *             if the file cannot be opened for writing
 */
@Override
public BufferedWriter createWriter(File outfile, String encoding) throws UnsupportedEncodingException,
        IOException {
    String effectiveEncoding = encoding != null ? encoding : targetEncoding;
    FileOutputStream stream = new FileOutputStream(outfile);
    boolean wrapped = false;
    try {
        BufferedWriter writer = new BufferedWriter(new OutputStreamWriter(stream, effectiveEncoding));
        targetEncoding = effectiveEncoding;
        wrapped = true;
        return writer;
    } finally {
        if (!wrapped) {
            // The writer could not be built (e.g. unsupported encoding):
            // release the file handle instead of leaking it.
            try {
                stream.close();
            } catch (IOException ignored) {
                // the failure that brought us here is the one worth reporting
            }
        }
    }
}
/**
 * Picks the encoding of the target file.
 *
 * @param fc
 *            context providing the encoding requested for the output
 * @return the requested encoding, or {@link #DEFAULT_TARGET_ENCODING} when
 *         none was specified ("&lt;auto&gt;")
 */
@Override
protected String getOutputEncoding(FilterContext fc) {
    final String requested = fc.getOutEncoding();
    if (requested != null) {
        return requested;
    }
    // nothing specified by the user: fall back to the default
    return DEFAULT_TARGET_ENCODING;
}
/**
 * Processes an input line for use in OmegaT by doing the following:
 * <ul>
 * <li>Drops the whitespace at the beginning of the line.
 * <li>Converts ASCII-encoded \\uxxxx chars to normal characters, unless the
 * filter is configured to keep such literals.
 * <li>Converts \r, \n and \t to CR, line feed and tab.
 * <li>But! Keeps the backslash in '\ ', '\=', '\:', etc. (non-trimmable
 * space or non-key-value-breaking equals).
 * </ul>
 * A \\u sequence must be followed by exactly four ASCII hexadecimal digits;
 * anything else (too short, a sign character, non-ASCII digits, ...) is
 * rejected.
 *
 * @param line
 *            one physical line of the bundle, without its line break
 * @return the normalized line
 * @throws TranslationException
 *             if a \\u sequence is malformed
 */
protected String normalizeInputLine(String line) throws IOException, TranslationException {
    // Whitespace at the beginning of lines is ignored
    boolean strippingWhitespace = true;
    StringBuilder result = new StringBuilder(line.length());
    for (int cp, len = line.length(), i = 0; i < len; i += Character.charCount(cp)) {
        cp = line.codePointAt(i);
        if (strippingWhitespace && (strippingWhitespace = Character.isWhitespace(cp))) {
            continue;
        }
        if (cp == '\\' && line.codePointCount(i, len) > 1) {
            // look at the character that follows the backslash
            i += Character.charCount(cp);
            cp = line.codePointAt(i);
            if (cp != 'u') {
                if (cp == 'n') {
                    cp = '\n';
                } else if (cp == 'r') {
                    cp = '\r';
                } else if (cp == 't') {
                    cp = '\t';
                } else {
                    // not an escape we resolve here: keep the backslash
                    result.append('\\');
                }
            } else if (dontUnescapeULiterals) {
                // Put back the \ we swallowed
                result.append('\\');
            } else {
                int uStart = i + 1; // 'u' is a single char
                int uEnd = uStart + 4;
                // Integer.parseInt alone would also accept a sign or
                // non-ASCII digits, so validate the digits explicitly.
                if (uEnd > len || !isFourAsciiHexDigits(line, uStart)) {
                    throw new TranslationException(OStrings.getString("RBFH_ERROR_ILLEGAL_U_SEQUENCE"));
                }
                cp = Integer.parseInt(line.substring(uStart, uEnd), 16);
                // the loop increment will move i to uEnd
                i = uEnd - Character.charCount(cp);
            }
        }
        result.appendCodePoint(cp);
    }
    return result.toString();
}

/**
 * Checks that the four chars of {@code text} starting at {@code start} are
 * all ASCII hexadecimal digits. The caller guarantees that the four chars
 * exist.
 */
private static boolean isFourAsciiHexDigits(String text, int start) {
    for (int pos = start; pos < start + 4; pos++) {
        char c = text.charAt(pos);
        boolean hex = (c >= '0' && c <= '9') || (c >= 'a' && c <= 'f') || (c >= 'A' && c <= 'F');
        if (!hex) {
            return false;
        }
    }
    return true;
}
/**
* Kind of text handed to toAscii. KEY additionally gets ' ', '=' and ':'
* prefixed with a backslash; COMMENT leaves backslashes untouched; VALUE
* (like KEY) has its backslashes escaped.
*/
private enum EscapeMode {
KEY, VALUE, COMMENT
}
/**
 * Converts text to the form in which it is written to the target bundle.
 * <ul>
 * <li>Line feed, carriage return and tab become \n, \r and \t.
 * <li>Outside comments a backslash is doubled, except - when \\uXXXX
 * literals are kept as written - the backslash that starts such a literal.
 * <li>In a key, ' ', '=' and ':' are prefixed with a backslash (they MUST
 * be escaped in a key and MAY be escaped in a value, but we don't escape
 * them there). A '#' or '!' at the very beginning of a key is escaped as
 * well, otherwise the written line would be read back as a comment.
 * <li>Characters from 32 to 126 and every character the target charset can
 * encode are written as they are; everything else is written as \\uXXXX
 * (one sequence per UTF-16 char).
 * </ul>
 *
 * @param text
 *            Text to convert.
 * @param mode
 *            What the text is: a key, a value or a comment line.
 * @return the converted text
 */
private String toAscii(String text, EscapeMode mode) {
    CharsetEncoder charsetEncoder = Charset.forName(targetEncoding).newEncoder();
    StringBuilder result = new StringBuilder(text.length());
    for (int cp, len = text.length(), i = 0; i < len; i += Character.charCount(cp)) {
        cp = text.codePointAt(i);
        if (mode != EscapeMode.COMMENT && cp == '\\') {
            if (dontUnescapeULiterals && containsUEscapeAt(text, i)) {
                result.append("\\");
            } else {
                result.append("\\\\");
            }
        } else if (cp == '\n') {
            result.append("\\n");
        } else if (cp == '\r') {
            result.append("\\r");
        } else if (cp == '\t') {
            result.append("\\t");
        } else if (mode == EscapeMode.KEY && (cp == ' ' || cp == '=' || cp == ':')) {
            result.append('\\').appendCodePoint(cp);
        } else if (mode == EscapeMode.KEY && i == 0 && (cp == '#' || cp == '!')) {
            // a line starting with '#' or '!' is a comment: protect the key
            result.append('\\').appendCodePoint(cp);
        } else if ((cp >= 32 && cp < 127)
                || charsetEncoder.canEncode(text.substring(i, i + Character.charCount(cp)))) {
            result.appendCodePoint(cp);
        } else {
            // not representable in the target charset: one escape per
            // UTF-16 char, zero-padded to four hex digits
            for (char c : Character.toChars(cp)) {
                String hex = Integer.toHexString(c);
                result.append("\\u");
                for (int pad = hex.length(); pad < 4; pad++) {
                    result.append('0');
                }
                result.append(hex);
            }
        }
    }
    return result.toString();
}
/**
 * Tells whether a Unicode literal - a backslash, the letter 'u' and exactly
 * four ASCII hexadecimal digits - starts at the given position.
 * <p>
 * The character at {@code offset} itself is not inspected; callers invoke
 * this method only when it is a backslash.
 *
 * @param text
 *            text to inspect
 * @param offset
 *            index of the (presumed) backslash
 * @return true if 'u' and four hex digits follow {@code offset}
 */
private static boolean containsUEscapeAt(String text, int offset) {
    // backslash + 'u' + four digits
    if (text.codePointCount(offset, text.length()) < 1 + 1 + 4) {
        return false;
    }
    int uPos = text.offsetByCodePoints(offset, 1);
    if (text.codePointAt(uPos) != 'u') {
        return false;
    }
    // At least four code points follow the 'u'. Each one is checked before
    // stepping over it, and an ASCII digit is always a single char, so
    // pos never leaves the string. Checking the digits explicitly (rather
    // than via Integer.parseInt) rejects signs and non-ASCII digits.
    int pos = uPos + 1;
    for (int n = 0; n < 4; n++, pos++) {
        char c = text.charAt(pos);
        boolean hex = (c >= '0' && c <= '9') || (c >= 'a' && c <= 'f') || (c >= 'A' && c <= 'F');
        if (!hex) {
            return false;
        }
    }
    return true;
}
/**
 * Strips the escaping backslashes (as in "\ ", "\=" and "\:") that are
 * typical in machine-generated resource bundles, keeping the character that
 * follows each of them. A backslash that is the last character stands for a
 * mandatory space that has been trimmed, and is turned back into a space.
 * When \\uXXXX literals are kept as written, the backslash that starts such
 * a literal is preserved.
 *
 * @see <a href="https://sourceforge.net/p/omegat/bugs/266/">bug #266</a>
 */
private String removeExtraSlashes(String string) {
    final int length = string.length();
    StringBuilder cleaned = new StringBuilder(length);
    int pos = 0;
    while (pos < length) {
        int current = string.codePointAt(pos);
        if (current == '\\') {
            boolean startsKeptLiteral = dontUnescapeULiterals && containsUEscapeAt(string, pos);
            if (!startsKeptLiteral) {
                if (string.codePointCount(pos, length) > 1) {
                    // Fix for [ 1812183 ] Properties: space before "="
                    // shouldn't be part of the key, contributed by Arno
                    // Peters: skip the backslash, keep what follows it
                    pos += Character.charCount(current);
                    current = string.codePointAt(pos);
                } else {
                    // trailing backslash: restore the trimmed space
                    current = ' ';
                }
            }
        }
        cleaned.appendCodePoint(current);
        pos += Character.charCount(current);
    }
    return cleaned.toString();
}
/**
 * Reads a resource bundle line by line, hands every key/value pair to
 * {@link #process(String, String, String)} and writes the resulting bundle
 * to {@code outfile}.
 * <ul>
 * <li>Blank lines are copied to the output and discard the comments
 * collected so far.
 * <li>Comment lines (starting with '#' or '!') are copied to the output and
 * collected; the collected text is passed along with the next key/value
 * pair. A comment containing {@link #DO_NOT_TRANSLATE_COMMENT} makes the
 * next key/value pair be written without calling {@code process}. A
 * malformed \\uXXXX sequence inside a comment does not abort the file; the
 * comment text is then used as written.
 * <li>A line ending with an odd number of backslashes is continued on the
 * next line; an even number is a run of escaped backslashes and ends the
 * line.
 * <li>Untranslated pairs are left out of the output when the
 * corresponding filter option is set.
 * </ul>
 * NOTE(review): a line without any key/value separator produces no output
 * and is not passed to {@code process}.
 *
 * @param reader
 *            the source bundle; the wrapping reader is closed before this
 *            method returns
 * @param outfile
 *            where the target bundle is written
 * @param fc
 *            not read by this method
 * @throws TranslationException
 *             if a key/value line contains a malformed \\uXXXX sequence
 */
@Override
public void processFile(BufferedReader reader, BufferedWriter outfile, FilterContext fc)
        throws IOException, TranslationException {
    // Parameter in the options of filter to customize the target file
    removeStringsUntranslated = processOptions != null
            && "true".equalsIgnoreCase(processOptions.get(OPTION_REMOVE_STRINGS_UNTRANSLATED));
    // Parameter in the options of filter to customize the behavior of the
    // filter
    dontUnescapeULiterals = processOptions != null
            && "true".equalsIgnoreCase(processOptions.get(OPTION_DONT_UNESCAPE_U_LITERALS));

    String raw;
    boolean noi18n = false;
    // Support to show the comments (localization notes) into the Comments
    // panel
    String comments = null;

    LinebreakPreservingReader lbpr = new LinebreakPreservingReader(reader); // fix for bug 1462566
    try {
        while ((raw = lbpr.readLine()) != null) {
            String trimmed = raw.trim();

            // skipping empty strings
            if (trimmed.isEmpty()) {
                outfile.write(raw);
                outfile.write(lbpr.getLinebreak());
                // Delete the comments
                comments = null;
                continue;
            }

            // skipping comments
            int firstCp = trimmed.codePointAt(0);
            if (firstCp == '#' || firstCp == '!') {
                outfile.write(toAscii(raw, EscapeMode.COMMENT));
                outfile.write(lbpr.getLinebreak());
                String commentText;
                try {
                    commentText = normalizeInputLine(raw);
                } catch (TranslationException ignored) {
                    // Escapes carry no meaning inside a comment (think of a
                    // Windows path): show the text as written rather than
                    // failing the whole file.
                    commentText = trimmed;
                }
                // Save the comments
                comments = (comments == null ? commentText : comments + "\n" + commentText);
                // checking if the next string shouldn't be
                // internationalized
                if (raw.contains(DO_NOT_TRANSLATE_COMMENT)) {
                    noi18n = true;
                }
                continue;
            }

            // We are going to use the content of this line,
            // so trim and unescape
            String processed = normalizeInputLine(raw);

            // reading the glued lines; whether a line continues is decided
            // on the raw text, where escaped backslashes are still paired
            String lastRaw = raw;
            while (endsWithLineContinuation(lastRaw)) {
                // gluing this line (w/o '\' on this line)
                // with next line (w/o leading spaces)
                if (processed.endsWith("\\")) {
                    processed = processed.substring(0, processed.length() - 1);
                }
                String next = lbpr.readLine();
                if (next == null) {
                    break;
                }
                processed = processed + normalizeInputLine(next);
                lastRaw = next;
            }

            // key=value pairs
            int equalsPos = searchEquals(processed);

            // writing out key
            String key;
            if (equalsPos >= 0) {
                key = processed.substring(0, equalsPos).trim();
            } else {
                key = processed.trim();
            }
            key = removeExtraSlashes(key);

            // advance if there're spaces or tabs after =
            if (equalsPos >= 0) {
                int equalsEnd = processed.offsetByCodePoints(equalsPos, 1);
                while (equalsEnd < processed.length()) {
                    int cp = processed.codePointAt(equalsEnd);
                    if (cp != ' ' && cp != '\t') {
                        break;
                    }
                    equalsEnd += Character.charCount(cp);
                }
                String equals = processed.substring(equalsPos, equalsEnd);

                // value, if any
                String value;
                if (equalsEnd < processed.length()) {
                    value = removeExtraSlashes(processed.substring(equalsEnd));
                } else {
                    value = "";
                }

                if (noi18n) {
                    // if we don't need to internationalize
                    outfile.write(toAscii(key, EscapeMode.KEY));
                    outfile.write(equals);
                    outfile.write(toAscii(value, EscapeMode.VALUE));
                    outfile.write(lbpr.getLinebreak());
                    noi18n = false;
                } else {
                    // Variable to check if a segment is translated
                    boolean translatedSegment = true;

                    value = value.replaceAll("\\n\\n", "\n \n");
                    // If there is a comment, show it into the Comments panel
                    String trans = process(key, value, comments);
                    // Delete the comments
                    comments = null;
                    // Check if the segment is not translated
                    if ("--untranslated_yet--".equals(trans)) {
                        translatedSegment = false;
                        trans = value;
                    }
                    trans = trans.replaceAll("\\n\\s\\n", "\n\n");
                    trans = toAscii(trans, EscapeMode.VALUE);
                    // a leading space would be swallowed when the bundle
                    // is read back, so protect it
                    if (!trans.isEmpty() && trans.codePointAt(0) == ' ') {
                        trans = '\\' + trans;
                    }
                    // Non-translated segments are written based on the
                    // filter options
                    if (translatedSegment || !removeStringsUntranslated) {
                        outfile.write(toAscii(key, EscapeMode.KEY));
                        outfile.write(equals);
                        outfile.write(trans);
                        outfile.write(lbpr.getLinebreak()); // fix for bug 1462566
                    }
                }
            }
        }
    } finally {
        lbpr.close();
    }
}

/**
 * Tells whether a physical line is continued on the next one, i.e. whether
 * it ends with an odd number of backslashes.
 */
private static boolean endsWithLineContinuation(String rawLine) {
    int backslashes = 0;
    for (int i = rawLine.length() - 1; i >= 0 && rawLine.charAt(i) == '\\'; i--) {
        backslashes++;
    }
    return (backslashes & 1) == 1;
}
/**
 * Looks for the key-value separator (=,: or ' ') in the string.
 * <p>
 * A character that follows a backslash is escaped and never a separator;
 * the backslash of an escaped backslash does not escape anything itself, so
 * in <code>a\\=b</code> the '=' is the separator. When the first separator
 * candidate is a space or a tab, and only spaces or tabs stand between it
 * and a following '=' or ':', the position of that '=' or ':' is returned
 * instead.
 *
 * @param str
 *            a normalized logical line
 * @return The char number of key-value separator in a string. Note that if
 *         the string does not contain any separator this string is
 *         considered to be a key with empty string value, and this method
 *         returns <code>-1</code> to indicate there's no equals.
 * @see <a href="https://sourceforge.net/p/omegat/bugs/266/">bug #266</a>
 */
private static int searchEquals(String str) {
    final int len = str.length();
    boolean escaped = false;
    for (int cp, i = 0; i < len; i += Character.charCount(cp)) {
        cp = str.codePointAt(i);
        if (escaped) {
            // this character is protected by the backslash before it
            escaped = false;
            continue;
        }
        if (cp == '\\') {
            escaped = true;
            continue;
        }
        if (cp == '=' || cp == ':') {
            return i;
        }
        if (cp == ' ' || cp == '\t') {
            // whitespace separates key and value, unless it merely pads a
            // '=' or ':' that follows
            for (int j = i + 1; j < len; j++) {
                char c = str.charAt(j);
                if (c == '=' || c == ':') {
                    return j;
                }
                if (c != ' ' && c != '\t') {
                    break;
                }
            }
            return i;
        }
    }
    return -1;
}
/**
 * Hands one key/value pair to whichever callback is active.
 * <ul>
 * <li>Parse callback: the entry is registered together with its comment and
 * its protected parts, and the value is returned unchanged.
 * <li>Translate callback: the translation is returned, or the marker
 * {@code "--untranslated_yet--"} when there is none.
 * <li>Align callback: the pair is stored in {@code align} and the value is
 * returned unchanged.
 * </ul>
 *
 * @param key
 *            key of the entry
 * @param value
 *            source text of the entry
 * @param c
 *            comment (localization note) shown in the Comments panel; may
 *            be null
 * @return the text to write for this entry, or the untranslated marker
 */
protected String process(String key, String value, String c) {
    if (entryParseCallback != null) {
        List<ProtectedPart> parts = TagUtil.applyCustomProtectedParts(value,
                PatternConsts.SIMPLE_JAVA_MESSAGEFORMAT_PATTERN_VARS, null);
        entryParseCallback.addEntry(key, value, null, false, c, null, this, parts);
        return value;
    }
    if (entryTranslateCallback != null) {
        String translation = entryTranslateCallback.getTranslation(key, value, null);
        if (translation == null) {
            return "--untranslated_yet--";
        }
        return translation;
    }
    if (entryAlignCallback != null) {
        align.put(key, value);
    }
    return value;
}
/**
 * Aligns a source bundle with its translation: both files are run through
 * {@code processFile} (output discarded) to collect their key/value pairs,
 * then every source entry whose key has a non-empty value in the translated
 * file is reported to the align callback.
 */
@Override
protected void alignFile(BufferedReader sourceFile, BufferedReader translatedFile, org.omegat.filters2.FilterContext fc) throws Exception {
    Map<String, String> sourceEntries = new HashMap<String, String>();
    Map<String, String> translatedEntries = new HashMap<String, String>();

    // first pass: collect the source entries
    align = sourceEntries;
    processFile(sourceFile, new NullBufferedWriter(), fc);

    // second pass: collect the translated entries
    align = translatedEntries;
    processFile(translatedFile, new NullBufferedWriter(), fc);

    for (Map.Entry<String, String> sourceEntry : sourceEntries.entrySet()) {
        String entryKey = sourceEntry.getKey();
        String translation = translatedEntries.get(entryKey);
        if (StringUtil.isEmpty(translation)) {
            continue;
        }
        entryAlignCallback.addTranslation(sourceEntry.getKey(), sourceEntry.getValue(), translation, false,
                null, this);
    }
}
/**
 * Shows the options dialog of this filter.
 *
 * @param parent
 *            window the dialog is opened from
 * @param config
 *            options handed to the dialog
 * @return the options taken from the dialog when it was closed with OK;
 *         null when it was closed otherwise or when an exception occurred
 *         (which is logged)
 */
@Override
public Map<String, String> changeOptions(Window parent, Map<String, String> config) {
    try {
        ResourceBundleOptionsDialog optionsDialog = new ResourceBundleOptionsDialog(parent, config);
        optionsDialog.setVisible(true);
        boolean confirmed = ResourceBundleOptionsDialog.RET_OK == optionsDialog.getReturnStatus();
        if (!confirmed) {
            return null;
        }
        return optionsDialog.getOptions();
    } catch (Exception e) {
        Log.log(OStrings.getString("RB_FILTER_EXCEPTION"));
        Log.log(e);
        return null;
    }
}
/**
 * Tells whether this filter can be configured.
 *
 * @return always {@code true}: the Java Resource Bundles filter has options
 */
@Override
public boolean hasOptions() {
    return true;
}
}