/**************************************************************************
OmegaT - Computer Assisted Translation (CAT) tool
with fuzzy matching, translation memory, keyword search,
glossaries, and translation leveraging into updated projects.
Copyright (C) 2000-2006 Keith Godfrey and Maxym Mykhalchuk
2010 Volker Berlin
Home page: http://www.omegat.org/
Support center: http://groups.yahoo.com/group/OmegaT/
This file is part of OmegaT.
OmegaT is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
OmegaT is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
**************************************************************************/
package org.omegat.filters2.text.dokuwiki;
import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.IOException;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.omegat.filters2.AbstractFilter;
import org.omegat.filters2.FilterContext;
import org.omegat.filters2.Instance;
import org.omegat.util.LinebreakPreservingReader;
import org.omegat.util.OStrings;
/**
* Filter to support files with the DokuWiki syntax
* (http://www.dokuwiki.org/syntax). DokuWiki saves its content in *.txt files.
*
* @author Volker Berlin
*/
public class DokuWikiFilter extends AbstractFilter {

    /** Opening markers of regions that are copied verbatim: code-like tags and C-style comments. */
    private static final Pattern CODE_TAG = Pattern.compile("\\<code|\\<file|\\<html|\\<php|\\/\\*");

    /** Prefix of a list item: an indent of two or more spaces followed by "*" or "-". */
    private static final Pattern LIST_PREFIX = Pattern.compile("^ {2,}[*-]");

    /** A run of two or more consecutive spaces. */
    private static final Pattern SPACE_RUN = Pattern.compile(" {2,}");

    @Override
    public String getFileFormatName() {
        return OStrings.getString("DWFILTER_FILTER_NAME");
    }

    @Override
    public boolean isSourceEncodingVariable() {
        return false;
    }

    @Override
    public boolean isTargetEncodingVariable() {
        return false;
    }

    @Override
    public Instance[] getDefaultInstances() {
        return new Instance[] { new Instance("*.txt", "UTF-8", "UTF-8"), };
    }

    /**
     * Always answers {@code true}.
     * NOTE(review): presumably a hook declared by AbstractFilter; confirm and
     * mark with {@code @Override} if so.
     */
    protected boolean requirePrevNextFields() {
        return true;
    }

    /**
     * Accepts the file when at least one of its lines, after trimming, is a
     * heading according to {@link #getHeadingLevel(String)}.
     *
     * @param reader
     *            reader over the candidate file; it is closed by this method
     * @return true if a heading line was found, false otherwise or when
     *         reading failed
     */
    @Override
    protected boolean isFileSupported(BufferedReader reader) {
        try (LinebreakPreservingReader lbpr = new LinebreakPreservingReader(reader)) {
            String line;
            while ((line = lbpr.readLine()) != null) {
                if (getHeadingLevel(line.trim()) > 0) {
                    return true;
                }
            }
        } catch (IOException e) {
            // An unreadable file is reported and then treated as unsupported.
            e.printStackTrace();
        }
        return false;
    }

    /**
     * {@inheritDoc} Syntax see at http://www.dokuwiki.org/syntax
     * <p>
     * Reads the source line by line. Blank lines, headings, list items,
     * stand-alone images/macros and table rows are handled on their own; all
     * other lines are collected into a paragraph buffer that is flushed as one
     * translatable entry when the next structural line (or the end of the
     * file) is reached. Code-like regions and comments are copied untouched.
     *
     * @param reader
     *            source file
     * @param outfile
     *            target file
     * @param fc
     *            filter context; not used by this filter
     * @throws IOException
     *             if reading or writing fails
     */
    @Override
    public void processFile(BufferedReader reader, BufferedWriter outfile, FilterContext fc) throws IOException {
        // fix for bug 1462566; try-with-resources closes the reader on every exit path
        try (LinebreakPreservingReader lbpr = new LinebreakPreservingReader(reader)) {
            StringBuilder text = new StringBuilder();
            String line;
            while ((line = lbpr.readLine()) != null) {
                String trimmed = line.trim();

                // empty lines end the current paragraph and are copied as they are
                if (trimmed.isEmpty()) {
                    writeTranslate(outfile, text, lbpr);
                    outfile.write(line + lbpr.getLinebreak());
                    continue;
                }

                // heading like "=== Abc ==="
                int headingLevel = getHeadingLevel(trimmed);
                if (headingLevel > 0) {
                    writeTranslate(outfile, text, lbpr);
                    outfile.write(translateHeading(line, trimmed, headingLevel) + lbpr.getLinebreak());
                    continue;
                }

                // list item: indented by two or more spaces, then "*" or "-"
                Matcher listPrefix = LIST_PREFIX.matcher(line);
                if (listPrefix.lookingAt()) {
                    writeTranslate(outfile, text, lbpr);
                    String item = line.substring(listPrefix.end());
                    if (item.trim().isEmpty()) {
                        // nothing to translate; keep the line and its line break
                        outfile.write(line + lbpr.getLinebreak());
                    } else {
                        outfile.write(listPrefix.group());
                        outfile.write(' ');
                        writeTranslate(outfile, item, lbpr);
                    }
                    continue;
                }

                // image alone like "{{any}}" or macros alone like "~~any~~"
                boolean imageOnly = trimmed.startsWith("{{") && trimmed.endsWith("}}");
                boolean macroOnly = trimmed.startsWith("~~") && trimmed.endsWith("~~") && trimmed.length() > 5;
                if (imageOnly || macroOnly) {
                    writeTranslate(outfile, text, lbpr);
                    outfile.write(line + lbpr.getLinebreak());
                    continue;
                }

                // tables
                if (line.startsWith("|") || line.startsWith("^")) {
                    writeTranslate(outfile, text, lbpr);
                    writeTableRow(outfile, line);
                    outfile.write(lbpr.getLinebreak());
                    continue;
                }

                // skip code fragments; what is left of the line joins the paragraph
                String rest = skipCode(outfile, text, lbpr, line);
                if (rest == null) {
                    // End of file inside an unterminated region: leave the loop so
                    // that text collected before the region is still written.
                    break;
                }
                text.append(' ');
                text.append(rest);
            }
            writeTranslate(outfile, text, lbpr);
        }
    }

    /**
     * Translates the title of a heading line and puts the translation back in
     * place of the title, leaving the "=" markers and all surrounding
     * whitespace untouched.
     *
     * @param line
     *            the complete heading line
     * @param trimmed
     *            {@code line.trim()}
     * @param level
     *            number of "=" on each side, as returned by
     *            {@link #getHeadingLevel(String)}
     * @return the line with the title replaced by the result of processEntry
     */
    private String translateHeading(String line, String trimmed, int level) {
        String inner = trimmed.substring(level, trimmed.length() - level);
        String header = inner.trim();
        if (header.isEmpty()) {
            return line;
        }
        // Replace only the title span. A plain String.replace would also rewrite
        // the markers when the title itself consists of "=" characters.
        int headerStart = line.indexOf(trimmed) + level + inner.indexOf(header);
        String trans = processEntry(header);
        return line.substring(0, headerStart) + trans + line.substring(headerStart + header.length());
    }

    /**
     * Writes one table row. The row is split at "|" and "^" separators that
     * are not enclosed in braces (so "{{image|caption}}" stays in one cell);
     * separators are copied and every cell goes through
     * {@link #writeTableCell(BufferedWriter, String)}. No line break is written.
     *
     * @param outfile
     *            Writer of the target file on compilation
     * @param line
     *            the table row, starting with a separator
     * @throws IOException
     *             If an I/O error occurs
     */
    private void writeTableRow(BufferedWriter outfile, String line) throws IOException {
        int cellStart = 0;
        int braceDepth = 0;
        for (int cp, i = 0; i < line.length(); i += Character.charCount(cp)) {
            cp = line.codePointAt(i);
            switch (cp) {
            case '|':
            case '^':
                if (braceDepth == 0) {
                    // nothing precedes the very first separator
                    if (cellStart > 0) {
                        writeTableCell(outfile, line.substring(cellStart, i));
                    }
                    outfile.write(Character.toChars(cp));
                    cellStart = i + Character.charCount(cp);
                }
                break;
            case '{':
                braceDepth++;
                break;
            case '}':
                // a stray "}" must not hide the separators that follow it
                if (braceDepth > 0) {
                    braceDepth--;
                }
                break;
            default:
                break;
            }
        }
        // keep whatever follows the last recognised separator instead of dropping it
        if (cellStart < line.length()) {
            writeTableCell(outfile, line.substring(cellStart));
        }
    }

    /**
     * Writes one table cell: the cell's own leading and trailing whitespace is
     * copied verbatim and only the text in between is translated. In DokuWiki
     * the padding is significant (two spaces control alignment, a completely
     * empty cell makes a colspan), so it must not be normalised.
     *
     * @param outfile
     *            Writer of the target file on compilation
     * @param cell
     *            raw cell content between two separators
     * @throws IOException
     *             If an I/O error occurs
     */
    private void writeTableCell(BufferedWriter outfile, String cell) throws IOException {
        String core = cell.trim();
        if (core.isEmpty()) {
            outfile.write(cell);
            return;
        }
        int coreStart = cell.indexOf(core);
        outfile.write(cell.substring(0, coreStart));
        writeTranslate(outfile, core, null);
        outfile.write(cell.substring(coreStart + core.length()));
    }

    /**
     * Check if the line is a heading and which level of heading. The level is
     * the number of "=" characters that can be stripped in pairs from both ends
     * of the line while more than one character remains between them.
     *
     * @param line
     *            the line to check, already trimmed
     * @return the level, 0 means no heading
     */
    public static int getHeadingLevel(String line) {
        int level = 0;
        int start = 0;
        int end = line.length();
        while (start < end) {
            int first = line.codePointAt(start);
            int last = line.codePointBefore(end);
            if (first != '=' || last != '=') {
                break;
            }
            start += Character.charCount(first);
            end -= Character.charCount(last);
            level++;
        }
        boolean hasTitle = start < end && line.codePointCount(start, end) > 1;
        return hasTitle ? level : 0;
    }

    /**
     * Check if there are data to translate in the StringBuilder. If yes then it
     * translate it and reset the StringBuilder.
     *
     * @param outfile
     *            Writer of the target file on compilation
     * @param text
     *            The possible to translate text
     * @param lbpr
     *            the line breaker
     * @throws IOException
     *             If an I/O error occurs
     */
    private void writeTranslate(BufferedWriter outfile, StringBuilder text, LinebreakPreservingReader lbpr)
            throws IOException {
        if (text.length() > 0) {
            String value = text.toString();
            text.setLength(0);
            writeTranslate(outfile, value, lbpr);
        }
    }

    /**
     * Check if there are data to translate. If yes then it translate it: the
     * value is trimmed, every run of spaces is reduced to a single space, and
     * the result of processEntry is written. Nothing at all is written when the
     * trimmed value is empty.
     *
     * @param outfile
     *            Writer of the target file on compilation
     * @param value
     *            The possible to translate text
     * @param lbpr
     *            the line breaker or null if no line break should be added
     * @throws IOException
     *             If an I/O error occurs
     */
    private void writeTranslate(BufferedWriter outfile, String value, LinebreakPreservingReader lbpr)
            throws IOException {
        String trimmed = value.trim();
        if (trimmed.isEmpty()) {
            return;
        }
        // reduce all spaces to a single space
        String normalized = SPACE_RUN.matcher(trimmed).replaceAll(" ");
        outfile.write(processEntry(normalized));
        if (lbpr != null) {
            outfile.write(lbpr.getLinebreak());
        }
    }

    /**
     * Skip comments and code blocks. Text in front of an opening marker is
     * added to the paragraph buffer; the region from the opening marker up to
     * and including its end marker is copied to the target untouched, reading
     * further lines if necessary. For tag regions the paragraph buffer is
     * flushed before the region is copied; for comments it is kept so the
     * paragraph continues after the comment.
     *
     * @param outfile
     *            Writer of the target file on compilation
     * @param text
     *            The possible to translate text
     * @param lbpr
     *            the line breaker
     * @param line
     *            the current line
     * @return what remains of the current line after all skipped regions, or
     *         null if the end of the file was reached inside a region
     * @throws IOException
     *             If an I/O error occurs
     */
    private String skipCode(BufferedWriter outfile, StringBuilder text, LinebreakPreservingReader lbpr,
            String line) throws IOException {
        String rest = line;
        while (true) {
            Matcher open = CODE_TAG.matcher(rest);
            if (!open.find()) {
                return rest;
            }
            int start = open.start();
            // "code", "file", "html", "php", or "*" for a comment
            String tagName = rest.substring(start + 1, open.end());
            boolean isComment = tagName.equals("*");
            text.append(' ');
            text.append(rest, 0, start);
            if (!isComment) {
                writeTranslate(outfile, text, lbpr);
            }
            String endMarker = isComment ? "*/" : "</" + tagName + ">";
            int openLength = open.end() - start;
            rest = rest.substring(start);
            // Search behind the opening marker so that "/*/" is not taken for a
            // complete comment.
            int endIndex = rest.indexOf(endMarker, openLength);
            while (endIndex < 0) {
                outfile.write(rest + lbpr.getLinebreak());
                rest = lbpr.readLine();
                if (rest == null) {
                    return null;
                }
                endIndex = rest.indexOf(endMarker);
            }
            int end = endIndex + endMarker.length();
            outfile.write(rest.substring(0, end) + lbpr.getLinebreak());
            rest = rest.substring(end);
        }
    }
}