/**************************************************************************
OmegaT - Computer Assisted Translation (CAT) tool
with fuzzy matching, translation memory, keyword search,
glossaries, and translation leveraging into updated projects.
Copyright (C) 2000-2006 Keith Godfrey and Maxym Mykhalchuk
2014 Didier Briel
Home page: http://www.omegat.org/
Support center: http://groups.yahoo.com/group/OmegaT/
This file is part of OmegaT.
OmegaT is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
OmegaT is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
**************************************************************************/
package org.omegat.filters2.text;
import java.awt.Window;
import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.IOException;
import java.io.Writer;
import java.nio.charset.StandardCharsets;
import java.util.Map;
import org.omegat.core.Core;
import org.omegat.filters2.AbstractFilter;
import org.omegat.filters2.FilterContext;
import org.omegat.filters2.Instance;
import org.omegat.util.LinebreakPreservingReader;
import org.omegat.util.Log;
import org.omegat.util.OStrings;
/**
* Filter to support plain text files (in various encodings).
*
* @author Keith Godfrey
* @author Maxym Mykhalchuk
* @author Didier Briel
*/
public class TextFilter extends AbstractFilter {

    /** Charset name used for the default {@code *.txt2} instance. */
    public static final String ISO88592 = "ISO-8859-2";

    /**
     * Text filter should segment text into paragraphs on line breaks.
     */
    public static final String SEGMENT_BREAKS = "BREAKS";

    /**
     * Default. Text filter should segment text into paragraphs on empty lines.
     */
    public static final String SEGMENT_EMPTYLINES = "EMPTYLINES";

    /**
     * Text filter should not segment text into paragraphs.
     */
    public static final String SEGMENT_NEVER = "NEVER";

    /**
     * Key of the option selecting the segmentation mode; compared against
     * {@link #SEGMENT_BREAKS} and {@link #SEGMENT_NEVER}, any other value
     * (including a missing one) selects segmentation on empty lines.
     */
    public static final String OPTION_SEGMENT_ON = "segmentOn";

    /**
     * Length at which a line break should occur in target documents
     */
    public static final String OPTION_LINE_LENGTH = "lineLength";

    /**
     * Maximum line length in target documents
     */
    public static final String OPTION_MAX_LINE_LENGTH = "maxLineLength";

    /**
     * Register plugin into OmegaT.
     */
    public static void loadPlugins() {
        Core.registerFilterClass(TextFilter.class);
    }

    /**
     * Counterpart of {@link #loadPlugins()}; this filter has nothing to
     * release, so the method is intentionally empty.
     */
    public static void unloadPlugins() {
    }

    /**
     * Returns the display name of this file format, looked up under the
     * {@code TEXTFILTER_FILTER_NAME} resource key.
     */
    @Override
    public String getFileFormatName() {
        return OStrings.getString("TEXTFILTER_FILTER_NAME");
    }

    /**
     * Returns the default filter instances: {@code *.txt} without explicit
     * encodings, and {@code *.txt1}, {@code *.txt2}, {@code *.utf8} with both
     * encoding arguments set to ISO-8859-1, ISO-8859-2 and UTF-8 respectively.
     */
    @Override
    public Instance[] getDefaultInstances() {
        String latin1 = StandardCharsets.ISO_8859_1.name();
        String utf8 = StandardCharsets.UTF_8.name();
        return new Instance[] {
            new Instance("*.txt"),
            new Instance("*.txt1", latin1, latin1),
            new Instance("*.txt2", ISO88592, ISO88592),
            new Instance("*.utf8", utf8, utf8)
        };
    }

    /** Always {@code true} for the text filter. */
    @Override
    public boolean isSourceEncodingVariable() {
        return true;
    }

    /** Always {@code true} for the text filter. */
    @Override
    public boolean isTargetEncodingVariable() {
        return true;
    }

    /** Always {@code true} for the text filter. */
    @Override
    protected boolean requirePrevNextFields() {
        return true;
    }

    /**
     * Reads the whole input, passes its translatable parts through
     * {@code processEntry} and writes the result, using the segmentation mode
     * selected by {@link #OPTION_SEGMENT_ON}.
     * <p>
     * When both {@link #OPTION_LINE_LENGTH} and {@link #OPTION_MAX_LINE_LENGTH}
     * are non-zero integers, output goes through a
     * {@link LineLengthLimitWriter}; otherwise it is written to {@code out}
     * directly. The writer that was used is closed before this method
     * returns, also when processing fails.
     *
     * @param in  source text; a leading byte order mark is skipped
     * @param out destination for the processed text; closed by this method
     * @param fc  context supplying the target tokenizer for line limiting
     * @throws IOException if reading or writing fails
     */
    @Override
    public void processFile(BufferedReader in, BufferedWriter out, FilterContext fc) throws IOException {
        // BOM (byte order mark) bugfix: swallow a leading U+FEFF, otherwise
        // rewind so the first character is processed normally.
        in.mark(1);
        int ch = in.read();
        if (ch != 0xFEFF) {
            in.reset();
        }

        // A missing options map is treated like a missing key: default mode.
        String segmentOn = processOptions == null ? null : processOptions.get(OPTION_SEGMENT_ON);

        // try-with-resources guarantees the (possibly wrapping) writer is
        // closed even if one of the processing methods throws.
        try (Writer output = openOutput(out, fc)) {
            if (SEGMENT_BREAKS.equals(segmentOn)) {
                processSegLineBreaks(in, output);
            } else if (SEGMENT_NEVER.equals(segmentOn)) {
                processNonSeg(in, output);
            } else {
                processSegEmptyLines(in, output);
            }
        }
    }

    /**
     * Chooses the writer for {@link #processFile}: a
     * {@link LineLengthLimitWriter} around {@code out} when both line-length
     * options are non-zero, otherwise {@code out} itself.
     */
    private Writer openOutput(BufferedWriter out, FilterContext fc) {
        int lineLength = readIntOption(OPTION_LINE_LENGTH);
        int maxLineLength = readIntOption(OPTION_MAX_LINE_LENGTH);
        if (lineLength != 0 && maxLineLength != 0) {
            return new LineLengthLimitWriter(out, lineLength, maxLineLength, fc.getTargetTokenizer());
        }
        return out;
    }

    /**
     * Reads an integer option, returning 0 when there are no options, the key
     * is absent, or the value is not a parsable integer.
     */
    private int readIntOption(String key) {
        if (processOptions == null) {
            return 0;
        }
        try {
            // parseInt(null) also throws NumberFormatException, which covers
            // the "key not set" case.
            return Integer.parseInt(processOptions.get(key));
        } catch (NumberFormatException ignored) {
            return 0;
        }
    }

    /** Process the file without segmenting it. */
    private void processNonSeg(BufferedReader in, Writer out) throws IOException {
        StringBuilder segment = new StringBuilder();
        char[] buf = new char[4096];
        int len;
        while ((len = in.read(buf)) >= 0) {
            segment.append(buf, 0, len);
        }
        // The entire content is handed over as one entry.
        out.write(processEntry(segment.toString()));
    }

    /**
     * Processes the file segmenting on line breaks.
     * <p>
     * Every line that is not blank is passed to {@code processEntry} on its
     * own. Blank lines and all line breaks are copied to the output
     * unchanged.
     */
    private void processSegLineBreaks(BufferedReader in, Writer out) throws IOException {
        LinebreakPreservingReader lpin = new LinebreakPreservingReader(in);
        // Text to copy verbatim ahead of the next entry (blank lines, breaks).
        StringBuilder nontrans = new StringBuilder();
        String s;
        while ((s = lpin.readLine()) != null) {
            if (s.trim().isEmpty()) {
                nontrans.append(s).append(lpin.getLinebreak());
                continue;
            }
            out.write(nontrans.toString());
            nontrans.setLength(0);
            out.write(processEntry(s));
            // The break that ended this line is emitted before the next entry
            // (or at end of input).
            nontrans.append(lpin.getLinebreak());
        }
        lpin.close();
        if (nontrans.length() > 0) {
            out.write(nontrans.toString());
        }
    }

    /**
     * Processes the file segmenting on empty lines.
     * <p>
     * Consecutive lines are collected, together with their line breaks, into
     * one entry until an empty line is read; the collected text is then
     * passed to {@code processEntry}. Empty lines, and whitespace-only lines
     * met while no entry is being collected, are copied to the output
     * unchanged.
     */
    private void processSegEmptyLines(BufferedReader in, Writer out) throws IOException {
        LinebreakPreservingReader lpin = new LinebreakPreservingReader(in);
        StringBuilder nontrans = new StringBuilder();
        StringBuilder trans = new StringBuilder();
        String s;
        while ((s = lpin.readLine()) != null) {
            if (s.isEmpty()) {
                // Empty line: flush pending verbatim text, then the entry
                // collected so far, and remember the empty line's break.
                out.write(nontrans.toString());
                nontrans.setLength(0);
                out.write(processEntry(trans.toString()));
                trans.setLength(0);
                nontrans.append(lpin.getLinebreak());
            } else if (s.trim().isEmpty() && trans.length() == 0) {
                // Whitespace-only line outside of an entry: keep verbatim.
                nontrans.append(s);
                nontrans.append(lpin.getLinebreak());
            } else {
                trans.append(s);
                trans.append(lpin.getLinebreak());
            }
        }
        lpin.close();
        // Flush leftovers only when there is something left. These checks
        // used to be ">= 0", which is always true for a length.
        // NOTE(review): assumes an empty entry carries no information for
        // processEntry - confirm against the entry callbacks.
        if (nontrans.length() > 0) {
            out.write(nontrans.toString());
        }
        if (trans.length() > 0) {
            out.write(processEntry(trans.toString()));
        }
    }

    /**
     * Shows the text filter options dialog.
     *
     * @param parent window passed to the dialog
     * @param config options passed to the dialog
     * @return the options reported by the dialog when its return status is
     *         {@code RET_OK}; {@code null} otherwise, including when showing
     *         the dialog threw an exception (which is logged)
     */
    @Override
    public Map<String, String> changeOptions(Window parent, Map<String, String> config) {
        try {
            TextOptionsDialog dialog = new TextOptionsDialog(parent, config);
            dialog.setVisible(true);
            if (TextOptionsDialog.RET_OK == dialog.getReturnStatus()) {
                return dialog.getOptions();
            }
            return null;
        } catch (Exception e) {
            Log.log("Text filter threw an exception:");
            Log.log(e);
            return null;
        }
    }

    /**
     * Returns true to indicate that Text filter has options.
     *
     * @return True, because Text filter has options.
     */
    @Override
    public boolean hasOptions() {
        return true;
    }
}