/**************************************************************************
OmegaT - Computer Assisted Translation (CAT) tool
with fuzzy matching, translation memory, keyword search,
glossaries, and translation leveraging into updated projects.
Copyright (C) 2000-2006 Keith Godfrey and Maxym Mykhalchuk
2010 Alex Buloichik
2015 Aaron Madlon-Kay
Home page: http://www.omegat.org/
Support center: http://groups.yahoo.com/group/OmegaT/
This file is part of OmegaT.
OmegaT is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
OmegaT is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
**************************************************************************/
package org.omegat.gui.glossary;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.OutputStreamWriter;
import java.io.Writer;
import java.nio.charset.Charset;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.List;
import org.omegat.util.EncodingDetector;
import org.omegat.util.OConsts;
import org.omegat.util.StringUtil;
/**
* Reader for tab separated glossaries.
*
* @author Keith Godfrey
* @author Maxym Mykhalchuk
* @author Alex Buloichik <alex73mail@gmail.com>
* @author Aaron Madlon-Kay
*/
public final class GlossaryReaderTSV {
private GlossaryReaderTSV() {
}
public static String getFileEncoding(final File file) throws IOException {
return getFileEncoding(file, Charset.defaultCharset().name());
}
public static String getFileEncoding(final File file, String defaultEncoding) throws IOException {
String fnameLower = file.getName().toLowerCase();
if (fnameLower.endsWith(OConsts.EXT_TSV_UTF8)) {
return StandardCharsets.UTF_8.name();
} else {
return EncodingDetector.detectEncodingDefault(file, defaultEncoding);
}
}
public static List<GlossaryEntry> read(final File file, boolean priorityGlossary) throws IOException {
String encoding = getFileEncoding(file);
List<GlossaryEntry> result = new ArrayList<GlossaryEntry>();
try (BufferedReader in = new BufferedReader(new InputStreamReader(new FileInputStream(file), encoding))) {
// BOM (byte order mark) bugfix
in.mark(1);
int ch = in.read();
if (ch != 0xFEFF) {
in.reset();
}
for (String s = in.readLine(); s != null; s = in.readLine()) {
// skip lines that start with '#'
if (s.startsWith("#")) {
continue;
}
// divide lines on tabs
String[] tokens = s.split("\t");
// check token list to see if it has a valid string
if (tokens.length < 2 || tokens[0].isEmpty()) {
continue;
}
// creating glossary entry and add it to the hash
// (even if it's already there!)
String comment = "";
if (tokens.length >= 3) {
comment = tokens[2];
}
result.add(new GlossaryEntry(tokens[0], tokens[1], comment, priorityGlossary));
}
}
return result;
}
/**
* Appends entry to glossary file. If file does not exist yet, it will be created.
*
* @param file The file to (create and) append to
* @param newEntry the entry to append.
* @throws IOException
*/
public static synchronized void append(final File file, GlossaryEntry newEntry) throws IOException {
String encoding = StandardCharsets.UTF_8.name();
if (!file.exists()) {
file.getParentFile().mkdirs();
file.createNewFile();
} else {
encoding = getFileEncoding(file, StandardCharsets.UTF_8.name());
}
try (Writer wr = new OutputStreamWriter(new FileOutputStream(file, true), encoding)) {
wr.append(newEntry.getSrcText()).append('\t').append(newEntry.getLocText());
if (!StringUtil.isEmpty(newEntry.getCommentText())) {
wr.append('\t').append(newEntry.getCommentText());
}
wr.append(System.lineSeparator());
}
}
}