/**************************************************************************
 OmegaT - Computer Assisted Translation (CAT) tool
          with fuzzy matching, translation memory, keyword search,
          glossaries, and translation leveraging into updated projects.

 Copyright (C) 2000-2006 Keith Godfrey and Maxym Mykhalchuk
               2010 Alex Buloichik
               2015 Aaron Madlon-Kay
               Home page: http://www.omegat.org/
               Support center: http://groups.yahoo.com/group/OmegaT/

 This file is part of OmegaT.

 OmegaT is free software: you can redistribute it and/or modify
 it under the terms of the GNU General Public License as published by
 the Free Software Foundation, either version 3 of the License, or
 (at your option) any later version.

 OmegaT is distributed in the hope that it will be useful,
 but WITHOUT ANY WARRANTY; without even the implied warranty of
 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 GNU General Public License for more details.

 You should have received a copy of the GNU General Public License
 along with this program.  If not, see <http://www.gnu.org/licenses/>.
 **************************************************************************/

package org.omegat.gui.glossary;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.OutputStreamWriter;
import java.io.Writer;
import java.nio.charset.Charset;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.List;
import java.util.Locale;

import org.omegat.util.EncodingDetector;
import org.omegat.util.OConsts;
import org.omegat.util.StringUtil;

/**
 * Reader for tab separated glossaries.
 *
 * @author Keith Godfrey
 * @author Maxym Mykhalchuk
 * @author Alex Buloichik <alex73mail@gmail.com>
 * @author Aaron Madlon-Kay
 */
public final class GlossaryReaderTSV {

    /** Static utility class; not meant to be instantiated. */
    private GlossaryReaderTSV() {
    }

    /**
     * Determines the encoding of a glossary file, using the JVM default
     * charset as the default passed to the detector.
     *
     * @param file
     *            the glossary file to inspect
     * @return the name of the encoding to use for the file
     * @throws IOException
     *             if the underlying encoding detection fails
     */
    public static String getFileEncoding(final File file) throws IOException {
        return getFileEncoding(file, Charset.defaultCharset().name());
    }

    /**
     * Determines the encoding of a glossary file.
     * <p>
     * A file whose name ends with {@link OConsts#EXT_TSV_UTF8} (compared
     * case-insensitively) is always reported as UTF-8. For any other file the
     * decision is delegated to
     * {@link EncodingDetector#detectEncodingDefault(File, String)}.
     *
     * @param file
     *            the glossary file to inspect
     * @param defaultEncoding
     *            the default encoding handed to the detector
     * @return the name of the encoding to use for the file
     * @throws IOException
     *             if the underlying encoding detection fails
     */
    public static String getFileEncoding(final File file, String defaultEncoding) throws IOException {
        // Locale.ROOT keeps the extension comparison independent of the
        // user's locale-specific case mapping rules.
        String fnameLower = file.getName().toLowerCase(Locale.ROOT);
        if (fnameLower.endsWith(OConsts.EXT_TSV_UTF8)) {
            return StandardCharsets.UTF_8.name();
        }
        return EncodingDetector.detectEncodingDefault(file, defaultEncoding);
    }

    /**
     * Reads all entries from a tab separated glossary file.
     * <p>
     * Lines starting with {@code #} are skipped, as are lines with fewer than
     * two tab separated fields or with an empty first field. The first field
     * is used as the source term, the second as the translation and the
     * optional third as the comment; any further fields are ignored.
     *
     * @param file
     *            the glossary file to read
     * @param priorityGlossary
     *            flag passed through to every created {@link GlossaryEntry}
     * @return the entries in file order; duplicates are not removed
     * @throws IOException
     *             if the file cannot be opened or read
     */
    public static List<GlossaryEntry> read(final File file, boolean priorityGlossary) throws IOException {
        String encoding = getFileEncoding(file);
        List<GlossaryEntry> result = new ArrayList<>();

        try (BufferedReader in = new BufferedReader(
                new InputStreamReader(new FileInputStream(file), encoding))) {
            // BOM (byte order mark) bugfix: consume a leading U+FEFF if
            // present, otherwise rewind so the first character is not lost.
            in.mark(1);
            int ch = in.read();
            if (ch != 0xFEFF) {
                in.reset();
            }

            for (String s = in.readLine(); s != null; s = in.readLine()) {
                // skip lines that start with '#'
                if (s.startsWith("#")) {
                    continue;
                }

                // divide lines on tabs
                String[] tokens = s.split("\t");

                // a usable line needs a non-empty source term and a translation
                if (tokens.length < 2 || tokens[0].isEmpty()) {
                    continue;
                }

                // the comment column is optional
                String comment = tokens.length >= 3 ? tokens[2] : "";

                // add the entry to the result list (even if an identical
                // entry is already there!)
                result.add(new GlossaryEntry(tokens[0], tokens[1], comment, priorityGlossary));
            }
        }

        return result;
    }

    /**
     * Appends entry to glossary file. If file does not exist yet, it will be
     * created, together with any missing parent directories.
     * <p>
     * A newly created file is written as UTF-8. For an existing file the
     * encoding is obtained from {@link #getFileEncoding(File, String)} with
     * UTF-8 as the default. The method is synchronized on this class.
     *
     * @param file
     *            The file to (create and) append to
     * @param newEntry
     *            the entry to append.
     * @throws IOException
     *             if the file cannot be created, its encoding cannot be
     *             determined, or writing fails
     */
    public static synchronized void append(final File file, GlossaryEntry newEntry) throws IOException {
        String encoding = StandardCharsets.UTF_8.name();
        if (!file.exists()) {
            // getParentFile() is null for a path without a directory
            // component (e.g. "glossary.txt"); nothing to create in that case.
            File parentDir = file.getParentFile();
            if (parentDir != null) {
                parentDir.mkdirs();
            }
            file.createNewFile();
        } else {
            encoding = getFileEncoding(file, StandardCharsets.UTF_8.name());
        }

        // Second constructor argument 'true' opens the stream in append mode.
        try (Writer wr = new OutputStreamWriter(new FileOutputStream(file, true), encoding)) {
            wr.append(newEntry.getSrcText()).append('\t').append(newEntry.getLocText());
            // the comment column is only written when there is a comment
            if (!StringUtil.isEmpty(newEntry.getCommentText())) {
                wr.append('\t').append(newEntry.getCommentText());
            }
            wr.append(System.lineSeparator());
        }
    }
}