/************************************************************************** OmegaT - Computer Assisted Translation (CAT) tool with fuzzy matching, translation memory, keyword search, glossaries, and translation leveraging into updated projects. Copyright (C) 2000-2006 Keith Godfrey and Maxym Mykhalchuk 2008 Didier Briel Home page: http://www.omegat.org/ Support center: http://groups.yahoo.com/group/OmegaT/ This file is part of OmegaT. OmegaT is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. OmegaT is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program. If not, see <http://www.gnu.org/licenses/>. **************************************************************************/ package org.omegat.filters2.xtagqxp; import java.io.BufferedReader; import java.io.BufferedWriter; import java.io.IOException; import java.io.Writer; import java.nio.charset.StandardCharsets; import java.util.ArrayList; import java.util.List; import org.omegat.filters2.AbstractFilter; import org.omegat.filters2.FilterContext; import org.omegat.filters2.Instance; import org.omegat.filters2.TranslationException; import org.omegat.util.OStrings; /** * Filter to support Xtag files generated by CopyFlow Gold for QuarkXPress * * @author Keith Godfrey * @author Maxym Mykhalchuk * @author Didier Briel */ public class XtagFilter extends AbstractFilter { protected static final String EOL = "\r\n"; @Override public String getFileFormatName() { return OStrings.getString("XTAGFILTER_FILTER_NAME"); } @Override public Instance[] getDefaultInstances() { return new Instance[] { new Instance("*.tag", StandardCharsets.UTF_16LE.name(), StandardCharsets.UTF_16LE.name()), new Instance("*.xtg", StandardCharsets.UTF_16LE.name(), StandardCharsets.UTF_16LE.name()), }; } @Override public boolean isSourceEncodingVariable() { return true; } @Override public boolean isTargetEncodingVariable() { return true; } @Override protected boolean requirePrevNextFields() { return true; } @Override public void processFile(BufferedReader in, BufferedWriter out, FilterContext fc) throws IOException, TranslationException { // BOM (byte order mark) bugfix in.mark(1); int ch = in.read(); if (ch != 0xFEFF) in.reset(); else out.write(ch); // If there was a BOM, we rewrite it processXtagFile(in, out); } /** * Processes a CopyFlow Gold for QuarkXpress document. Transmits the * translatable parts to privateProcessEntry. * * @param inFile * Source document * @param outFile * Target document * @throws java.io.IOException * @throws org.omegat.filters2.TranslationException */ private void processXtagFile(BufferedReader inFile, Writer outFile) throws IOException, TranslationException { final int STATE_WAIT_TEXT = 1; final int STATE_READ_TEXT = 2; int state = STATE_WAIT_TEXT; String tr; String s; s = inFile.readLine(); while (s != null) { // Translatable text if (s.startsWith("@$:")) { outFile.write("@$:"); s = s.substring(3); state = STATE_READ_TEXT; } else if (s.startsWith("#boxname")) { state = STATE_WAIT_TEXT; } if (state == STATE_READ_TEXT) { tr = privateProcessEntry(s); } else tr = s; outFile.write(tr); s = inFile.readLine(); if (s != null) outFile.write(EOL); } } /** * Lists of Xtags in an entry */ private List<Xtag> listTags = new ArrayList<Xtag>(); /** * Finds the Xtag corresponding to an OmegaT tag * * @param tag * OmegaT tag, without < and > * @return either the original Xtag, or the tag with < and > * characters converted to the Xtag equivalent */ private String findTag(StringBuilder tag) { for (Xtag oneTag : listTags) { if (oneTag.toShortcut().equals(tag.toString())) { return oneTag.toOriginal(); } } // It was not a real tag // We must convert < to <\<> and > to <\>> StringBuilder changedString = new StringBuilder(); for (int cp, i = 0; i < tag.length(); i += Character.charCount(cp)) { cp = tag.codePointAt(i); changedString.append(convertSpecialCharacter(cp)); } return changedString.toString(); } /** * Receives a character, and convert it to the Xtag equivalent if necessary * * @param c * A character * @return either the original character, or an Xtag version of that * character */ private String convertSpecialCharacter(int cp) { if (cp == '<') { return "<\\<>"; } else if (cp == '>') { return "<\\>>"; } else { return String.valueOf(Character.toChars(cp)); } } /** * Receives an entry with CopyFlow Gold for QuarkXpress pseudo tags (Xtags) * Transforms the Xtags into OmegaT tags * * @param s * An entry with Xtags to process * @return the entry with OmegaT tags */ private String convertToTags(String s) { StringBuilder changedString = new StringBuilder(); final int STATE_NORMAL = 1; final int STATE_COLLECT_TAG = 2; int state = STATE_NORMAL; int num = 0; listTags.clear(); StringBuilder tag = new StringBuilder(s.length()); for (int cp, i = 0; i < s.length(); i += Character.charCount(cp)) { cp = s.codePointAt(i); // Start of a tag if ((cp == '<') && (!(state == STATE_COLLECT_TAG))) { tag.setLength(0); state = STATE_COLLECT_TAG; // Possible end of a tag // Exception for <\>>, which is how CopyFlow stores a > } else if ((cp == '>') && (tag.lastIndexOf("\\") != tag.offsetByCodePoints(tag.length(), -1))) { num++; Xtag oneTag = new Xtag(tag.toString(), num); changedString.append(oneTag.toShortcut()); listTags.add(oneTag); tag.setLength(0); state = STATE_NORMAL; } else if (state == STATE_COLLECT_TAG) { tag.appendCodePoint(cp); } else { changedString.appendCodePoint(cp); } } return changedString.toString(); } /** * Receives an entry with OmegaT tags. Transorms the OmegaT tags back into * the original Xtags * * @param s * An entry with OmegaT tags to process * @return the entry with the original Xtags */ private String convertToXtags(String s) { StringBuilder changedString = new StringBuilder(); final int STATE_NORMAL = 1; final int STATE_COLLECT_TAG = 2; int state = STATE_NORMAL; StringBuilder tag = new StringBuilder(s.length()); for (int cp, i = 0; i < s.length(); i += Character.charCount(cp)) { cp = s.codePointAt(i); // Start of a tag if ((cp == '<') && (state != STATE_COLLECT_TAG)) { tag.setLength(0); tag.appendCodePoint(cp); state = STATE_COLLECT_TAG; // End of a tag } else if ((cp == '>') && (state == STATE_COLLECT_TAG)) { tag.appendCodePoint(cp); changedString.append(findTag(tag)); state = STATE_NORMAL; tag.setLength(0); } else if (state == STATE_COLLECT_TAG) { tag.appendCodePoint(cp); } else { changedString.append(convertSpecialCharacter(cp)); } } // Copy what might remain at the end of the string changedString.append(findTag(tag)); return changedString.toString(); } /** * Processes Xtags before and after sending the entry to OmegaT. The Xtags * in the entry are converted to OmegaT tags, then the entry is sent to * OmegaT, and the OmegaT tags are converted back to Xtags. * * @param entry * An entry to process * @return The entry for the target document */ private String privateProcessEntry(String entry) { entry = convertToTags(entry); entry = processEntry(entry); entry = convertToXtags(entry); return entry; } }