// Entry.java example
//
// Explorer
// OmegaT-master
/**************************************************************************
 OmegaT - Computer Assisted Translation (CAT) tool
          with fuzzy matching, translation memory, keyword search,
          glossaries, and translation leveraging into updated projects.

 Copyright (C) 2000-2006 Keith Godfrey and Maxym Mykhalchuk
               2007 Didier Briel
               2010 Antonio Vilei
               2012 Didier Briel
               2013 Alex Buloichik, Didier Briel
               Home page: http://www.omegat.org/
               Support center: http://groups.yahoo.com/group/OmegaT/

 This file is part of OmegaT.

 OmegaT is free software: you can redistribute it and/or modify
 it under the terms of the GNU General Public License as published by
 the Free Software Foundation, either version 3 of the License, or
 (at your option) any later version.

 OmegaT is distributed in the hope that it will be useful,
 but WITHOUT ANY WARRANTY; without even the implied warranty of
 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 GNU General Public License for more details.

 You should have received a copy of the GNU General Public License
 along with this program.  If not, see <http://www.gnu.org/licenses/>.
 **************************************************************************/

package org.omegat.filters3;

import java.util.ArrayList;
import java.util.List;

import org.omegat.core.Core;
import org.omegat.core.data.ProtectedPart;
import org.omegat.filters2.TranslationException;
import org.omegat.filters3.xml.Handler;
import org.omegat.filters3.xml.XMLContentBasedTag;
import org.omegat.filters3.xml.XMLDialect;
import org.omegat.filters3.xml.XMLText;
import org.omegat.util.StringUtil;
import org.omegat.util.TagUtil;

/**
 * Translatable entry. Holds a list of source tags and text, translated text and
 * maintains correspondence between tags in source and in target.
 *
 * @author Maxym Mykhalchuk
 * @author Didier Briel
 * @author Alex Buloichik (alex73mail@gmail.com)
 */
public class Entry {
    /** XML dialect of this entry; also passed on to the entry created to hold a translation. */
    final XMLDialect xmlDialect;

    /** Handler queried for paragraph tags and for the "remove all tags" context setting. */
    final Handler handler;

    /**
     * Creates an empty entry bound to the given dialect and handler.
     *
     * @param xmlDialect
     *            dialect stored for later use by this entry
     * @param handler
     *            handler stored for later use by this entry
     */
    public Entry(XMLDialect xmlDialect, Handler handler) {
        this.handler = handler;
        this.xmlDialect = xmlDialect;
    }

    /**
     * Empties this entry: removes all elements, forgets the translation and
     * the cached text instance, and marks the tag detection as stale. The
     * tags aggregation setting is left as it is.
     */
    public void clear() {
        elements.clear();
        textInstance = null;
        translatedEntry = null;
        tagsDetected = false;
    }

    // //////////////////////////////////////////////////////////////////////////
    // Dealing with source here
    // //////////////////////////////////////////////////////////////////////////

    /** Whether runs of consecutive tags are merged into one aggregated tag before detection. */
    private boolean tagsAggregationEnabled;

    /**
     * Tells whether the "first translatable" and "last translatable"
     * positions are up to date. The former is the first starting tag whose
     * ending lies in the paragraph, the latter is the last ending tag whose
     * beginning lies in the paragraph.
     */
    private boolean tagsDetected;

    /** Index of the first element of the translatable region; -1 if the entry has nothing to translate. */
    private int firstGood;

    /**
     * Returns the index of the "first translatable" element, running the tag
     * detection first when it is stale.
     */
    private int getFirstGood() {
        detectAndEnumerateTags();
        return this.firstGood;
    }

    /** Index of the last element of the translatable region; -2 if the entry has nothing to translate. */
    private int lastGood;

    /**
     * Returns the index of the "last translatable" element, running the tag
     * detection first when it is stale.
     */
    private int getLastGood() {
        detectAndEnumerateTags();
        return this.lastGood;
    }

    /** First meaningful {@link Text} element found by the tag detection, or {@code null}. */
    private Text textInstance;

    /**
     * Returns the {@link Text} element remembered by the tag detection (may be
     * {@code null}), running the detection first when it is stale.
     */
    private Text getTextInstance() {
        detectAndEnumerateTags();
        return this.textInstance;
    }

    /**
     * Brings the tag detection up to date. Does nothing when
     * {@link #tagsDetected} is already set. Otherwise it optionally aggregates
     * tags, calls {@link #detectTags()} to find the first and the last
     * translatable elements, and then {@link #enumerateTags(int, int)} to
     * assign shortcut indexes to the tags in that region.
     */
    private void detectAndEnumerateTags() {
        if (tagsDetected) {
            return;
        }

        if (tagsAggregationEnabled) {
            aggregateTags();
        }
        detectTags();

        // The flag must be raised before the getters below are used: they call
        // back into this method and would otherwise recurse endlessly.
        tagsDetected = true;
        enumerateTags(getFirstGood(), getLastGood());
    }

    /** Marks the tag detection as stale so that it is redone on next use. */
    public void resetTagDetected() {
        this.tagsDetected = false;
    }

    /**
     * Merges every run of consecutive tags into a single aggregated tag. The
     * current OpenXML filter finds too many tags, usually causing what users
     * call the "tag soup". Aggregation alleviates this, but can sometimes
     * lead to semantic issues, so it is only a temporary hack until the
     * OpenXML filter is improved.
     */
    private void aggregateTags() {
        List<Element> merged = new ArrayList<Element>();
        AggregatedTag pending = null;

        for (Element current : elements) {
            if (!(current instanceof Tag)) {
                // A non-tag element ends the current run: flush the run
                // first, then keep the element itself.
                if (pending != null) {
                    merged.add(pending);
                    pending = null;
                }
                merged.add(current);
                continue;
            }

            // A tag either opens a new run or joins the one in progress.
            if (pending == null) {
                pending = new AggregatedTag("tag", null, Tag.Type.ALONE, new Attributes());
            }
            pending.add((Tag) current);
        }

        // The entry may end with a run of tags that was not flushed yet.
        if (pending != null) {
            merged.add(pending);
        }

        // Replace the content of the element list with the merged sequence.
        elements.clear();
        elements.addAll(merged);
    }

    /**
     * Computes {@link #firstGood} and {@link #lastGood}, the bounds of the
     * translatable region of this entry, and remembers the first meaningful
     * {@link Text} element in {@link #textInstance}.
     * <p>
     * "First translatable" is the first starting tag that has its ending in
     * the paragraph, and "last translatable" is the last ending tag that has
     * its beginning in the paragraph. When the entry holds neither meaningful
     * text nor a content-based tag, the bounds are set to -1 and -2 so that
     * the region is empty.
     */
    private void detectTags() {
        // first, detecting if we have any text and where we have it
        int textStart = -1;
        for (int i = 0; i < size(); i++) {
            Element elem = get(i);
            if ((elem instanceof Text) && ((Text) elem).isMeaningful()) {
                textStart = i;
                break;
            }
            // NOTE(review): unlike the text branch above there is no break
            // here, so when the entry has no meaningful text the LAST
            // content-based tag becomes textStart - confirm this is intended.
            if (elem instanceof XMLContentBasedTag) {
                textStart = i;
            }
        }
        // remember the first meaningful text element, if there is one
        for (int i = 0; i < size(); i++) {
            Element elem = get(i);
            if ((elem instanceof Text) && ((Text) elem).isMeaningful()) {
                textInstance = (Text) elem;
                break;
            }
        }
        if (textStart < 0) {
            // we have no translatable text in the whole entry
            firstGood = -1;
            lastGood = -2;
            textInstance = null;
            return;
        }

        // the text ends at the last meaningful text element; it stays equal to
        // textStart when no meaningful text exists
        int textEnd = textStart;
        for (int i = size() - 1; i >= 0; i--) {
            Element elem = get(i);
            if ((elem instanceof Text) && ((Text) elem).isMeaningful()) {
                textEnd = i;
                break;
            }
        }

        // if content-based tag is inside text, then expand text into paired content-based tag
        // (textStart and textEnd may change while this loop is running)
        for (int i = textStart; i <= textEnd; i++) {
            Element elem = get(i);
            if (elem instanceof XMLContentBasedTag) {
                XMLContentBasedTag tag = (XMLContentBasedTag) elem;
                if (tag.getTag().equals("bpt") || tag.getTag().equals("ept")) {
                    // find id of paired tag
                    // (presumably nvl returns its first non-null argument - confirm against StringUtil)
                    String id = StringUtil.nvl(tag.getAttribute("rid"), tag.getAttribute("id"),
                            tag.getAttribute("i"));
                    if (id == null) {
                        continue;
                    }
                    // find paired tag before
                    // (no break: the match with the lowest index wins)
                    for (int j = textStart - 1; j >= 0; j--) {
                        if (get(j) instanceof XMLContentBasedTag) {
                            XMLContentBasedTag tag2 = (XMLContentBasedTag) get(j);
                            if (tag2.getTag().equals("bpt") || tag2.getTag().equals("ept")) {
                                // find id of paired tag
                                String id2 = StringUtil.nvl(tag2.getAttribute("rid"),
                                        tag2.getAttribute("id"), tag2.getAttribute("i"));
                                if (id.equals(id2)) {
                                    textStart = j;
                                }
                            }
                        }
                    }
                    // find paired tag after
                    // (no break: the match with the highest index wins)
                    for (int j = textEnd + 1; j < size(); j++) {
                        if (get(j) instanceof XMLContentBasedTag) {
                            XMLContentBasedTag tag2 = (XMLContentBasedTag) get(j);
                            if (tag2.getTag().equals("bpt") || tag2.getTag().equals("ept")) {
                                // find id of paired tag
                                String id2 = StringUtil.nvl(tag2.getAttribute("rid"),
                                        tag2.getAttribute("id"), tag2.getAttribute("i"));
                                if (id.equals(id2)) {
                                    textEnd = j;
                                }
                            }
                        }
                    }
                }
            }
        }

        // //////////////////////////////////////////////////////////////////////
        // "first good"
        // detecting the first starting tag that has its ending in the paragraph
        boolean found = false;
        for (firstGood = 0; firstGood < textStart; firstGood++) {
            Element goodElem = get(firstGood);
            if (!(goodElem instanceof Tag)) {
                continue;
            }

            Tag good = (Tag) goodElem;
            if (Tag.Type.BEGIN != good.getType()) {
                continue;
            }

            // trying to test: look for the matching end tag, counting nested
            // tags of the same name; only positions before textEnd are examined
            int recursion = 1;
            for (int i = firstGood + 1; i < textEnd; i++) {
                Element candElement = get(i);
                if (candElement instanceof Tag) {
                    Tag cand = (Tag) candElement;
                    if (cand.getTag().equals(good.getTag())) {
                        if (Tag.Type.BEGIN == cand.getType()) {
                            recursion++;
                        } else if (Tag.Type.END == cand.getType()) {
                            recursion--;
                            if (recursion == 0) {
                                // the pair only counts if it closes after the text has started
                                if (i > textStart) {
                                    found = true;
                                }
                                break;
                            }
                        }
                    }
                }
            }
            // if we could find an ending, this is a "good one"
            if (found) {
                break;
            }
        }
        if (!found) {
            // no enclosing pair: the region starts at the text itself
            firstGood = textStart;
        }

        // //////////////////////////////////////////////////////////////////////
        // "last good"
        // detecting the last ending tag that has its starting in the paragraph
        found = false;
        for (lastGood = size() - 1; lastGood > textEnd; lastGood--) {
            Element goodElem = get(lastGood);
            if (!(goodElem instanceof Tag)) {
                continue;
            }

            Tag good = (Tag) goodElem;
            if (Tag.Type.END != good.getType()) {
                continue;
            }

            // trying to test: look backwards for the matching begin tag,
            // counting nested tags of the same name; only positions after
            // textStart are examined
            int recursion = 1;
            for (int i = lastGood - 1; i > textStart; i--) {
                Element candElement = get(i);
                if (candElement instanceof Tag) {
                    Tag cand = (Tag) candElement;
                    if (cand.getTag().equals(good.getTag())) {
                        if (Tag.Type.END == cand.getType()) {
                            recursion++;
                        } else if (Tag.Type.BEGIN == cand.getType()) {
                            recursion--;
                            if (recursion == 0) {
                                // the pair only counts if it opens before the text has ended
                                if (i < textEnd) {
                                    found = true;
                                }
                                break;
                            }
                        }
                    }
                }
            }
            // if we could find a starting, this is a "good one"
            if (found) {
                break;
            }
        }
        if (!found) {
            // no enclosing pair: the region ends at the text itself
            lastGood = textEnd;
        }

        boolean removeTags;
        if (handler.getContext().isRemoveAllTags()) { // If Remove Tags is on,
            removeTags = true;                        // Remove leading and trailing tags must be on
        } else {
            removeTags = Core.getFilterMaster().getConfig().isRemoveTags();
        }
        // leading and trailing tags were left out above - bring them back if
        // they must not be removed; the region grows over every tag up to (but
        // not including) the nearest paragraph tag on each side
        if (!removeTags) {
            for (int i = firstGood - 1; i >= 0; i--) {
                Element elem = get(i);
                if (elem instanceof Tag) {
                    if (handler.isParagraphTag((Tag) elem)) {
                        break;
                    }
                    firstGood = i;
                }
            }
            for (int i = lastGood + 1; i < size(); i++) {
                Element elem = get(i);
                if (elem instanceof Tag) {
                    if (handler.isParagraphTag((Tag) elem)) {
                        break;
                    }
                    lastGood = i;
                }
            }
        }

        boolean removeSpacesAround = Core.getFilterMaster().getConfig().isRemoveSpacesNonseg();
        // surrounding non-meaningful text was left out above - bring it back
        // if it must not be removed; the region grows over every non-meaningful
        // text element up to the nearest paragraph tag on each side
        if (!removeSpacesAround) {
            for (int i = firstGood - 1; i >= 0; i--) {
                Element elem = get(i);
                if (elem instanceof Tag) {
                    if (handler.isParagraphTag((Tag) elem)) {
                        break;
                    }
                }
                if ((elem instanceof Text) && !((Text) elem).isMeaningful()) {
                    firstGood = i;
                }
            }
            for (int i = lastGood + 1; i < size(); i++) {
                Element elem = get(i);
                if (elem instanceof Tag) {
                    if (handler.isParagraphTag((Tag) elem)) {
                        break;
                    }
                }
                if ((elem instanceof Text) && !((Text) elem).isMeaningful()) {
                    lastGood = i;
                }
            }
        }
    }

    /**
     * Assigns shortcut indexes to the tags between {@code firstGood} and
     * {@code lastGood} (both inclusive). Standalone and starting tags are
     * numbered consecutively from zero. An ending tag takes the index of its
     * starting tag, or the next free index when no starting tag is found
     * within the range.
     */
    private void enumerateTags(int firstGood, int lastGood) {
        int nextIndex = 0;
        for (int pos = firstGood; pos <= lastGood; pos++) {
            Element element = get(pos);
            if (!(element instanceof Tag)) {
                continue;
            }

            Tag current = (Tag) element;
            boolean standaloneOrStarting = Tag.Type.ALONE == current.getType()
                    || Tag.Type.BEGIN == current.getType();
            if (standaloneOrStarting) {
                current.setIndex(nextIndex++);
                continue;
            }
            if (Tag.Type.END != current.getType()) {
                continue;
            }

            // -1 marks the ending tag as unmatched until a partner is found
            current.setIndex(-1);
            Tag opener = findOpeningTag(current, pos, firstGood);
            if (opener != null) {
                current.setIndex(opener.getIndex());
            }
            if (current.getIndex() < 0) {
                // ending tag without a starting one
                current.setIndex(nextIndex++);
            }
        }
    }

    /**
     * Walks backwards from {@code closingPos - 1} down to {@code lowerBound}
     * looking for the starting tag that pairs with {@code closing}, taking
     * nested tags of the same name into account. Returns {@code null} when no
     * such tag exists in that range.
     */
    private Tag findOpeningTag(Tag closing, int closingPos, int lowerBound) {
        int depth = 1;
        for (int k = closingPos - 1; k >= lowerBound; k--) {
            Element element = get(k);
            if (!(element instanceof Tag)) {
                continue;
            }
            Tag candidate = (Tag) element;
            if (!candidate.getTag().equals(closing.getTag())) {
                continue;
            }
            if (Tag.Type.END == candidate.getType()) {
                depth++;
            } else if (Tag.Type.BEGIN == candidate.getType()) {
                depth--;
                if (depth == 0) {
                    return candidate;
                }
            }
        }
        return null;
    }

    /**
     * Returns the shortcut string representation of the entry source. This is
     * what the user translates. E.g. for {@code Here's <b>bold text</b>} it
     * should return {@code Here's <b0>bold text</b0>}. An empty string is
     * returned when the entry has no translatable region.
     *
     * @param tagsAggregation
     *            Whether tags of this entry can be aggregated. Changing this
     *            setting invalidates previously detected tags.
     * @param xmlDialect
     *            dialect used to construct the shortcuts
     * @param protectedParts
     *            list handed to the dialect together with the elements of the
     *            translatable region
     */
    public String sourceToShortcut(boolean tagsAggregation, XMLDialect xmlDialect, List<ProtectedPart> protectedParts) {
        if (this.tagsAggregationEnabled != tagsAggregation) {
            // Each change to tags aggregation setting resets detected tags
            tagsDetected = false;
            this.tagsAggregationEnabled = tagsAggregation;
        }

        int from = getFirstGood();
        int to = getLastGood();
        if (from > to) {
            return "";
        }
        return xmlDialect.constructShortcuts(elements.subList(from, to + 1), protectedParts);
    }

    /**
     * Same as {@link #sourceToShortcut(boolean, XMLDialect, List)}, keeping
     * the current tags aggregation setting.
     */
    private String sourceToShortcut(XMLDialect xmlDialect, List<ProtectedPart> protectedParts) {
        boolean currentAggregation = this.tagsAggregationEnabled;
        return sourceToShortcut(currentAggregation, xmlDialect, protectedParts);
    }

    /**
     * Returns the long XML-encoded representation of the source entry for
     * storing in TMX, built by concatenating the TMX form of every element.
     * E.g. for {@code Here's <b>bold text</b>} it should return
     * {@code Here's <bpt i="0">&lt;b0&gt;</bpt>bold text<ept i="0">&lt;/b0&gt;</ept>}.
     */
    public String sourceToTMX() {
        StringBuilder tmx = new StringBuilder();
        for (Element element : elements) {
            tmx.append(element.toTMX());
        }
        return tmx.toString();
    }

    /**
     * Returns the entry source in its original form, as it was in the
     * original document, built by concatenating the original form of every
     * element. E.g. for {@code Here's <b>bold text</b>} it should return the
     * very same string {@code Here's <b>bold text</b>}.
     */
    public String sourceToOriginal() {
        StringBuilder original = new StringBuilder();
        for (Element element : elements) {
            original.append(element.toOriginal());
        }
        return original.toString();
    }

    // //////////////////////////////////////////////////////////////////////////
    // Dealing with translation
    // //////////////////////////////////////////////////////////////////////////

    /**
     * Entry holding the translation with its tags recovered; {@code null}
     * while no translation differing from the source shortcut is set.
     */
    Entry translatedEntry = null;

    /**
     * Sets the translation of the shortcut string returned by
     * {@link #sourceToShortcut(boolean, XMLDialect, List)}.
     * <p>
     * A translation identical to the source shortcut needs no recovery: any
     * translation stored earlier is discarded, so that the translated output
     * is the source again. Any other translation is split into text and tags,
     * and the tags are mapped back to the tags of the source.
     * <p>
     * The translation is expected to contain all the same tags in weakly
     * correct order (this is not verified yet, the check is still to be
     * implemented):
     * <ul>
     * <li>All the tags present in source must be present in translation. For
     * example, {@code It's <b>bold</b> text} should <b>not</b> be translated
     * as {@code Etot tekst poluzhirnyi}.
     * <li>End tag goes after corresponding beginning tag. For example,
     * {@code It's <b>bold</b> text} should <b>not</b> be translated as
     * {@code Etot tekst </b>poluzhirnyi<b>}.
     * <li>If standalone tag or tag pair was enclosed in another tag pair in
     * source, it should be enclosed in translation. For example,
     * {@code It's <b>bold and <i>bold italic</i></b> text} should <b>not</b>
     * be translated as
     * {@code Etot tekst <b>poluzhirnyi</b> i <i>naklonnyi</i>}.
     * <li>Independent standalone tags and tag pairs may be reordered within
     * entry. For example, {@code It's <b>bold</b> and <i>italic</i> text}
     * <b>can</b> be translated as
     * {@code Etot tekst <i>naklonnyi</i> i <b>poluzhirnyi</b>}.
     * </ul>
     *
     * @param translation
     *            translated shortcut string
     * @param xmlDialect
     *            dialect used to build the source shortcut the translation is
     *            compared with
     * @param protectedParts
     *            list used both to build the source shortcut and to locate
     *            the tags inside the translation
     * @throws TranslationException
     *             -- if any tag is missing or tags are ordered incorrectly.
     */
    public void setTranslation(String translation, XMLDialect xmlDialect, List<ProtectedPart> protectedParts)
            throws TranslationException {
        if (sourceToShortcut(xmlDialect, protectedParts).equals(translation)) {
            // Same as the source: make sure a translation recovered by an
            // earlier call does not keep being returned.
            translatedEntry = null;
            return;
        }
        checkAndRecoverTags(translation, protectedParts);
    }

    /**
     * Rebuilds {@link #translatedEntry} from the translated shortcut string.
     * The text between shortcut tags becomes text elements, and every
     * shortcut tag that matches a tag of the translatable source region is
     * replaced by that source tag. A shortcut tag without a match is not
     * consumed and ends up inside the following text element.
     * <p>
     * The check for missing or wrongly ordered tags described in
     * {@link #setTranslation(String, XMLDialect, List)} is not implemented
     * yet.
     */
    private void checkAndRecoverTags(String translation, List<ProtectedPart> protectedParts) throws TranslationException {
        translatedEntry = new Entry(xmlDialect, handler);

        // /////////////////////////////////////////////////////////////////////
        // recovering tags
        ProtectedPart[] parts = protectedParts.toArray(new ProtectedPart[protectedParts.size()]);
        List<TagUtil.Tag> shortcuts = TagUtil.buildTagList(translation, parts);

        int consumed = 0;
        for (TagUtil.Tag shortcut : shortcuts) {
            // text preceding the shortcut tag
            if (shortcut.pos > consumed) {
                translatedEntry.add(createTextInstance(translation.substring(consumed, shortcut.pos)));
                consumed = shortcut.pos;
            }

            Tag sourceTag = findSourceTag(shortcut.tag);
            if (sourceTag != null) {
                translatedEntry.add(sourceTag);
                consumed += shortcut.tag.length();
            }
            // P.S. If shortcut tag isn't found, probably we should issue a
            // warning.
        }

        // text following the last shortcut tag
        if (consumed < translation.length()) {
            translatedEntry.add(createTextInstance(translation.substring(consumed)));
        }

        // /////////////////////////////////////////////////////////////////////
        // checking tags
        // TODO: implement checking
    }

    /**
     * Returns the first tag of the translatable source region whose shortcut
     * equals {@code shortcut}, or {@code null} when there is none.
     */
    private Tag findSourceTag(String shortcut) {
        int from = getFirstGood();
        int to = getLastGood();
        for (int idx = from; idx <= to; idx++) {
            Element candidate = get(idx);
            if (candidate instanceof Tag && ((Tag) candidate).toShortcut().equals(shortcut)) {
                return (Tag) candidate;
            }
        }
        return null;
    }

    /**
     * Creates a text element holding {@code str}. The element is created by
     * the text instance remembered for this entry when there is one, and is a
     * new {@link XMLText} otherwise.
     */
    private Text createTextInstance(String str) {
        Text prototype = getTextInstance();
        if (prototype == null) {
            return new XMLText(str, false);
        }
        return prototype.createInstance(str);
    }

    /**
     * Returns long XML-encoded representation of the entry translation for
     * storing in TMX.
     * <p>
     * Without a recovered translation this is the TMX form of the source.
     * Otherwise it is the TMX form of the source elements before the
     * translatable region, followed by the translation, followed by the
     * source elements after the region. If the entry has no translatable
     * region at all, every source element counts as being after the region.
     */
    public String translationToTMX() {
        if (translatedEntry == null) {
            return sourceToTMX();
        }

        int regionStart = getFirstGood();
        // An entry without translatable content reports lastGood == -2; the
        // clamp keeps the trailing loop from starting at index -1, which
        // would make get() throw IndexOutOfBoundsException.
        int afterRegion = Math.max(getLastGood() + 1, 0);

        StringBuilder buf = new StringBuilder();

        for (int i = 0; i < regionStart; i++) {
            buf.append(get(i).toTMX());
        }

        buf.append(translatedEntry.sourceToTMX());

        for (int i = afterRegion; i < size(); i++) {
            buf.append(get(i).toTMX());
        }

        return buf.toString();
    }

    /**
     * Returns the translated entry as it should be stored in translated
     * document.
     * <p>
     * Without a recovered translation this is the original form of the
     * source. Otherwise it is the original form of the source elements before
     * the translatable region, followed by the translation, followed by the
     * source elements after the region. If the entry has no translatable
     * region at all, every source element counts as being after the region.
     */
    public String translationToOriginal() {
        if (translatedEntry == null) {
            return sourceToOriginal();
        }

        int regionStart = getFirstGood();
        // An entry without translatable content reports lastGood == -2; the
        // clamp keeps the trailing loop from starting at index -1, which
        // would make get() throw IndexOutOfBoundsException.
        int afterRegion = Math.max(getLastGood() + 1, 0);

        StringBuilder buf = new StringBuilder();

        for (int i = 0; i < regionStart; i++) {
            buf.append(get(i).toOriginal());
        }

        buf.append(translatedEntry.sourceToOriginal());

        for (int i = afterRegion; i < size(); i++) {
            buf.append(get(i).toOriginal());
        }

        return buf.toString();
    }

    // /////////////////////////////////////////////////////////////////////////
    // List of EntryElement objects.
    // /////////////////////////////////////////////////////////////////////////

    /** The tags and text pieces that make up this entry, in document order. */
    private List<Element> elements = new ArrayList<Element>();

    /**
     * Appends an element, either a {@link Text} or a {@link Tag}, to this
     * entry. Previously detected tags become stale.
     */
    public void add(Element elem) {
        this.elements.add(elem);
        // every new element invalidates the detected tags
        this.tagsDetected = false;
    }

    /**
     * Removes the element at the given position from this entry. Previously
     * detected tags become stale.
     */
    public void remove(int index) {
        this.elements.remove(index);
        // every removed element invalidates the detected tags
        this.tagsDetected = false;
    }

    /**
     * Returns the element at position {@code i}. It can be either a
     * {@link Text} or a {@link Tag}.
     */
    public Element get(int i) {
        return this.elements.get(i);
    }

    /** Tells how many source elements this entry holds. */
    public int size() {
        return this.elements.size();
    }

    /** Tells whether this entry holds no elements at all. */
    public boolean isEmpty() {
        return this.elements.size() == 0;
    }
}