/**************************************************************************
 OmegaT - Computer Assisted Translation (CAT) tool
          with fuzzy matching, translation memory, keyword search,
          glossaries, and translation leveraging into updated projects.

 Copyright (C) 2013 Aaron Madlon-Kay
               Home page: http://www.omegat.org/
               Support center: http://groups.yahoo.com/group/OmegaT/

 This file is part of OmegaT.

 OmegaT is free software: you can redistribute it and/or modify
 it under the terms of the GNU General Public License as published by
 the Free Software Foundation, either version 3 of the License, or
 (at your option) any later version.

 OmegaT is distributed in the hope that it will be useful,
 but WITHOUT ANY WARRANTY; without even the implied warranty of
 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 GNU General Public License for more details.

 You should have received a copy of the GNU General Public License
 along with this program.  If not, see <http://www.gnu.org/licenses/>.
 **************************************************************************/

package org.omegat.core.tagvalidation;

import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Stack;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import org.omegat.core.data.SourceTextEntry;
import org.omegat.core.tagvalidation.ErrorReport.TagError;
import org.omegat.util.PatternConsts;
import org.omegat.util.Preferences;
import org.omegat.util.TagUtil;
import org.omegat.util.TagUtil.Tag;

/**
 * Static checks that compare the tags and placeholders found in the source
 * text of an {@link ErrorReport} with those found in its translation, and
 * record any discrepancies in the report's <code>srcErrors</code> and
 * <code>transErrors</code> maps.
 *
 * @author Aaron Madlon-Kay
 */
public class TagValidation {

    /**
     * Check Java MessageFormat-style variables. Every match of
     * {@link PatternConsts#SIMPLE_JAVA_MESSAGEFORMAT_PATTERN_VARS} in the
     * source and in the translation is collected, and the two collections
     * are compared without regard to order.
     *
     * @param report The report providing the texts and receiving the errors
     */
    public static void inspectJavaMessageFormat(ErrorReport report) {
        Pattern pattern = PatternConsts.SIMPLE_JAVA_MESSAGEFORMAT_PATTERN_VARS;
        List<Tag> srcTags = collectMatches(pattern, report.source);
        List<Tag> locTags = collectMatches(pattern, report.translation);
        inspectUnorderedTags(srcTags, locTags, report);
    }

    /**
     * Collect every match of a pattern in a text as a {@link Tag} holding
     * the match's start offset and its full matched text.
     */
    private static List<Tag> collectMatches(Pattern pattern, String text) {
        List<Tag> tags = new ArrayList<Tag>();
        Matcher matcher = pattern.matcher(text);
        while (matcher.find()) {
            tags.add(new Tag(matcher.start(), matcher.group(0)));
        }
        return tags;
    }

    /**
     * Check printf-style variables. If the normalized variables of the
     * source and of the translation differ in any way, every variable on
     * both sides is reported as {@link TagError#UNSPECIFIED}.
     *
     * @param simpleCheckOnly If true, use {@link PatternConsts#SIMPLE_PRINTF_VARS};
     *        otherwise use {@link PatternConsts#PRINTF_VARS}
     * @param report The report providing the texts and receiving the errors
     */
    public static void inspectPrintfVariables(boolean simpleCheckOnly, ErrorReport report) {
        Pattern printfPattern = simpleCheckOnly
                ? PatternConsts.SIMPLE_PRINTF_VARS
                : PatternConsts.PRINTF_VARS;

        // printf variables should be equal in number,
        // but order can change
        // (and with that also notation: e.g. from '%s' to '%1$s')
        // We check this by adding the string "index+type specifier"
        // of every found variable to a set.
        // (Actually a map, so we can keep track of the original
        // variable for display purposes.)
        // If the sets (map keys) of the source and target are not equal, then
        // there is a problem: either missing or extra variables,
        // or the type specifier has changed for the variable at the
        // given index.
        Map<String, Tag> srcTags = extractPrintfVars(printfPattern, report.source);
        Map<String, Tag> locTags = extractPrintfVars(printfPattern, report.translation);

        if (!srcTags.keySet().equals(locTags.keySet())) {
            for (Tag tag : srcTags.values()) {
                report.srcErrors.put(tag, TagError.UNSPECIFIED);
            }
            for (Tag tag : locTags.values()) {
                report.transErrors.put(tag, TagError.UNSPECIFIED);
            }
        }
    }

    /**
     * Extract printf-style variables from a text, keyed by a normalized
     * "index + type specifier" string.
     * <p>
     * The type specifier is the last character of the matched variable.
     * If capture group 1 of the match is non-null and ends with
     * <code>$</code>, the index is group 1 without that trailing
     * <code>$</code>. Otherwise the index is a counter that starts at 1 and
     * is incremented only for variables of this second kind.
     * <p>
     * When two variables normalize to the same key, the later one replaces
     * the earlier one in the returned map.
     *
     * @param printfPattern The pattern to search with; group 1 is read for
     *        every match
     * @param translation The text to search (despite the name, this is
     *        called for both the source and the translation)
     * @return A map from normalized key to a {@link Tag} holding the
     *         variable's start offset and original text
     */
    public static Map<String, Tag> extractPrintfVars(Pattern printfPattern, String translation) {
        Matcher printfMatcher = printfPattern.matcher(translation);
        Map<String, Tag> nameMapping = new HashMap<String, Tag>();
        int index = 1;
        while (printfMatcher.find()) {
            String printfVariable = printfMatcher.group(0);
            String argumentSwapSpecifier = printfMatcher.group(1);
            String typeSpecifier = printfVariable.substring(printfVariable.length() - 1);

            String normalized;
            if (argumentSwapSpecifier != null && argumentSwapSpecifier.endsWith("$")) {
                // Explicit index, e.g. "1$": keep the number, drop the '$'.
                normalized = argumentSwapSpecifier.substring(0, argumentSwapSpecifier.length() - 1)
                        + typeSpecifier;
            } else {
                // No explicit index: number the variable by order of appearance.
                normalized = index + typeSpecifier;
                index++;
            }
            nameMapping.put(normalized, new Tag(printfMatcher.start(), printfVariable));
        }
        return nameMapping;
    }

    /**
     * Check that the translation starts and ends with a newline exactly
     * when the source does. A newline present only in the source is
     * recorded in <code>srcErrors</code>; one present only in the
     * translation is recorded in <code>transErrors</code>. Both use
     * {@link TagError#WHITESPACE}.
     *
     * @param report The report providing the texts and receiving the errors
     */
    public static void inspectPOWhitespace(ErrorReport report) {
        // check PO line start:
        boolean srcStartsWith = report.source.startsWith("\n");
        boolean trgStartsWith = report.translation.startsWith("\n");
        if (srcStartsWith && !trgStartsWith) {
            report.srcErrors.put(new Tag(0, "\n"), TagError.WHITESPACE);
        }
        if (!srcStartsWith && trgStartsWith) {
            report.transErrors.put(new Tag(0, "\n"), TagError.WHITESPACE);
        }

        // check PO line ending:
        boolean srcEndsWith = report.source.endsWith("\n");
        boolean trgEndsWith = report.translation.endsWith("\n");
        if (srcEndsWith && !trgEndsWith) {
            report.srcErrors.put(new Tag(report.source.length() - 1, "\n"), TagError.WHITESPACE);
        }
        if (!srcEndsWith && trgEndsWith) {
            report.transErrors.put(new Tag(report.translation.length() - 1, "\n"), TagError.WHITESPACE);
        }
    }

    /**
     * Check OmegaT tags. Tag lists for the source and the translation are
     * built with {@link TagUtil#buildTagList} from the entry's protected
     * parts and then validated as ordered tags; loose ordering is taken
     * from the {@link Preferences#LOOSE_TAG_ORDERING} preference.
     *
     * @param ste The entry whose protected parts define the tags
     * @param report The report providing the texts and receiving the errors
     */
    public static void inspectOmegaTTags(SourceTextEntry ste, ErrorReport report) {
        // extract tags from src and loc string
        List<Tag> srcTags = TagUtil.buildTagList(report.source, ste.getProtectedParts());
        List<Tag> locTags = TagUtil.buildTagList(report.translation, ste.getProtectedParts());

        inspectOrderedTags(srcTags, locTags,
                Preferences.isPreference(Preferences.LOOSE_TAG_ORDERING), report);
    }

    /**
     * Report every match of the remove pattern in the translation as
     * {@link TagError#EXTRANEOUS}. Does nothing when
     * {@link PatternConsts#getRemovePattern()} returns null.
     *
     * @param report The report providing the translation and receiving the errors
     */
    public static void inspectRemovePattern(ErrorReport report) {
        Pattern removePattern = PatternConsts.getRemovePattern();
        if (removePattern == null) {
            return;
        }
        Matcher removeMatcher = removePattern.matcher(report.translation);
        while (removeMatcher.find()) {
            report.transErrors.put(new Tag(removeMatcher.start(), removeMatcher.group()),
                    TagError.EXTRANEOUS);
        }
    }

    /**
     * Compare two tag lists by tag text only, ignoring order and
     * multiplicity. A source tag whose text does not occur in the
     * translation is reported as {@link TagError#MISSING}; a translation
     * tag whose text does not occur in the source is reported as
     * {@link TagError#EXTRANEOUS}.
     *
     * @param srcTags A list of tags in the source text
     * @param locTags A list of tags in the translated text
     * @param report The report to append errors to
     */
    protected static void inspectUnorderedTags(List<Tag> srcTags, List<Tag> locTags,
            ErrorReport report) {
        for (Tag tag : srcTags) {
            if (!containsTag(locTags, tag.tag)) {
                report.srcErrors.put(tag, TagError.MISSING);
            }
        }
        for (Tag tag : locTags) {
            if (!containsTag(srcTags, tag.tag)) {
                report.transErrors.put(tag, TagError.EXTRANEOUS);
            }
        }
    }

    /**
     * Check that translated tags are well-formed.
     * In order to accommodate tags orphaned by segmenting,
     * unmatched tags are allowed, but only if they don't interfere with
     * non-orphaned tags.
     *
     * @param srcTags A list of tags in the source text
     * @param locTags A list of tags in the translated text
     * @param looseOrdering If false, tags common to both lists that appear
     *        in a different relative order in the translation are reported
     *        as {@link TagError#ORDER} before the other checks run; if
     *        true, that ordering check is skipped
     * @param report The report to append errors to
     */
    protected static void inspectOrderedTags(List<Tag> srcTags, List<Tag> locTags,
            boolean looseOrdering, ErrorReport report) {

        // If we're doing strict validation, pre-fill the report with warnings
        // about out-of-order tags.
        if (!looseOrdering) {
            List<Tag> commonTagsSrc = getCommonTags(srcTags, locTags);
            List<Tag> commonTagsLoc = getCommonTags(locTags, srcTags);
            for (int i = 0; i < commonTagsSrc.size(); i++) {
                Tag tag = commonTagsLoc.get(i);
                if (!tag.tag.equals(commonTagsSrc.get(i).tag)) {
                    report.transErrors.put(tag, TagError.ORDER);
                    // Drop position i from both lists and re-examine the
                    // entries that slide into this position.
                    commonTagsSrc.remove(i);
                    commonTagsLoc.remove(i);
                    i--;
                }
            }
        }

        // Check translation tags.
        List<Tag> expectedTags = new ArrayList<Tag>(srcTags);
        Stack<Tag> tagStack = new Stack<Tag>();
        for (Tag tag : locTags) {
            // Make sure tag exists in source.
            if (!containsTag(srcTags, tag.tag)) {
                report.transErrors.put(tag, TagError.EXTRANEOUS);
                continue;
            }

            // Reduce count. If we're below zero, there's extra in the translation.
            Tag expected = removeTag(expectedTags, tag.tag);
            if (expected == null) {
                report.transErrors.put(tag, TagError.DUPLICATE);
                continue;
            }

            // Build stack of tags to check well-formedness.
            switch (tag.getType()) {
            case START:
                tagStack.push(tag);
                break;
            case END:
                if (!tagStack.isEmpty() && tagStack.peek().getName().equals(tag.getName())) {
                    // Closing a tag normally.
                    tagStack.pop();
                } else {
                    while (!tagStack.isEmpty()) {
                        // Closing the wrong opening tag.
                        // Rewind stack until we find its pair. Report everything along
                        // the way as malformed.
                        Tag last = tagStack.pop();
                        report.transErrors.put(last, TagError.MALFORMED);
                        if (last.getName().equals(tag.getName())) {
                            break;
                        }
                    }
                    // If the stack was empty to begin with or we emptied it above,
                    // report the tag, but only if it's not a valid orphan.
                    if (tagStack.isEmpty()) {
                        reportUnpairedTag(tag, srcTags, locTags, report);
                    }
                }
                break;
            case SINGLE:
                // Ignore
                break;
            }
        }

        // Check expected tags for anything left.
        for (Tag tag : expectedTags) {
            report.srcErrors.put(tag, TagError.MISSING);
        }

        // Check the stack to see if there are straggling open tags.
        while (!tagStack.isEmpty()) {
            // Allow stragglers only if they're orphans.
            reportUnpairedTag(tagStack.pop(), srcTags, locTags, report);
        }
    }

    /**
     * Report a translation tag that was not paired up during the stack
     * check, unless it is a valid orphan. Nothing is reported when the
     * tag's paired tag does not occur in the source. Otherwise the tag is
     * reported as {@link TagError#MALFORMED} if the paired tag also occurs
     * in the translation, and as {@link TagError#ORPHANED} if it does not.
     */
    private static void reportUnpairedTag(Tag tag, List<Tag> srcTags, List<Tag> locTags,
            ErrorReport report) {
        String pair = tag.getPairedTag();
        if (containsTag(srcTags, pair)) {
            report.transErrors.put(tag,
                    containsTag(locTags, pair) ? TagError.MALFORMED : TagError.ORPHANED);
        }
    }

    /**
     * Return the tags of <code>orig</code>, in their original order, that
     * can be matched by tag text against a tag in <code>compare</code>.
     * Each tag in <code>compare</code> is consumed by at most one match, so
     * repeated tags are matched one-to-one. Neither input list is modified.
     */
    private static List<Tag> getCommonTags(List<Tag> orig, List<Tag> compare) {
        List<Tag> result = new ArrayList<Tag>();
        List<Tag> uninspected = new ArrayList<Tag>(compare);
        for (Tag oTag : orig) {
            for (Tag cTag : uninspected) {
                if (oTag.tag.equals(cTag.tag)) {
                    result.add(oTag);
                    // Safe despite the enclosing for-each: we leave the
                    // loop immediately after modifying the list.
                    uninspected.remove(cTag);
                    break;
                }
            }
        }
        return result;
    }

    /**
     * Tell whether any tag in the list has the given tag text.
     * A null text is never considered present.
     */
    private static boolean containsTag(List<Tag> tags, String tag) {
        if (tag == null) {
            return false;
        }
        for (Tag t : tags) {
            if (t.tag.equals(tag)) {
                return true;
            }
        }
        return false;
    }

    /**
     * Remove and return the first tag in the list with the given tag text,
     * or return null (leaving the list untouched) if there is none.
     */
    private static Tag removeTag(List<Tag> tags, String tag) {
        for (int i = 0; i < tags.size(); i++) {
            Tag t = tags.get(i);
            if (t.tag.equals(tag)) {
                tags.remove(i);
                return t;
            }
        }
        return null;
    }
}