/**************************************************************************
OmegaT - Computer Assisted Translation (CAT) tool
with fuzzy matching, translation memory, keyword search,
glossaries, and translation leveraging into updated projects.
Copyright (C) 2000-2006 Keith Godfrey and Maxym Mykhalchuk
2007-2013 Didier Briel
2015-2016 Didier Briel
Home page: http://www.omegat.org/
Support center: http://groups.yahoo.com/group/OmegaT/
This file is part of OmegaT.
OmegaT is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
OmegaT is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
**************************************************************************/
package org.omegat.filters3.xml.openxml;
import java.awt.Window;
import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.util.Collections;
import java.util.Enumeration;
import java.util.List;
import java.util.Map;
import java.util.logging.Level;
import java.util.logging.Logger;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import java.util.zip.ZipEntry;
import java.util.zip.ZipFile;
import java.util.zip.ZipOutputStream;
import org.apache.commons.io.FileUtils;
import org.apache.commons.io.IOUtils;
import org.omegat.filters2.AbstractFilter;
import org.omegat.filters2.FilterContext;
import org.omegat.filters2.Instance;
import org.omegat.filters2.TranslationException;
import org.omegat.util.Log;
import org.omegat.util.OStrings;
/**
* Filter for Open XML file format.
*
* @author Maxym Mykhalchuk
* @author Didier Briel
*/
public class OpenXMLFilter extends AbstractFilter {
private static final Logger LOGGER = Logger.getLogger(OpenXMLFilter.class.getName());
private String DOCUMENTS;
private Pattern TRANSLATABLE;
private static final Pattern DIGITS = Pattern.compile("(\\d+)\\.xml");
/**
* Defines the documents to read according to options
*/
private void defineDOCUMENTSOptions(Map<String, String> config) {
/*
Complete string when all options are enabled
Word
"(document\\.xml)|(comments\\.xml)|(footnotes\\.xml)|(endnotes\\.xml)|(header\\d+\\.xml)
|(footer\\d+\\.xml)|(core\\.xml)"
Excel
"|(sharedStrings\\.xml)|(comments\\d+\\.xml)"
PowerPoint
"|(slide\\d+\\.xml)|(slideMaster\\d+\\.xml)| (slideLayout\\d+\\.xml)|(notesSlide\\d+\\.xml)"
Global
"|(data\\d+\\.xml)|(chart\\d+\\.xml)|(drawing\\d+\\.xml)"
Excel
"|(workbook\\.xml)"
Visio
"|(page\\d+\\.xml)
*/
StringBuilder sb = new StringBuilder("(document\\.xml)");
OpenXMLOptions options = new OpenXMLOptions(config);
if (options.getTranslateComments()) {
sb.append("|(comments\\.xml)");
}
if (options.getTranslateFootnotes()) {
sb.append("|(footnotes\\.xml)");
}
if (options.getTranslateEndnotes()) {
sb.append("|(endnotes\\.xml)");
}
if (options.getTranslateHeaders()) {
sb.append("|(header\\d+\\.xml)");
}
if (options.getTranslateFooters()) {
sb.append("|(footer\\d+\\.xml)");
}
if (options.getTranslateDocumentProperties()) {
sb.append("|(core\\.xml)");
}
sb.append("|(sharedStrings\\.xml)");
if (options.getTranslateExcelComments()) {
sb.append("|(comments\\d+\\.xml)");
}
sb.append("|(slide\\d+\\.xml)");
if (options.getTranslateSlideMasters()) {
sb.append("|(slideMaster\\d+\\.xml)");
}
if (options.getTranslateSlideLayouts()) {
sb.append("|(slideLayout\\d+\\.xml)");
}
if (options.getTranslateSlideComments()) {
sb.append("|(notesSlide\\d+\\.xml)");
}
if (options.getTranslateDiagrams()) {
sb.append("|(data\\d+\\.xml)");
}
if (options.getTranslateCharts()) {
sb.append("|(chart\\d+\\.xml)");
}
if (options.getTranslateDrawings()) {
sb.append("|(drawing\\d+\\.xml)");
}
if (options.getTranslateSheetNames()) {
sb.append("|(workbook\\.xml)");
}
if (options.getTranslateLinks()) {
sb.append("|(\\w+\\d*\\.xml\\.rels)");
}
sb.append("|(page\\d+\\.xml)");
DOCUMENTS = sb.toString();
TRANSLATABLE = Pattern.compile(DOCUMENTS);
}
@Override
protected boolean requirePrevNextFields() {
return true;
}
/** Returns true if it's an Open XML file. */
@Override
public boolean isFileSupported(File inFile, Map<String, String> config, FilterContext fc) {
defineDOCUMENTSOptions(config); // Define the documents to read
try (ZipFile file = new ZipFile(inFile)) {
Enumeration<? extends ZipEntry> entries = file.entries();
while (entries.hasMoreElements()) {
ZipEntry entry = entries.nextElement();
String shortname = entry.getName();
shortname = removePath(shortname);
Matcher filematch = TRANSLATABLE.matcher(shortname);
if (filematch.matches()) {
return true;
}
}
} catch (IOException e) {
LOGGER.log(Level.FINE, e.getLocalizedMessage(), e);
}
return false;
}
private OpenXMLXMLFilter createXMLFilter() {
OpenXMLXMLFilter xmlfilter = new OpenXMLXMLFilter();
xmlfilter.setCallbacks(entryParseCallback, entryTranslateCallback);
// Defining the actual dialect, because at this step
// we have the options
OpenXMLDialect dialect = (OpenXMLDialect) xmlfilter.getDialect();
dialect.defineDialect(new OpenXMLOptions(processOptions));
return xmlfilter;
}
/** Returns a temporary file for Open XML. A nasty hack, to say polite way. */
private static File tmp() throws IOException {
return File.createTempFile("o-xml-temp", ".xml");
}
/**
* @param fileName A filename with a path
* @return A string without the path
*/
private static String removePath(String fileName) {
if (fileName.lastIndexOf('/') >= 0) {
fileName = fileName.substring(fileName.lastIndexOf('/') + 1);
} else if (fileName.lastIndexOf('\\') >= 0) { // Some weird files may use a backslash
fileName = fileName.substring(fileName.lastIndexOf('\\') + 1);
}
return fileName;
}
/**
* @param fileName A filename
* @return The filename without an .xml extension if found in it
*/
private static String removeXML(String fileName) {
if (fileName.endsWith(".xml"))
fileName = fileName.substring(0, fileName.lastIndexOf(".xml"));
return fileName;
}
/**
* Processes a single OpenXML file, which is actually a ZIP file consisting of many XML files, some of
* which should be translated.
*/
@Override
public void processFile(File inFile, File outFile, FilterContext fc) throws IOException,
TranslationException {
defineDOCUMENTSOptions(processOptions); // Define the documents to read
ZipOutputStream zipout = null;
try (ZipFile zipfile = new ZipFile(inFile)) {
if (outFile != null) {
zipout = new ZipOutputStream(new FileOutputStream(outFile));
}
Enumeration<? extends ZipEntry> unsortedZipcontents = zipfile.entries();
List<? extends ZipEntry> filelist = Collections.list(unsortedZipcontents);
// Sort filenames, because zipfile.entries give a random order
// We use a simplified natural sort, to have slide1, slide2 ...
// slide10
// instead of slide1, slide10, slide 2
// We also order files arbitrarily, to have, for instance
// documents.xml before comments.xml
Collections.sort(filelist, this::compareZipEntries);
for (ZipEntry zipentry : filelist) {
String shortname = removePath(zipentry.getName());
if (TRANSLATABLE.matcher(shortname).matches()) {
File tmpin = tmp();
FileUtils.copyInputStreamToFile(zipfile.getInputStream(zipentry), tmpin);
File tmpout = null;
if (zipout != null) {
tmpout = tmp();
}
try {
createXMLFilter().processFile(tmpin, tmpout, fc);
} catch (Exception e) {
LOGGER.log(Level.SEVERE, e.getLocalizedMessage(), e);
throw new TranslationException(e.getLocalizedMessage() + "\n"
+ OStrings.getString("OpenXML_ERROR_IN_FILE") + inFile, e);
}
if (zipout != null) {
ZipEntry outEntry = new ZipEntry(zipentry.getName());
zipout.putNextEntry(outEntry);
FileUtils.copyFile(tmpout, zipout);
zipout.closeEntry();
}
if (!tmpin.delete()) {
tmpin.deleteOnExit();
}
if (tmpout != null && !tmpout.delete()) {
tmpout.deleteOnExit();
}
} else {
if (zipout != null) {
ZipEntry outEntry = new ZipEntry(zipentry.getName());
zipout.putNextEntry(outEntry);
try (InputStream is = zipfile.getInputStream(zipentry)) {
IOUtils.copy(is, zipout);
}
zipout.closeEntry();
}
}
}
} finally {
if (zipout != null) {
zipout.close();
}
}
}
public int compareZipEntries(ZipEntry z1, ZipEntry z2) {
String s1 = z1.getName();
String s2 = z2.getName();
String[] words1 = s1.split("\\d+\\.");
String[] words2 = s2.split("\\d+\\.");
// Digits at the end and same text
if ((words1.length > 1 && words2.length > 1) && // Digits
(words1[0].equals(words2[0]))) { // Same text
int number1 = 0;
int number2 = 0;
Matcher getDigits = DIGITS.matcher(s1);
if (getDigits.find()) {
number1 = Integer.parseInt(getDigits.group(1));
}
getDigits = DIGITS.matcher(s2);
if (getDigits.find()) {
number2 = Integer.parseInt(getDigits.group(1));
}
if (number1 > number2) {
return 1;
} else if (number1 < number2) {
return -1;
} else {
return 0;
}
} else {
String shortname1 = removePath(words1[0]);
shortname1 = removeXML(shortname1);
String shortname2 = removePath(words2[0]);
shortname2 = removeXML(shortname2);
// Specific case for Excel
// because "comments" is present twice in DOCUMENTS
if (shortname1.indexOf("sharedStrings") >= 0 || shortname2.indexOf("sharedStrings") >= 0) {
if (shortname2.indexOf("sharedStrings") >= 0) {
return 1; // sharedStrings must be first
} else {
return -1;
}
}
int index1 = DOCUMENTS.indexOf(shortname1);
int index2 = DOCUMENTS.indexOf(shortname2);
if (index1 > index2) {
return 1;
} else if (index1 < index2) {
return -1;
} else { // Documents were not in DOCUMENTS, we keep the normal order
return s1.compareTo(s2);
}
}
}
/** Human-readable Open XML filter name. */
@Override
public String getFileFormatName() {
return OStrings.getString("OpenXML_FILTER_NAME");
}
/** Extensions... */
@Override
public Instance[] getDefaultInstances() {
return new Instance[] {
new Instance("*.doc?"),
new Instance("*.dotx"),
new Instance("*.xls?"),
new Instance("*.ppt?"),
new Instance("*.vsdx")
};
}
/** Source encoding cannot be varied by the user. */
@Override
public boolean isSourceEncodingVariable() {
return false;
}
/** Target encoding cannot be varied by the user. */
@Override
public boolean isTargetEncodingVariable() {
return false;
}
/** Not implemented. */
@Override
protected void processFile(BufferedReader inFile, BufferedWriter outFile, FilterContext fc) throws IOException,
TranslationException {
throw new IOException("Not Implemented!");
}
/**
* Returns true to indicate that the OpenXML filter has options.
*
* @return True, because the OpenXML filter has options.
*/
@Override
public boolean hasOptions() {
return true;
}
/**
* OpenXML Filter shows a <b>modal</b> dialog to edit its own options.
*
* @param currentOptions
* Current options to edit.
* @return Updated filter options if user confirmed the changes, and current options otherwise.
*/
@Override
public Map<String, String> changeOptions(Window parent, Map<String, String> currentOptions) {
try {
EditOpenXMLOptionsDialog dialog = new EditOpenXMLOptionsDialog(parent, currentOptions);
dialog.setVisible(true);
if (EditOpenXMLOptionsDialog.RET_OK == dialog.getReturnStatus())
return dialog.getOptions().getOptionsMap();
else
return null;
} catch (Exception e) {
Log.logErrorRB("HTML_EXC_EDIT_OPTIONS");
Log.log(e);
return null;
}
}
@Override
public String getInEncodingLastParsedFile() {
// Encoding is 'binary', it is zipped. Inside there may be many files.
// It makes no sense to display the encoding of some xml file inside.
return "OpenXML";
}
}