/**
* Copyright (C) 2001-2017 by RapidMiner and the contributors
*
* Complete list of developers available at our web site:
*
* http://rapidminer.com
*
* This program is free software: you can redistribute it and/or modify it under the terms of the
* GNU Affero General Public License as published by the Free Software Foundation, either version 3
* of the License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without
* even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License along with this program.
* If not, see http://www.gnu.org/licenses/.
*/
package com.rapidminer.operator.nio.model.xlsx;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.TreeMap;
import java.util.zip.ZipEntry;
import java.util.zip.ZipFile;
import org.xml.sax.Attributes;
import org.xml.sax.SAXException;
import com.rapidminer.operator.Operator;
import com.rapidminer.operator.UserError;
import com.rapidminer.operator.nio.model.xlsx.XlsxWorkbookParser.XlsxWorkbookSheet;
import com.rapidminer.operator.nio.model.xlsx.XlsxWorkbookRelationParser.XlsxWorkbookRel;
/**
* SAX parser for XLSX Workbook relations.
*
* This parser extracts the paths of files that are contained in an XLSX archive.
*
* @author Adrian Wilke, Nils Woehler
* @since 6.3.0
*/
public class XlsxWorkbookRelationParser extends AbstractXlsxSAXHandler<XlsxWorkbookRel> {

    /** Container for XLSX workbook relations. */
    static final class XlsxWorkbookRel {

        /** Path of the shared strings XML file, exactly as given by the relationship's Target attribute */
        public String sharedStringsPath;

        /** Path of the styles XML file, exactly as given by the relationship's Target attribute */
        public String stylesPath;

        /** Zip entry path of the worksheet file that belongs to the selected sheet */
        public String worksheetsPath;
    }

    /** Path of the embedded workbook relation file */
    private static final String FILE_WORKBOOK_REL = "xl/_rels/workbook.xml.rels";

    private static final String ATT_RELATIONSHIP_ID = "Id";
    private static final String ATT_RELATIONSHIP_TARGET = "Target";
    private static final String ATT_RELATIONSHIP_TYPE = "Type";

    private static final String TAG_RELATIONSHIP = "Relationship";

    /** Leading character of a Target that is given relative to the package root */
    private static final String ABSOLUTE_TARGET_PREFIX = "/";

    /**
     * Possible type declarations of the shared strings file.
     *
     * @see Apache POI project org.apache.poi.xssf.usermodel.XSSFRelation.java
     */
    private static final Set<String> TYPES_SHARED_STRINGS = new HashSet<>();

    static {
        TYPES_SHARED_STRINGS.add("application/vnd.openxmlformats-officedocument.spreadsheetml.sharedStrings+xml");
        TYPES_SHARED_STRINGS.add("http://schemas.openxmlformats.org/officeDocument/2006/relationships/sharedStrings");
        TYPES_SHARED_STRINGS.add("/xl/sharedStrings.xml");
    }

    /**
     * Possible type declarations of styles files.
     *
     * @see Apache POI project org.apache.poi.xssf.usermodel.XSSFRelation.java
     */
    public static final Set<String> TYPES_STYLES = new HashSet<>();

    static {
        TYPES_STYLES.add("application/vnd.openxmlformats-officedocument.spreadsheetml.styles+xml");
        TYPES_STYLES.add("http://schemas.openxmlformats.org/officeDocument/2006/relationships/styles");
        TYPES_STYLES.add("/xl/styles.xml");
    }

    /**
     * Possible type declarations of worksheet files.
     *
     * @see Apache POI project org.apache.poi.xssf.usermodel.XSSFRelation.java
     */
    private static final Set<String> TYPES_WORKSHEET = new HashSet<>();

    static {
        TYPES_WORKSHEET.add("application/vnd.openxmlformats-officedocument.spreadsheetml.worksheet+xml");
        TYPES_WORKSHEET.add("http://schemas.openxmlformats.org/officeDocument/2006/relationships/worksheet");
        TYPES_WORKSHEET.add("/xl/worksheets/sheet#.xml");
    }

    /** The container to access results */
    private final XlsxWorkbookRel xlsxWorkbookRel = new XlsxWorkbookRel();

    /** Maps relationship IDs to the zip entry paths of all parsed worksheets */
    private final Map<String, String> worksheetsPaths = new HashMap<>();

    /** The 0-based sheet index. */
    private final int sheetIndex;

    /** The list of parsed workbook sheets */
    private final List<XlsxWorkbookSheet> xlsxWorkbookSheets;

    /** The calling operator */
    private final Operator callingOperator;

    /** The XLSX zip file */
    private final ZipFile zipFile;

    /**
     * Creates a parser for the workbook relations of the given XLSX archive.
     *
     * @param callingOperator
     *            The operator that is passed to every {@link UserError} thrown by this parser.
     * @param zipFile
     *            The XLSX zip file; used to verify that the selected worksheet entry exists.
     * @param xlsxWorkbookSheets
     *            The parsed workbook sheets; the relationship ID of the selected sheet is read
     *            from this list.
     * @param sheetIndex
     *            The 0-based index of the selected sheet within {@code xlsxWorkbookSheets}.
     */
    public XlsxWorkbookRelationParser(Operator callingOperator, ZipFile zipFile, List<XlsxWorkbookSheet> xlsxWorkbookSheets,
            int sheetIndex) {
        this.callingOperator = callingOperator;
        this.zipFile = zipFile;
        this.xlsxWorkbookSheets = xlsxWorkbookSheets;
        this.sheetIndex = sheetIndex;
    }

    /**
     * Gets the result of the parsing process.
     *
     * Resolves the worksheet path of the selected sheet via its relationship ID and verifies
     * that the corresponding zip entry exists before storing it in the result container.
     *
     * @return A container object
     * @throws UserError
     *             with code 953 if the sheet index is outside the list of parsed sheets, or with
     *             the key {@code xlsx_file_missing_entry} if no worksheet relationship or no zip
     *             entry exists for the selected sheet.
     */
    @Override
    public XlsxWorkbookRel getResult() throws UserError {
        // A negative index would otherwise surface as an IndexOutOfBoundsException from List#get
        if (sheetIndex < 0 || sheetIndex >= xlsxWorkbookSheets.size()) {
            throw new UserError(callingOperator, 953, sheetIndex + 1);
        }

        // Get name of zip entry by using the sheet relationship ID
        String relationshipId = xlsxWorkbookSheets.get(sheetIndex).rId;
        String worksheetPath = worksheetsPaths.get(relationshipId);
        if (worksheetPath == null) {
            // No usable worksheet relationship was parsed for this ID. ZipFile#getEntry rejects
            // a null name with a NullPointerException, so report a user error instead.
            // NOTE(review): the relationship ID is passed as the message argument because no
            // path is known here -- confirm this reads well with the message template.
            throw new UserError(callingOperator, "xlsx_file_missing_entry", relationshipId);
        }

        // Lookup zip entry to check if it exists
        ZipEntry worksheetZipEntry = zipFile.getEntry(worksheetPath);
        if (worksheetZipEntry == null) {
            throw new UserError(callingOperator, "xlsx_file_missing_entry", worksheetPath);
        }

        // The worksheet file was found, so publish its path
        xlsxWorkbookRel.worksheetsPath = worksheetPath;
        return xlsxWorkbookRel;
    }

    /**
     * Returns a map representation of the specified attributes.
     *
     * @param attributes
     *            The attributes which are parsed.
     * @param useQualifiedNames
     *            If <code>true</code> prefixed attribute names are used, if <code>false</code>
     *            local names are used.
     * @return A map with attribute names and the respective values.
     */
    private Map<String, String> getAttributesMap(Attributes attributes, boolean useQualifiedNames) {
        Map<String, String> map = new TreeMap<>();
        int attributeCount = attributes.getLength();
        for (int i = 0; i < attributeCount; i++) {
            String name = useQualifiedNames ? attributes.getQName(i) : attributes.getLocalName(i);
            map.put(name, attributes.getValue(i));
        }
        return map;
    }

    /**
     * Converts the Target of a worksheet relationship into a zip entry path.
     *
     * A Target starting with {@code /} is taken as a path from the package root, so only the
     * leading slash is removed. Any other Target is appended to
     * {@code XlsxUtilities.XLSX_PATH_PREFIX}.
     *
     * @param target
     *            The non-null Target attribute value of the relationship.
     * @return The path used to look up the worksheet inside the zip file.
     */
    private static String toWorksheetZipPath(String target) {
        if (target.startsWith(ABSOLUTE_TARGET_PREFIX)) {
            // Prepending the prefix to an absolute Target could never match a zip entry
            return target.substring(ABSOLUTE_TARGET_PREFIX.length());
        }
        return XlsxUtilities.XLSX_PATH_PREFIX + target;
    }

    /**
     * Evaluates {@code Relationship} elements and records the targets of the shared strings,
     * styles and worksheet relationships. All other elements are ignored.
     *
     * @throws SAXException
     *             if a {@code Relationship} element has no {@code Type} attribute.
     */
    @Override
    public void startElement(String uri, String localName, String qName, Attributes attributes) throws SAXException {
        if (!TAG_RELATIONSHIP.equals(qName)) {
            return;
        }

        Map<String, String> attributesMap = getAttributesMap(attributes, true);
        if (!attributesMap.containsKey(ATT_RELATIONSHIP_TYPE)) {
            throw new SAXException("Workbook relations entry malformed. XML attribute '" + ATT_RELATIONSHIP_TYPE
                    + "' not found.");
        }

        String typeValue = attributesMap.get(ATT_RELATIONSHIP_TYPE);
        String target = attributesMap.get(ATT_RELATIONSHIP_TARGET);

        // Check if relationship is shared strings
        if (TYPES_SHARED_STRINGS.contains(typeValue)) {
            xlsxWorkbookRel.sharedStringsPath = target;
        }

        // Check if relationship is styles
        if (TYPES_STYLES.contains(typeValue)) {
            xlsxWorkbookRel.stylesPath = target;
        }

        // Check if relationship is worksheet. A relationship without Target is skipped so that
        // no bogus path is stored; getResult() reports it if that sheet is the selected one.
        if (TYPES_WORKSHEET.contains(typeValue) && target != null) {
            worksheetsPaths.put(attributesMap.get(ATT_RELATIONSHIP_ID), toWorksheetZipPath(target));
        }
    }

    @Override
    protected String getZipEntryPath() {
        return FILE_WORKBOOK_REL;
    }
}