/* * ARX: Powerful Data Anonymization * Copyright 2014 - 2015 Karol Babioch, Fabian Prasser, Florian Kohlmayer * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.deidentifier.arx.io; import org.apache.commons.io.FilenameUtils; import org.apache.poi.ss.usermodel.Cell; import org.apache.poi.ss.usermodel.Row; /** * Configuration describing an Excel file * * This is used to describe Excel files. Both file types (XLS and XLSX) are * supported. The file type can either be detected automatically by the file * extension, or alternatively can be set manually. Furthermore there is a sheet * index {@link #sheetIndex}, which describes which sheet within the file should * be used. * * @author Karol Babioch * @author Fabian Prasser */ public class ImportConfigurationExcel extends ImportConfigurationFile implements IImportConfigurationWithHeader { /** * Valid file types for Excel files * * XLS is the "old" Excel file type, XLSX is the "new" Excel file type. */ public enum ExcelFileTypes { /** TODO */ XLS, /** TODO */ XLSX }; /** * Used file type * * This is the actual filetype that will be used. * * @see {@link #setExcelFileType(ExcelFileTypes excelFileType)} */ private ExcelFileTypes excelFileType; /** Sheet index. */ private int sheetIndex; /** * Indicates whether first row contains header (names of columns). * * @see {@link IImportConfigurationWithHeader} */ private boolean containsHeader; /** * Creates a new instance of this object. * * @param fileLocation {@link #setFileLocation(String)} * @param excelFileType {@link #setExcelFileType(ExcelFileTypes)} * @param sheetIndex {@link #setSheetIndex(int)} * @param containsHeader {@link #setContainsHeader(boolean)} */ public ImportConfigurationExcel(String fileLocation, ExcelFileTypes excelFileType, int sheetIndex, boolean containsHeader) { setFileLocation(fileLocation); setExcelFileType(excelFileType); setSheetIndex(sheetIndex); setContainsHeader(containsHeader); } /** * Creates a new instance of this object without specifying the file type * * The file type will be detected automatically using the file extension. By * default "xlsx" is assumed. In case the file extension is "xls" the file * type will be set to {@link ExcelFileTypes#XLS}. * * @param fileLocation * {@link #setFileLocation(String)} * @param sheetIndex * {@link #sheetIndex} * @param containsHeader * {@link #containsHeader} */ public ImportConfigurationExcel(String fileLocation, int sheetIndex, boolean containsHeader) { ExcelFileTypes excelFileType; String ext = FilenameUtils.getExtension(fileLocation); switch (ext) { case "xls": excelFileType = ExcelFileTypes.XLS; break; default: excelFileType = ExcelFileTypes.XLSX; break; } setFileLocation(fileLocation); setSheetIndex(sheetIndex); setContainsHeader(containsHeader); setExcelFileType(excelFileType); } /** * Adds a single column to import from * * This makes sure that only {@link ImportColumnExcel} can be added, * otherwise an {@link IllegalArgumentException} will be thrown. * * @param column * A single column to import from, {@link ImportColumnExcel} */ @Override public void addColumn(ImportColumn column) { if (!(column instanceof ImportColumnExcel)) { throw new IllegalArgumentException("Column needs to be of type ExcelColumn"); } if (!((ImportColumnExcel) column).isIndexSpecified() && !this.getContainsHeader()){ final String ERROR = "Adressing columns by name is only possible if the source contains a header"; throw new IllegalArgumentException(ERROR); } for (ImportColumn c : columns) { if (((ImportColumnExcel) column).isIndexSpecified() && ((ImportColumnExcel) column).getIndex() == ((ImportColumnExcel) c).getIndex()) { throw new IllegalArgumentException("Column for this index already assigned"); } if (!((ImportColumnExcel) column).isIndexSpecified() && ((ImportColumnExcel) column).getName().equals(((ImportColumnExcel) c).getName())) { throw new IllegalArgumentException("Column for this name already assigned"); } if (column.getAliasName() != null && c.getAliasName() != null && c.getAliasName().equals(column.getAliasName())) { throw new IllegalArgumentException("Column names need to be unique"); } } this.columns.add(column); } /** * @return {@link #containsHeader} */ @Override public boolean getContainsHeader() { return containsHeader; } /** * @return {@link #ExcelFileTypes} */ public ExcelFileTypes getExcelFileType() { return excelFileType; } /** * @return {@link #sheetIndex} */ public int getSheetIndex() { return sheetIndex; } /** * Sets the indexes based on the header. * * @param row */ public void prepare(Row row) { for (ImportColumn c : super.getColumns()) { ImportColumnExcel column = (ImportColumnExcel) c; if (!column.isIndexSpecified()) { boolean found = false; for (int i = 0; i < row.getPhysicalNumberOfCells(); i++) { row.getCell(i).setCellType(Cell.CELL_TYPE_STRING); if (row.getCell(i).getStringCellValue().equals(column.getName())) { found = true; column.setIndex(i); } } if (!found) { throw new IllegalArgumentException("Index for column '" + column.getName() + "' couldn't be found"); } } } } /** * @param containsHeader * {@link #containsHeader} */ @Override public void setContainsHeader(boolean containsHeader) { this.containsHeader = containsHeader; } /** * @param excelFileType * {@link #ExcelFileTypes} */ public void setExcelFileType(ExcelFileTypes excelFileType) { this.excelFileType = excelFileType; } /** * @param sheetIndex * {@link #sheetIndex} */ public void setSheetIndex(int sheetIndex) { this.sheetIndex = sheetIndex; } }