/* * JBoss, Home of Professional Open Source. * See the COPYRIGHT.txt file distributed with this work for information * regarding copyright ownership. Some portions may be licensed * to Red Hat, Inc. under one or more contributor license agreements. * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA * 02110-1301 USA. */ package org.teiid.translator.excel; import java.io.File; import java.io.FileInputStream; import java.io.IOException; import java.util.Arrays; import java.util.concurrent.atomic.AtomicInteger; import javax.resource.ResourceException; import org.apache.poi.hssf.usermodel.HSSFWorkbook; import org.apache.poi.ss.usermodel.Cell; import org.apache.poi.ss.usermodel.Row; import org.apache.poi.ss.usermodel.Sheet; import org.apache.poi.ss.usermodel.Workbook; import org.apache.poi.xssf.usermodel.XSSFWorkbook; import org.teiid.logging.LogConstants; import org.teiid.logging.LogManager; import org.teiid.metadata.Column; import org.teiid.metadata.Column.SearchType; import org.teiid.metadata.ExtensionMetadataProperty; import org.teiid.metadata.MetadataFactory; import org.teiid.metadata.Table; import org.teiid.translator.FileConnection; import org.teiid.translator.MetadataProcessor; import org.teiid.translator.TranslatorException; import org.teiid.translator.TranslatorProperty; import org.teiid.translator.TranslatorProperty.PropertyType; import org.teiid.translator.TypeFacility; public class ExcelMetadataProcessor implements MetadataProcessor<FileConnection> { @ExtensionMetadataProperty(applicable=Table.class, datatype=String.class, display="Excel File Name", description="Excel File name, use file name pattern if one than one file in the parent directory", required=true) public static final String FILE = MetadataFactory.EXCEL_URI+"FILE"; //$NON-NLS-1$ @ExtensionMetadataProperty(applicable=Column.class, datatype=Integer.class, display="Cell Number", description="Cell number, where the column information is defined. If column name is ROW_ID, define it as -1", required=true) public static final String CELL_NUMBER = MetadataFactory.EXCEL_URI+"CELL_NUMBER"; //$NON-NLS-1$ @ExtensionMetadataProperty(applicable=Table.class, datatype=Integer.class, display="First Data Number", description="First Row Number, where data rows start") public static final String FIRST_DATA_ROW_NUMBER = MetadataFactory.EXCEL_URI+"FIRST_DATA_ROW_NUMBER"; //$NON-NLS-1$ public static final String ROW_ID = "ROW_ID"; //$NON-NLS-1$ private String excelFileName; private boolean ignoreEmptyCells = false; private int headerRowNumber = 0; private boolean hasHeader = false; private int dataRowNumber = 0; private boolean hasDataRowNumber = false; public void process(MetadataFactory mf, FileConnection conn) throws TranslatorException { if (this.excelFileName == null) { throw new TranslatorException(ExcelPlugin.Event.TEIID23004, ExcelPlugin.Util.gs(ExcelPlugin.Event.TEIID23004, "importer.ExcelFileName")); //$NON-NLS-1$ } try { File xlsFile = conn.getFile(this.excelFileName); if (xlsFile.isDirectory()) { File[] files = xlsFile.listFiles(); if (files.length > 0) { xlsFile = files[0]; } } if (xlsFile.isDirectory() || !xlsFile.exists()) { throw new TranslatorException(ExcelPlugin.Event.TEIID23005, ExcelPlugin.Util.gs(ExcelPlugin.Event.TEIID23005, xlsFile.getName())); } String extension = getFileExtension(xlsFile); FileInputStream xlsFileStream = new FileInputStream(xlsFile); try { Workbook workbook = null; if (extension.equalsIgnoreCase("xls")) { //$NON-NLS-1$ workbook = new HSSFWorkbook(xlsFileStream); } else if (extension.equalsIgnoreCase("xlsx")) { //$NON-NLS-1$ workbook = new XSSFWorkbook(xlsFileStream); } int sheetCount = workbook.getNumberOfSheets(); for (int i = 0; i < sheetCount; i++) { Sheet sheet = workbook.getSheetAt(i); addTable(mf, sheet, xlsFile.getName(), this.excelFileName); } } finally { xlsFileStream.close(); } } catch (ResourceException e) { throw new TranslatorException(e); } catch (IOException e) { throw new TranslatorException(e); } } private void addTable(MetadataFactory mf, Sheet sheet, String xlsName, String originalName) { int firstRowNumber = sheet.getFirstRowNum(); Row headerRow = null; int firstCellNumber = -1; if (this.hasHeader) { headerRow = sheet.getRow(this.headerRowNumber); if (headerRow != null) { firstRowNumber = this.headerRowNumber; firstCellNumber = headerRow.getFirstCellNum(); if (firstCellNumber == -1) { LogManager.logInfo(LogConstants.CTX_CONNECTOR, ExcelPlugin.Util.gs(ExcelPlugin.Event.TEIID23006, xlsName)); return; } } } if (headerRow == null) { while (firstCellNumber == -1) { headerRow = sheet.getRow(firstRowNumber++); // check if this is a empty sheet; the data must be present in first 10000 rows if (headerRow == null && firstRowNumber > 10000) { return; } if (headerRow == null) { continue; } firstCellNumber = headerRow.getFirstCellNum(); } } // create a table for each sheet AtomicInteger columnCount = new AtomicInteger(); Table table = mf.addTable(sheet.getSheetName()); table.setNameInSource(sheet.getSheetName()); table.setProperty(ExcelMetadataProcessor.FILE, originalName); // add implicit row_id column based on row number from excel sheet Column column = mf.addColumn(ROW_ID, TypeFacility.RUNTIME_NAMES.INTEGER, table); column.setSearchType(SearchType.All_Except_Like); column.setProperty(CELL_NUMBER, ROW_ID); mf.addPrimaryKey("PK0", Arrays.asList(ROW_ID), table); //$NON-NLS-1$ column.setUpdatable(false); Row dataRow = null; int lastCellNumber = headerRow.getLastCellNum(); // if getIgnoreEmptyHeaderCells() is false and we have a header row // then only count cells that have a non-empty value. if (this.hasHeader && !getIgnoreEmptyHeaderCells()) { int cellCounter = 0; for (int i = firstCellNumber; i < lastCellNumber; i++) { Cell headerCell = headerRow.getCell(i); if (isCellEmpty(headerCell)) { // found a cell with no column name that will be the last cell. break; } cellCounter++; } lastCellNumber = cellCounter + firstCellNumber; } if (this.hasDataRowNumber) { // adjust for zero index table.setProperty(ExcelMetadataProcessor.FIRST_DATA_ROW_NUMBER, String.valueOf(this.dataRowNumber+1)); dataRow = sheet.getRow(this.dataRowNumber); } else if (this.hasHeader) { // +1 zero based, +1 to skip header table.setProperty(ExcelMetadataProcessor.FIRST_DATA_ROW_NUMBER, String.valueOf(firstRowNumber+2)); dataRow = sheet.getRow(firstRowNumber+1); } else { //+1 already occurred because of the increment above table.setProperty(ExcelMetadataProcessor.FIRST_DATA_ROW_NUMBER, String.valueOf(firstRowNumber)); dataRow = sheet.getRow(firstRowNumber); } if (firstCellNumber != -1) { for (int j = firstCellNumber; j < lastCellNumber; j++) { Cell headerCell = headerRow.getCell(j); // if the config is set to ignore empty header cells then validate the header // cell has a value, if not move on to the next column in the sheet. if (this.hasHeader && getIgnoreEmptyHeaderCells() && isCellEmpty(headerCell)) { continue; } Cell dataCell = dataRow.getCell(j); // if the cell value is null; then advance the data row cursor to to find it if (dataCell == null) { for (int rowNo = firstRowNumber+1; rowNo < firstRowNumber+10000; rowNo++) { Row row = sheet.getRow(rowNo); dataCell = row.getCell(j); if (dataCell != null) { break; } } } column = mf.addColumn(cellName(headerCell, columnCount), cellType(headerCell, dataCell), table); column.setSearchType(SearchType.Unsearchable); column.setProperty(ExcelMetadataProcessor.CELL_NUMBER, String.valueOf(j+1)); } } } private boolean isCellEmpty(Cell headerCell) { if (headerCell == null) return true; String name = headerCell.getStringCellValue(); return (name == null || name.isEmpty()); } private String cellType(Cell headerCell, Cell dataCell) { if (this.hasHeader) { return getCellType(dataCell); } return getCellType(headerCell); } private String cellName(Cell headerCell, AtomicInteger count) { if (this.hasHeader) { return headerCell.getStringCellValue(); } return "column"+count.incrementAndGet(); //$NON-NLS-1$ } public void setExcelFileName(String fileName) { this.excelFileName = fileName; } static String getFileExtension(File xlsFile) { int idx = xlsFile.getName().lastIndexOf('.'); String extension = "xls"; //$NON-NLS-1$ if (idx > 0) { extension = xlsFile.getName().substring(idx+1); } return extension; } private String getCellType(Cell cell) { if (cell == null) { return TypeFacility.RUNTIME_NAMES.STRING; } switch (cell.getCellType()) { case Cell.CELL_TYPE_STRING: return TypeFacility.RUNTIME_NAMES.STRING; case Cell.CELL_TYPE_BOOLEAN: return TypeFacility.RUNTIME_NAMES.BOOLEAN; default: return TypeFacility.RUNTIME_NAMES.DOUBLE; } } @TranslatorProperty(display="Header Row Number", category=PropertyType.IMPORT, description="Row number that contains the header information") public int getHeaderRowNumber() { return headerRowNumber; } public void setHeaderRowNumber(int headerRowNumber) { //adjust for zero index this.hasHeader = true; this.headerRowNumber = headerRowNumber-1; if (this.headerRowNumber < 0) { this.headerRowNumber = 0; } } @TranslatorProperty(display = "Ignore Empty Header Cells", category = TranslatorProperty.PropertyType.IMPORT, description = "When true any cells with empty value for header row are ignored, otherwise an empty header row cell indicates end of columns.") public boolean getIgnoreEmptyHeaderCells() { return ignoreEmptyCells; } public void setIgnoreEmptyHeaderCells(boolean ignoreEmpty) { ignoreEmptyCells = ignoreEmpty; } @TranslatorProperty(display = "Data Row Number", category = PropertyType.IMPORT, description = "Row number from which data rows start from") public int getDataRowNumber() { return dataRowNumber; } public void setDataRowNumber(int dataRowNumber) { //adjust for zero index this.hasDataRowNumber = true; this.dataRowNumber = dataRowNumber-1; if (this.dataRowNumber < 0) { this.dataRowNumber = 0; } } @TranslatorProperty(display="Excel File", category=PropertyType.IMPORT, description="Name of the Excel file to read metadata from", required=true) public String getExcelFileName() { return excelFileName; } }