package org.springframework.batch.item.file;
import org.apache.poi.POIXMLDocument;
import org.apache.poi.poifs.filesystem.POIFSFileSystem;
import org.apache.poi.ss.usermodel.Row;
import org.apache.poi.ss.usermodel.Workbook;
import org.apache.poi.ss.usermodel.WorkbookFactory;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.batch.item.ParseException;
import org.springframework.batch.item.ReaderNotOpenException;
import org.springframework.batch.item.resource.IndexedResourceAwareItemStreamReader;
import org.springframework.batch.item.resource.ResourceAwareItemStreamReader;
import org.springframework.batch.item.support.AbstractItemCountingItemStreamItemReader;
import org.springframework.beans.factory.InitializingBean;
import org.springframework.core.io.Resource;
import org.springframework.util.Assert;
import java.io.*;
/**
* <p>
* This {@link AbstractItemCountingItemStreamItemReader} is capable of reading one sheet of one Excel file,
* row per row, and mapping each row to a Java bean via a {@link RowMapper}.
* This implementation make usage of Apache POI core framework to read either OLE2 or OOXML format (detection is
* based on file content and not on file extension).
* </p>
* <p>
* If you keep default settings, you only need to specify a
* {@link org.springframework.core.io.Resource} and a {@link RowMapper}.</p>
* <p>
* Note that the restartability of this reader is only based on
* {@link AbstractItemCountingItemStreamItemReader}.
* </p>
*
* @param <T> return type of a {@link #read()}
* @author Almex
* @see #setRowsToSkip(int)
* @see #setSheetIndex(int)
* @implSpec This implementation is not thread-safe
* @since 1.1
*/
public class ExcelSheetItemReader<T> extends AbstractItemCountingItemStreamItemReader<T> implements
IndexedResourceAwareItemStreamReader<T>,
InitializingBean,
ResourceAwareItemReaderItemStream<T>,
ResourceAwareItemStreamReader<T> {
private static Logger LOGGER = LoggerFactory.getLogger(ExcelSheetItemReader.class);
private RowMapper<? extends T> rowMapper;
private Resource resource;
private Workbook workbook;
private boolean noInput = false;
private int rowsToSkip = 0;
private int sheetIndex = 0;
/**
* Validate if the {@link java.io.File} is of a supported format (i.e.: OLE2 or OOXML).
*
* @param file targeting the Excel workbook to validate.
* @return <code>true</code> if the format is supported, <code>false</code> otherwise.
* @throws IOException if an I/O error occurs.
*/
private static boolean isValidExcelFile(File file) throws IOException {
try (InputStream inputStream = new PushbackInputStream(new FileInputStream(file), 8)) {
return POIFSFileSystem.hasPOIFSHeader(inputStream) || POIXMLDocument.hasOOXMLHeader(inputStream);
}
}
/**
* Checking that the returned row is not null should validate if we have not reached the End Of File.
*
* @param row the {@link Row} from which we check the reference
* @return {@code true} if the reference to the {@code row} is {@code null}, {@code false} otherwise.
*/
private static boolean isEof(final Row row) {
return row == null;
}
@Override
public void afterPropertiesSet() throws Exception {
Assert.notNull(rowMapper, "rowMapper is required");
}
@Override
protected T doRead() throws Exception {
T result;
Row row = readRow();
if (!isEof(row)) {
try {
result = rowMapper.mapRow(row, getCurrentIndex());
} catch (Exception ex) {
throw new ParseException("Parsing error at line: " + getCurrentIndex() + " in resource=["
+ resource.getDescription() + "]", ex);
}
} else {
noInput = true;
//-- In order to full-fil ItemReader contract we must return null
result = null;
}
return result;
}
/**
* @return next line (skipping the number of row specified by {@link #setRowsToSkip(int)}.
*/
private Row readRow() {
Row result;
if (workbook == null) {
throw new ReaderNotOpenException("Reader must be open before it can be read.");
}
result = workbook.getSheetAt(sheetIndex).getRow(getCurrentIndex());
if (result == null) {
noInput = true;
}
return result;
}
@Override
protected void doOpen() throws Exception {
Assert.notNull(resource, "Input resource must be set");
noInput = true;
if (!resource.exists()) {
LOGGER.warn("Input resource does not exist {}", resource.getDescription());
return;
}
if (!isValidExcelFile(resource.getFile())) {
LOGGER.warn("Input resource is neither an OLE2 file, nor an OOXML file {}", resource.getDescription());
return;
}
/**
* ATTENTION: if we use the resource.getFileInputStream() the stream is never released!
* So, we create our own FileInputStream instead. Don't know why. Seems like a bug in Apache POI
*/
try (InputStream inputStream = new FileInputStream(resource.getFile())) {
this.workbook = WorkbookFactory.create(inputStream);
} finally {
if (workbook != null) {
workbook.close();
}
}
noInput = false;
jumpToItem(0);
}
@Override
protected void doClose() throws Exception {
this.noInput = true;
}
@Override
public T read() throws Exception {
T result = null;
if (!noInput) {
result = super.read();
if (result == null) {
noInput = true;
}
}
return result;
}
@Override
public int getCurrentIndex() {
int result;
if (isEof()) {
result = -1;
} else {
// it's a zero-based index ()
result = getCurrentItemCount() + rowsToSkip - 1;
}
return result;
}
/**
* @return true if we have reached the end of file false otherwise
*/
public boolean isEof() {
return noInput;
}
@Override
public void setResource(Resource resource) {
this.resource = resource;
}
/**
* Set the {@link RowMapper} used to map a {@link org.apache.poi.ss.usermodel.Row}
* to your <code>T</code> type.
*
* @param rowMapper an implementation of {@link RowMapper}
*/
public void setRowMapper(RowMapper<? extends T> rowMapper) {
this.rowMapper = rowMapper;
}
/**
* By default this value is initialized to 0.
*
* @param rowsToSkip set the number of the first row to read -1.
*/
public void setRowsToSkip(int rowsToSkip) {
this.rowsToSkip = rowsToSkip;
}
/**
* By default this value is initialized to 0.
*
* @param sheetIndex set the zero-indexed based sheet.
*/
public void setSheetIndex(int sheetIndex) {
this.sheetIndex = sheetIndex;
}
}