package charts.builder.spreadsheet;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.text.SimpleDateFormat;
import java.util.Date;
import java.util.Iterator;
import java.util.List;
import java.util.NoSuchElementException;
import java.util.Set;
import org.apache.commons.io.FileUtils;
import org.apache.commons.lang3.StringUtils;
import org.apache.poi.hssf.util.HSSFColor;
import org.apache.poi.ss.formula.eval.ErrorEval;
import org.apache.poi.ss.usermodel.Cell;
import org.apache.poi.ss.usermodel.CellValue;
import org.apache.poi.ss.usermodel.Color;
import org.apache.poi.ss.usermodel.FormulaError;
import org.apache.poi.ss.usermodel.FormulaEvaluator;
import org.apache.poi.ss.usermodel.Row;
import org.apache.poi.ss.usermodel.Sheet;
import org.apache.poi.ss.usermodel.Workbook;
import org.apache.poi.ss.util.CellReference;
import org.apache.poi.xssf.usermodel.XSSFColor;
import org.apache.tika.io.IOUtils;
import play.Logger;
import charts.builder.DataSource;
import charts.builder.Value;
import charts.builder.spreadsheet.external.ResolvedRef;
import charts.builder.spreadsheet.external.SimpleCellLink;
import charts.builder.spreadsheet.external.UnresolvedRef;
import com.google.common.collect.Lists;
import com.google.common.collect.Sets;
public abstract class SpreadsheetDataSource implements DataSource {
private Workbook workbook;
private FormulaEvaluator evaluator;
private final int defaultSheet;
private class SpreadsheetCellValue implements Value {
private final Cell cell;
public SpreadsheetCellValue(Cell cell) {
this.cell = cell;
}
@Override
public String getValue() {
String result = "";
try {
CellValue cellValue = evaluator().evaluate(cell);
if (cellValue == null) {
return "";
}
switch (cellValue.getCellType()) {
case Cell.CELL_TYPE_BOOLEAN:
result = Boolean.toString(cellValue.getBooleanValue());
break;
case Cell.CELL_TYPE_NUMERIC:
double val = cellValue.getNumberValue();
result = Double.toString(val);
break;
case Cell.CELL_TYPE_STRING:
result = cellValue.getStringValue();
break;
case Cell.CELL_TYPE_BLANK:
result = "";
break;
case Cell.CELL_TYPE_ERROR:
result = ErrorEval.getText(cellValue.getErrorValue());
break;
// CELL_TYPE_FORMULA will never happen
case Cell.CELL_TYPE_FORMULA:
result = "#FORMULAR";
break;
default:
result = "#DEFAULT";
}
} catch(RuntimeException e) {
if(cell.getCellType() == Cell.CELL_TYPE_FORMULA) {
switch(cell.getCachedFormulaResultType()) {
case Cell.CELL_TYPE_NUMERIC:
double val = cell.getNumericCellValue();
result = Double.toString(val);
break;
case Cell.CELL_TYPE_ERROR:
FormulaError fe = FormulaError.forInt(cell.getErrorCellValue());
result = fe.getString();
break;
case Cell.CELL_TYPE_STRING:
result = cell.getStringCellValue();
break;
case Cell.CELL_TYPE_BOOLEAN:
result = Boolean.toString(cell.getBooleanCellValue());
break;
default:
result = "";
}
}
}
return result;
}
@Override
public String toString() {
return asString();
}
@Override
public String asString() {
return getValue();
}
@Override
public Double asDouble() {
String s = getValue();
try {
return new Double(s);
} catch (NumberFormatException e) {
return null;
}
}
@Override
public Integer asInteger() {
String s = getValue();
try {
return new Integer(Math.round(Float.parseFloat(s)));
} catch (NumberFormatException e) {
return null;
}
}
@Override
public java.awt.Color asColor() {
for(Color c : Lists.newArrayList(cell.getCellStyle().getFillForegroundColorColor(),
cell.getCellStyle().getFillBackgroundColorColor())) {
if (c instanceof HSSFColor && (((HSSFColor)c).getTriplet() != null)) {
final short[] rgb = ((HSSFColor)c).getTriplet();
return new java.awt.Color(rgb[0], rgb[1], rgb[2]);
}
if (c instanceof XSSFColor && (((XSSFColor)c).getRgb() != null)) {
final byte[] rgb = ((XSSFColor)c).getRgb();
// Convert bytes to unsigned integers
return new java.awt.Color(rgb[0] & 0xFF, rgb[1] & 0xFF, rgb[2] & 0xFF);
}
}
return null;
}
@Override
public Date asDate() {
try {
return cell.getDateCellValue();
} catch(Exception e) {
final String s = getValue();
// TODO it would be better if we could somehow parse an arbitrary date format
// http://stackoverflow.com/questions/3850784/recognise-an-arbitrary-date-string
// http://stackoverflow.com/questions/3389348/parse-any-date-in-java
final SimpleDateFormat sdf = new SimpleDateFormat("dd/MM/yyyy");
try {
return sdf.parse(s);
} catch(Exception e2) {
throw e;
}
}
}
@Override
public Double asPercent() {
Double value = asDouble();
if(!cell.getCellStyle().getDataFormatString().contains("%") && (value!=null)) {
value = value / 100.0;
}
return value;
}
}
private static class EmptyCell implements Value {
@Override
public String getValue() {
return null;
}
@Override
public String asString() {
return null;
}
@Override
public Double asDouble() {
return null;
}
@Override
public Integer asInteger() {
return null;
}
@Override
public java.awt.Color asColor() {
return null;
}
@Override
public Date asDate() {
return null;
}
@Override
public Double asPercent() {
return null;
}
}
public SpreadsheetDataSource() {
defaultSheet = 0;
}
SpreadsheetDataSource(Workbook workbook, FormulaEvaluator evaluator, int defaultSheet) {
this.workbook = workbook;
this.evaluator = evaluator;
this.defaultSheet = defaultSheet;
}
void init(Workbook workbook, FormulaEvaluator evaluator) {
this.workbook = workbook;
this.evaluator = evaluator;
}
/**
* select value from 1st sheet
*
* @param row
* - starts with 0
* @param col
* - starts with 0
* @throws MissingDataException
*/
public Value select(int row, int col) throws MissingDataException {
return select(null, row, col);
}
public Value select(String sheetname, int row, int col)
throws MissingDataException {
String cellref = new CellReference(row, col).formatAsString();
if (StringUtils.isNotBlank(sheetname)) {
cellref = sheetname + "!" + cellref;
}
return select(cellref);
}
public Value select(String sheetname, String selector) throws MissingDataException {
return select(sheetname+"!"+selector);
}
@Override
public Value select(String selector) throws MissingDataException {
Cell cell = selectCell(selector);
return cell!=null?new SpreadsheetCellValue(cell):new EmptyCell();
}
private Cell selectCell(String selector) throws MissingDataException {
// currently only CellReference selectors are supported like
// [sheet!]<row><column>
// e.g. Coral!A1 or just B20 which will select the cell from the first
// sheet.
CellReference cr = new CellReference(selector);
Sheet sheet;
String sheetName = cr.getSheetName();
if (sheetName != null) {
sheet = getSheet(sheetName);
if (sheet == null) {
throw new MissingDataException(String.format(
"Sheet '%s' does not exist in workbook", sheetName));
}
} else {
sheet = workbook.getSheetAt(defaultSheet);
if (sheet == null) {
throw new MissingDataException(
String.format("Sheet does not exist in workbook"));
}
}
Row row = sheet.getRow(cr.getRow());
if (row == null) {
return null;
}
Cell cell = row.getCell(cr.getCol());
if (cell == null) {
return null;
}
return cell;
}
private Sheet getSheet(String name) {
Sheet sheet = workbook.getSheet(name);
String strippedName = StringUtils.strip(name);
if (sheet == null) {
for (int i = 0; i < workbook.getNumberOfSheets(); i++) {
if (strippedName.equalsIgnoreCase(StringUtils.strip(workbook
.getSheetName(i)))) {
sheet = workbook.getSheetAt(i);
break;
}
}
}
if (sheet == null) {
for (int i = 0; i < workbook.getNumberOfSheets(); i++) {
if (StringUtils.containsIgnoreCase(
StringUtils.strip(workbook.getSheetName(i)), strippedName)) {
sheet = workbook.getSheetAt(i);
break;
}
}
}
return sheet;
}
public boolean hasSheet(String name) {
return getSheet(name) != null;
}
public String getSheetname(int i) {
Sheet sheet = workbook.getSheetAt(i);
if(sheet != null) {
return sheet.getSheetName();
} else {
return null;
}
}
public int sheets() {
return workbook.getNumberOfSheets();
}
public abstract SpreadsheetDataSource toSheet(int sheet);
public SpreadsheetDataSource toSheet(String sheetname) {
Sheet s = getSheet(sheetname);
if(s!= null) {
return toSheet(workbook.getSheetIndex(s));
} else {
return null;
}
}
public String getDefaultSheet() {
return workbook.getSheetName(defaultSheet);
}
public Integer getColumnCount(int row) {
return getColumnCount(defaultSheet, row);
}
public Integer getColumnCount(int i, int row) {
Sheet sheet = workbook.getSheetAt(i);
if(sheet != null) {
Row r = sheet.getRow(row);
if(r != null) {
return Integer.valueOf(r.getLastCellNum());
}
}
return null;
}
public List<Value> selectRow(int row) throws MissingDataException {
List<Value> result = Lists.newArrayList();
Integer max = getColumnCount(row);
if(max == null) {
return result;
}
for(int col = 0;col <= max;col++) {
result.add(select(row, col));
}
return result;
}
public List<Value> selectColumn(int column) throws MissingDataException {
return selectColumn(column, 100);
}
public List<Value> selectColumn(int column, int limit) throws MissingDataException {
List<Value> result = Lists.newArrayList();
Sheet sheet = workbook.getSheetAt(defaultSheet);
int max = Math.min(sheet.getLastRowNum(), limit);
for(int row = 0; row <= max;row++) {
result.add(select(row, column));
}
return result;
}
public static boolean containsString(List<Value> values, String s) {
for(Value v : values) {
if(StringUtils.equals(v.asString(),s)) {
return true;
}
}
return false;
}
Workbook workbook() {
return workbook;
}
FormulaEvaluator evaluator() {
return evaluator;
}
public boolean hasExternalReferences() {
for (int si = 0; si < workbook.getNumberOfSheets();si++) {
Sheet sheet = workbook.getSheetAt(si);
for (Row row : sheet) {
for (Cell cell : row) {
if (externalReference(cell) != null) {
return true;
}
}
}
}
return false;
}
public Set<UnresolvedRef> externalReferences() {
Set<UnresolvedRef> urefs = Sets.newHashSet();
for(int si = 0; si < workbook.getNumberOfSheets();si++) {
Sheet sheet = workbook.getSheetAt(si);
for(Row row : sheet) {
for(Cell cell : row) {
UnresolvedRef uref = externalReference(cell);
if(uref != null) {
//Logger.debug(String.format(
// "found external reference source '%s', source cell '%s', destination cell '%s'",
// uref.source(), uref.link().source(), uref.link().destination()));
urefs.add(uref);
}
}
}
}
return urefs;
}
abstract UnresolvedRef externalReference(Cell cell);
protected UnresolvedRef uref(String sIdOrName, final String sSelector,
final String dSelector) {
return new UnresolvedRef(sIdOrName,
new SimpleCellLink(sSelector, dSelector));
}
public InputStream updateExternalReferences(Set<ResolvedRef> refs) throws IOException {
boolean dirty = false;
for (ResolvedRef ref : refs) {
try {
final Cell dCell = selectCell(ref.link().destination());
if (dCell == null)
continue;
if (ref.source().isDefined()) {
final SpreadsheetDataSource source = ref.source().get();
try {
final Cell sCell = source.selectCell(ref.link().source());
dirty |= updatePrecalculatedValue(dCell, sCell, source.evaluator());
} catch (MissingDataException e) {
dirty |= updatePrecalculatedError(dCell, FormulaError.REF);
}
} else {
dirty |= updatePrecalculatedError(dCell, FormulaError.REF);
}
} catch (Exception e) {
e.printStackTrace();
}
}
try {
evaluateAll();
} catch(RuntimeException e) {
Logger.debug("evaluateAll() failed on updateExternalReferences," +
"some cached formula results may be out of date", e);
}
return dirty ? writeToTempFile() : null;
}
// evaluate all formula cells but external references. the XSSF evaluator seem to have
// issues with external references even if setIgnoreMissingWorkbooks(...) is set to true
// this workaround will probably not fully resolve the issue as formulas that depend on
// problematic external references might still fail.
private void evaluateAll() {
for(int i=0; i<workbook.getNumberOfSheets(); i++) {
Sheet sheet = workbook.getSheetAt(i);
for(Row r : sheet) {
for (Cell c : r) {
if (c.getCellType() == Cell.CELL_TYPE_FORMULA && !isExternalReference(c)) {
try {
evaluator.evaluateFormulaCell(c);
} catch(RuntimeException e) {
CellReference cr = new CellReference(c);
Logger.debug(String.format("failed to evaluate cell %s!%s, formula %s." +
" some cached formula results may be out of date",
sheet.getSheetName(), cr.formatAsString(), c.getCellFormula()), e);
}
}
}
}
}
}
private boolean isExternalReference(Cell cell) {
return externalReference(cell) != null;
}
private boolean updatePrecalculatedValue(Cell destination,
Cell source, FormulaEvaluator sEvaluator) {
if(source != null) {
switch(source.getCellType()) {
case Cell.CELL_TYPE_BLANK:
return updatePrecalculatedBlank(destination);
case Cell.CELL_TYPE_BOOLEAN:
return updatePrecalculatedBoolean(destination, source.getBooleanCellValue());
case Cell.CELL_TYPE_ERROR:
return updatePrecalculatedError(destination,
FormulaError.forInt(source.getErrorCellValue()));
case Cell.CELL_TYPE_FORMULA:
try {
return updatePrecalculatedCellValue(destination, sEvaluator.evaluate(source));
} catch(Exception e) {
switch(source.getCachedFormulaResultType()) {
case Cell.CELL_TYPE_NUMERIC:
return updatePrecalculatedNumeric(destination, source.getNumericCellValue());
case Cell.CELL_TYPE_STRING:
return updatePrecalculatedString(destination, source.getStringCellValue());
case Cell.CELL_TYPE_BOOLEAN:
return updatePrecalculatedBoolean(destination, source.getBooleanCellValue());
case Cell.CELL_TYPE_ERROR:
return updatePrecalculatedError(destination,
FormulaError.forInt(source.getErrorCellValue()));
}
}
case Cell.CELL_TYPE_NUMERIC:
return updatePrecalculatedNumeric(destination, source.getNumericCellValue());
case Cell.CELL_TYPE_STRING:
return updatePrecalculatedString(destination, source.getStringCellValue());
default:
return false;
}
} else {
return updatePrecalculatedError(destination, FormulaError.REF);
}
}
private boolean updatePrecalculatedCellValue(Cell destination, CellValue val) {
if(val != null) {
switch(val.getCellType()) {
case Cell.CELL_TYPE_BOOLEAN:
return updatePrecalculatedBoolean(destination, val.getBooleanValue());
case Cell.CELL_TYPE_NUMERIC:
return updatePrecalculatedNumeric(destination, val.getNumberValue());
case Cell.CELL_TYPE_STRING:
return updatePrecalculatedString(destination, val.getStringValue());
case Cell.CELL_TYPE_BLANK:
return updatePrecalculatedBlank(destination);
case Cell.CELL_TYPE_ERROR:
return updatePrecalculatedError(destination,
FormulaError.forInt(val.getErrorValue()));
default: return false;
}
} else {
return updatePrecalculatedError(destination, FormulaError.REF);
}
}
private boolean updatePrecalculatedBlank(Cell destination) {
return updatePrecalculatedNumeric(destination, 0);
}
private boolean updatePrecalculatedNumeric(Cell destination, double sVal) {
if(isFormula(destination)) {
try {
double dVal = destination.getNumericCellValue();
if(dVal != sVal) {
destination.setCellValue(sVal);
return true;
}
} catch(Exception e) {
destination.setCellValue(sVal);
return true;
}
}
return false;
}
private boolean updatePrecalculatedString(Cell destination, String sVal) {
if(isFormula(destination)) {
try {
String dVal = destination.getStringCellValue();
if(!StringUtils.equals(sVal, dVal)) {
destination.setCellValue(sVal);
return true;
}
} catch(Exception e) {
destination.setCellValue(sVal);
return true;
}
}
return false;
}
private boolean updatePrecalculatedError(Cell destination, FormulaError sError) {
if(isFormula(destination)) {
try {
FormulaError dError = FormulaError.forInt(destination.getErrorCellValue());
if(sError != dError) {
destination.setCellErrorValue(sError.getCode());
return true;
}
} catch(Exception e) {
destination.setCellErrorValue(sError.getCode());
return true;
}
}
return false;
}
private boolean updatePrecalculatedBoolean(Cell destination, boolean sVal) {
if(isFormula(destination)) {
try {
boolean dVal = destination.getBooleanCellValue();
if(dVal != sVal) {
destination.setCellValue(sVal);
return true;
}
} catch(Exception e) {
destination.setCellValue(sVal);
return true;
}
}
return false;
}
private boolean isFormula(Cell cell) {
return (cell != null) && (cell.getCellType() == Cell.CELL_TYPE_FORMULA);
}
private InputStream writeToTempFile() throws IOException {
final File f = File.createTempFile("spreadsheet", "poi");
FileOutputStream out = new FileOutputStream(f);
workbook.write(out);
IOUtils.closeQuietly(out);
return new FileInputStream(f) {
@Override
public void close() throws IOException {
super.close();
FileUtils.deleteQuietly(f);
}
};
}
public int getColumns(int row) {
return workbook.getSheetAt(defaultSheet).getRow(row).getLastCellNum();
}
public int getRows() {
return workbook.getSheetAt(defaultSheet).getLastRowNum();
}
public Iterable<Value> rangeSelect(final int row1, final int column1,
final int row2, final int column2) {
if(row1 == row2) {
return rangeColumnSelect(row1, column1, column2);
} else if(column1 == column2 ) {
return rangeRowSelect(column1, row1, row2);
} else {
throw new IllegalArgumentException("can only select from 1 row or 1 column");
}
}
public Iterable<Value> rangeRowSelect(final int column, final int row1, final int row2) {
return new Iterable<Value>() {
@Override
public Iterator<Value> iterator() {
return rangeIterator(row1, row2, new ValueSelector() {
@Override
public Value select(int i) throws MissingDataException {
return SpreadsheetDataSource.this.select(i, column);
}});
}};
}
public Iterable<Value> rangeColumnSelect(final int row, final int column1, final int column2) {
return new Iterable<Value>() {
@Override
public Iterator<Value> iterator() {
return rangeIterator(column1, column2, new ValueSelector() {
@Override
public Value select(int i) throws MissingDataException {
return SpreadsheetDataSource.this.select(row, i);
}});
}};
}
private interface ValueSelector {
public Value select(int i) throws MissingDataException;
}
private Iterator<Value> rangeIterator(final int from, final int to,
final ValueSelector selector) {
return new Iterator<Value> () {
boolean hasNext = true;
int i = from;
@Override
public boolean hasNext() {
return hasNext;
}
@Override
public Value next() {
if(hasNext) {
hasNext = !(i == to);
try {
return selector.select(i);
} catch(MissingDataException e) {
return new EmptyCell();
} finally {
i += from < to ? 1 : -1;
}
} else {
throw new NoSuchElementException();
}
}
@Override
public void remove() {
throw new UnsupportedOperationException();
}};
}
}