/***************************************************************************
* Copyright 2010 Global Biodiversity Information Facility Secretariat
* Licensed under the Apache License, Version 2.0 (the "License"); you may not
* use this file except in compliance with the License. You may obtain a copy of
* the License at
* http://www.apache.org/licenses/LICENSE-2.0
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
* License for the specific language governing permissions and limitations under
* the License.
***************************************************************************/
package org.gbif.ipt.service.manage.impl;
import org.gbif.dwca.io.Archive;
import org.gbif.dwca.io.ArchiveFactory;
import org.gbif.dwca.io.ArchiveFile;
import org.gbif.dwca.io.UnsupportedArchiveException;
import org.gbif.ipt.config.AppConfig;
import org.gbif.ipt.config.DataDir;
import org.gbif.ipt.model.ExcelFileSource;
import org.gbif.ipt.model.FileSource;
import org.gbif.ipt.model.Resource;
import org.gbif.ipt.model.Source;
import org.gbif.ipt.model.SqlSource;
import org.gbif.ipt.model.TextFileSource;
import org.gbif.ipt.service.AlreadyExistingException;
import org.gbif.ipt.service.BaseManager;
import org.gbif.ipt.service.ImportException;
import org.gbif.ipt.service.InvalidFilenameException;
import org.gbif.ipt.service.SourceException;
import org.gbif.ipt.service.manage.SourceManager;
import org.gbif.utils.file.ClosableIterator;
import org.gbif.utils.file.ClosableReportingIterator;
import org.gbif.utils.file.csv.UnkownDelimitersException;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileWriter;
import java.io.IOException;
import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.ResultSet;
import java.sql.ResultSetMetaData;
import java.sql.SQLException;
import java.sql.SQLWarning;
import java.sql.Statement;
import java.util.ArrayList;
import java.util.Date;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Set;
import java.util.regex.Pattern;
import com.google.common.annotations.VisibleForTesting;
import com.google.common.collect.Lists;
import com.google.common.collect.Ordering;
import com.google.inject.Inject;
import org.apache.commons.io.FileUtils;
import org.apache.commons.io.FilenameUtils;
import org.apache.commons.io.IOUtils;
import org.apache.commons.lang3.StringUtils;
public class SourceManagerImpl extends BaseManager implements SourceManager {
private static class ColumnIterator implements ClosableIterator<Object> {
private final ClosableIterator<String[]> rows;
private final int column;
public ColumnIterator(FileSource source, int column) throws IOException {
rows = source.rowIterator();
this.column = column;
}
public void close() {
rows.close();
}
public boolean hasNext() {
return rows.hasNext();
}
public Object next() {
String[] row = rows.next();
if (row == null || row.length < column) {
return null;
}
return row[column];
}
public void remove() {
// unsupported
}
}
private class SqlColumnIterator implements ClosableIterator<Object> {
private final Connection conn;
private final Statement stmt;
private final ResultSet rs;
private final int column;
private boolean hasNext;
private final String sourceName;
public SqlColumnIterator(SqlSource source, int column) throws SQLException {
this(source, column, source.getSql());
}
public SqlColumnIterator(SqlSource source, int column, int limit) throws SQLException {
this(source, column, source.getSqlLimited(limit));
}
/**
* SqlColumnIterator constructor
*
* @param source of the sql data
* @param column to inspect, zero based numbering as used in the dwc archives
* @param sql statement to query in the sql source
*/
private SqlColumnIterator(SqlSource source, int column, String sql) throws SQLException {
this.conn = getDbConnection(source);
this.stmt = conn.createStatement(ResultSet.TYPE_FORWARD_ONLY, ResultSet.CONCUR_READ_ONLY);
source.getRdbms().enableLargeResultSet(this.stmt);
this.column = column + 1;
this.rs = stmt.executeQuery(sql);
this.hasNext = rs.next();
sourceName = source.getName();
}
public void close() {
if (rs != null) {
try {
rs.close();
stmt.close();
conn.close();
} catch (SQLException e) {
log.error("Cant close iterator for sql source " + sourceName, e);
}
}
}
public boolean hasNext() {
return hasNext;
}
public Object next() {
String val = null;
if (hasNext) {
try {
// forward rs cursor
hasNext = rs.next();
val = rs.getString(column);
} catch (SQLException e2) {
hasNext = false;
}
}
return val;
}
public void remove() {
// unsupported
}
}
private class SqlRowIterator implements ClosableReportingIterator<String[]> {
private final Connection conn;
private final Statement stmt;
private final ResultSet rs;
private boolean hasNext;
private final String sourceName;
private final int rowSize;
private boolean rowError;
private String errorMessage;
private Exception exception;
SqlRowIterator(SqlSource source) throws SQLException {
this.conn = getDbConnection(source);
this.stmt = conn.createStatement(ResultSet.TYPE_FORWARD_ONLY, ResultSet.CONCUR_READ_ONLY);
source.getRdbms().enableLargeResultSet(this.stmt);
this.rs = stmt.executeQuery(source.getSql());
this.rowSize = rs.getMetaData().getColumnCount();
this.hasNext = rs.next();
sourceName = source.getName();
this.rowError = false;
}
public void close() {
if (rs != null) {
try {
rs.close();
stmt.close();
conn.close();
} catch (SQLException e) {
log.error("Cant close iterator for sql source " + sourceName, e);
}
}
}
public boolean hasNext() {
return hasNext;
}
public String[] next() {
String[] val = new String[rowSize];
if (hasNext) {
try {
resetReportingIterator();
int gotTo = 0; // field reached in row
try {
for (int i = 1; i <= rowSize; i++) {
val[i - 1] = rs.getString(i);
gotTo = i;
}
} catch (SQLException exOnRow) {
log.debug("Exception caught reading row: " + exOnRow.getMessage(), exOnRow);
rowError = true;
exception = exOnRow;
// construct error message showing exception and problem row
StringBuilder msg = new StringBuilder();
msg.append("Exception caught reading row: ");
msg.append(exOnRow.getMessage());
msg.append("\n");
msg.append("Row: ");
for (int i = 0; i < gotTo; i++) {
msg.append("[").append(val[i]).append("]");
}
errorMessage = msg.toString();
} finally {
// forward rs cursor
hasNext = rs.next();
}
} catch (SQLException e2) {
// Exception on advancing cursor, assume no more rows.
log.debug("Exception caught advancing cursor: " + e2.getMessage(), e2);
hasNext = false;
exception = e2;
errorMessage = e2.getMessage();
}
}
return val;
}
/**
* Reset all iterator reporting parameters.
*/
private void resetReportingIterator() {
rowError = false;
exception = null;
errorMessage = null;
}
public void remove() {
// unsupported
}
public boolean hasRowError() {
return rowError;
}
public String getErrorMessage() {
return errorMessage;
}
public Exception getException() {
return exception;
}
}
// default fetch sized used in SQL statements
private static final int FETCH_SIZE = 10;
// the maximum time in seconds that a driver will wait while attempting to connect to a database
private static final int CONNECTION_TIMEOUT_SECS = 5;
private static final String ACCEPTED_FILE_NAMES = "[\\w.\\-\\s\\)\\(]+";
// Allowed characters in file names: alpha-numeric characters, plus ".", "-", "_", ")", "(", and " "
private Pattern acceptedPattern = Pattern.compile(ACCEPTED_FILE_NAMES);
@Inject
public SourceManagerImpl(AppConfig cfg, DataDir dataDir) {
super(cfg, dataDir);
}
public static void copyArchiveFileProperties(ArchiveFile from, TextFileSource to) {
to.setEncoding(from.getEncoding());
to.setFieldsEnclosedBy(from.getFieldsEnclosedBy() == null ? null : from.getFieldsEnclosedBy().toString());
to.setFieldsTerminatedBy(from.getFieldsTerminatedBy());
to.setIgnoreHeaderLines(from.getIgnoreHeaderLines());
to.setDateFormat(from.getDateFormat());
}
/**
* Tests if the the file name is composed of alpha-numeric characters, plus ".", "-", "_", ")", "(", and " ".
*
* @param fileName the file name
*
* @return <tt> if accepted, <tt>false</tt> otherwise
*/
@VisibleForTesting
protected boolean acceptableFileName(String fileName) {
boolean matches = acceptedPattern.matcher(fileName).matches();
if (!matches) {
log.error("File name contains illegal characters: " + fileName);
}
return matches;
}
private ExcelFileSource addExcelFile() throws ImportException {
ExcelFileSource src = new ExcelFileSource();
// TODO: encoding, header rows, date format?
src.setSheetIdx(0);
return src;
}
private TextFileSource addTextFile(File file) throws ImportException {
TextFileSource src = new TextFileSource();
try {
// anaylze individual files using the dwca reader
Archive arch = ArchiveFactory.openArchive(file);
copyArchiveFileProperties(arch.getCore(), src);
} catch (IOException e) {
log.warn(e.getMessage());
throw new ImportException(e);
} catch (UnsupportedArchiveException e) {
// fine, cant read it with dwca library, but might still be a valid file for manual setup
log.warn(e.getMessage());
} catch (UnkownDelimitersException e) {
// this file is invalid
log.warn(e.getMessage());
throw new ImportException(e);
}
return src;
}
public FileSource add(Resource resource, File file, String fileName) throws ImportException,
InvalidFilenameException {
log.debug("ADDING SOURCE " + fileName + " FROM " + file.getAbsolutePath());
if (acceptableFileName(fileName)) {
FileSource src;
String suffix = FilenameUtils.getExtension(fileName);
if (suffix != null && (suffix.equalsIgnoreCase("xls") || suffix.equalsIgnoreCase("xlsx"))) {
src = addExcelFile();
} else {
src = addTextFile(file);
}
src.setName(fileName);
src.setResource(resource);
try {
// copy file
File ddFile = dataDir.sourceFile(resource, src);
try {
FileUtils.copyFile(file, ddFile);
} catch (IOException e1) {
throw new ImportException(e1);
}
src.setFile(ddFile);
src.setLastModified(new Date());
// add to resource, allow overwriting existing ones
// if the file is uploaded not for the first time
resource.addSource(src, true);
} catch (AlreadyExistingException e) {
throw new ImportException(e);
}
// analyze file
analyze(src);
return src;
} else {
throw new InvalidFilenameException("Filename contains illegal characters");
}
}
public String analyze(Source source) {
if (source instanceof SqlSource) {
return analyze((SqlSource) source);
} else {
return analyze((FileSource) source);
}
}
private String analyze(SqlSource ss) {
String problem = null;
Connection con = null;
Statement stmt = null;
ResultSet rs = null;
try {
con = getDbConnection(ss);
// test sql
if (StringUtils.trimToNull(ss.getSql()) != null) {
stmt = con.createStatement(ResultSet.TYPE_FORWARD_ONLY, ResultSet.CONCUR_READ_ONLY);
if ((ss.getJdbcDriver() != null) && !ss.getJdbcDriver().contains("odbc")) {
stmt.setFetchSize(FETCH_SIZE);
}
rs = stmt.executeQuery(ss.getSqlLimited(FETCH_SIZE));
// get number of columns
ResultSetMetaData meta = rs.getMetaData();
ss.setColumns(meta.getColumnCount());
ss.setReadable(true);
}
} catch (SQLException e) {
log.warn("Cant read sql source " + ss, e);
problem = e.getMessage();
ss.setReadable(false);
} finally {
// close result set, statement, and connection in that order
if (rs != null) {
try {
rs.close();
} catch (SQLException e) {
log.error("ResultSet could not be closed: " + e.getMessage(), e);
}
}
if (stmt != null) {
try {
stmt.close();
} catch (SQLException e) {
log.error("Statement could not be closed: " + e.getMessage(), e);
}
}
if (con != null) {
try {
con.close();
} catch (SQLException e) {
log.error("Connection could not be closed: " + e.getMessage(), e);
}
}
}
return problem;
}
private String analyze(FileSource src) {
BufferedWriter logWriter = null;
File logFile = dataDir.sourceLogFile(src.getResource().getShortname(), src.getName());
try {
FileUtils.deleteQuietly(logFile);
Set<Integer> emptyLines;
try {
emptyLines = src.analyze();
} catch (IOException e) {
return e.getMessage();
}
logWriter = new BufferedWriter(new FileWriter(logFile));
logWriter.write(
"Log for source name:" + src.getName() + " from resource: " + src.getResource().getShortname() + "\n");
if (!emptyLines.isEmpty()) {
for (Integer i : Ordering.natural().sortedCopy(emptyLines)) {
logWriter.write("Line: " + i + " [EMPTY LINE]\n");
}
} else {
logWriter.write("No rows were skipped in this source");
}
logWriter.flush();
} catch (IOException e) {
log.warn("Cant write source log file " + logFile.getAbsolutePath(), e);
} finally {
if (logWriter != null) {
IOUtils.closeQuietly(logWriter);
}
}
return null;
}
/*
* (non-Javadoc)
* @see org.gbif.ipt.service.manage.SourceManager#columns(org.gbif.ipt.model.SourceBase)
*/
public List<String> columns(Source source) {
if (source == null) {
return Lists.newArrayList();
}
if (source instanceof SqlSource) {
return columns((SqlSource) source);
}
return ((FileSource) source).columns();
}
private List<String> columns(SqlSource source) {
List<String> columns = new ArrayList<String>();
Connection con = null;
Statement stmt = null;
ResultSet rs = null;
try {
con = getDbConnection(source);
if (con != null) {
// test sql
stmt = con.createStatement(ResultSet.TYPE_FORWARD_ONLY, ResultSet.CONCUR_READ_ONLY);
if ((source.getJdbcDriver() != null) && !source.getJdbcDriver().contains("odbc")) {
stmt.setFetchSize(1);
}
rs = stmt.executeQuery(source.getSqlLimited(1));
// get column metadata
ResultSetMetaData meta = rs.getMetaData();
int idx = 1;
int max = meta.getColumnCount();
while (idx <= max) {
columns.add(meta.getColumnLabel(idx));
idx++;
}
} else {
String msg = "Can't read sql source, the connection couldn't be created with the current parameters";
columns.add(msg);
log.warn(msg + " " + source);
}
} catch (SQLException e) {
log.warn("Cant read sql source " + source, e);
} finally {
// close result set, statement, and connection in that order
if (rs != null) {
try {
rs.close();
} catch (SQLException e) {
log.error("ResultSet could not be closed: " + e.getMessage(), e);
}
}
if (stmt != null) {
try {
stmt.close();
} catch (SQLException e) {
log.error("Statement could not be closed: " + e.getMessage(), e);
}
}
if (con != null) {
try {
con.close();
} catch (SQLException e) {
log.error("Connection could not be closed: " + e.getMessage(), e);
}
}
}
return columns;
}
/*
* (non-Javadoc)
* @see org.gbif.ipt.service.manage.MappingConfigManager#delete(org.gbif.ipt.model.SourceBase.TextFileSource)
*/
public boolean delete(Resource resource, Source source) {
if (source == null) {
return false;
}
resource.deleteSource(source);
if (source instanceof TextFileSource) {
// also delete source data file
TextFileSource fs = (TextFileSource) source;
fs.getFile().delete();
}
if (source instanceof ExcelFileSource) {
// also delete source data file if no further source uses it
ExcelFileSource es = (ExcelFileSource) source;
boolean del = true;
for (Source src : resource.getSources()) {
if (!src.equals(es) && src.isExcelSource() && ((ExcelFileSource) src).getFile().equals(es.getFile())) {
// another excel source using the same file, dont delete
del = false;
break;
}
}
if (del) {
es.getFile().delete();
}
}
return true;
}
private Connection getDbConnection(SqlSource source) throws SQLException {
Connection conn = null;
// try to connect to db via simple JDBC
if (source.getHost() != null && source.getJdbcUrl() != null && source.getJdbcDriver() != null) {
try {
DriverManager.setLoginTimeout(CONNECTION_TIMEOUT_SECS);
Class.forName(source.getJdbcDriver());
conn = DriverManager.getConnection(source.getJdbcUrl(), source.getUsername(), source.getPassword());
// If a SQLWarning object is available, log its
// warning(s). There may be multiple warnings chained.
SQLWarning warn = conn.getWarnings();
while (warn != null) {
log.warn("SQLWarning: state=" + warn.getSQLState() + ", message=" + warn.getMessage() + ", vendor=" + warn
.getErrorCode());
warn = warn.getNextWarning();
}
} catch (java.lang.ClassNotFoundException e) {
String msg =
String
.format(
"Couldnt load JDBC driver to create new external datasource connection with JDBC Class=%s and URL=%s. Error: %s",
source.getJdbcDriver(), source.getJdbcUrl(), e.getMessage());
log.warn(msg, e);
throw new SQLException(msg, e);
} catch (Exception e) {
String msg = String
.format("Couldnt create new external datasource connection with JDBC Class=%s, URL=%s, user=%s. Error: %s",
source.getJdbcDriver(), source.getJdbcUrl(), source.getUsername(), e.getMessage());
log.warn(msg, e);
throw new SQLException(msg);
}
}
return conn;
}
/*
* (non-Javadoc)
* @see org.gbif.ipt.service.manage.SourceManager#inspectColumn(org.gbif.ipt.model.SourceBase, int, int)
*/
public Set<String> inspectColumn(Source source, int column, int maxValues, int maxRows) throws SourceException {
Set<String> values = new HashSet<String>();
ClosableIterator<Object> iter = null;
try {
iter = iterSourceColumn(source, column, maxRows);
// get distinct values
while (iter.hasNext() && (maxValues < 1 || values.size() < maxValues)) {
Object obj = iter.next();
if (obj != null) {
String val = obj.toString();
values.add(val);
}
}
} catch (Exception e) {
log.error(e);
throw new SourceException("Error reading source " + source.getName() + ": " + e.getMessage());
} finally {
if (iter != null) {
iter.close();
}
}
return values;
}
/**
* @param limit limit for the recordset passed into the sql. If negative or zero no limit will be used
*/
private ClosableIterator<Object> iterSourceColumn(Source source, int column, int limit) throws Exception {
if (source instanceof SqlSource) {
SqlSource src = (SqlSource) source;
if (limit > 0) {
return new SqlColumnIterator(src, column, limit);
} else {
return new SqlColumnIterator(src, column);
}
} else {
return new ColumnIterator((FileSource) source, column);
}
}
/*
* (non-Javadoc)
* @see org.gbif.ipt.service.manage.SourceManager#peek(org.gbif.ipt.model.SourceBase)
*/
public List<String[]> peek(Source source, int rows) {
if (source instanceof SqlSource) {
return peek((SqlSource) source, rows);
}
// both excel and file implement FileSource
return peek((FileSource) source, rows);
}
private List<String[]> peek(FileSource source, int rows) {
List<String[]> preview = Lists.newArrayList();
if (source != null) {
ClosableReportingIterator<String[]> iter = null;
try {
iter = source.rowIterator();
while (rows > 0 && iter.hasNext()) {
rows--;
preview.add(iter.next());
}
} catch (Exception e) {
log.warn("Cant peek into source " + source.getName(), e);
} finally {
if (iter != null) {
iter.close();
}
}
}
return preview;
}
private List<String[]> peek(SqlSource source, int rows) {
List<String[]> preview = new ArrayList<String[]>();
Connection con = null;
Statement stmt = null;
ResultSet rs = null;
try {
con = getDbConnection(source);
if (con != null) {
// test sql
stmt = con.createStatement(ResultSet.TYPE_FORWARD_ONLY, ResultSet.CONCUR_READ_ONLY);
if ((source.getJdbcDriver() != null) && !source.getJdbcDriver().contains("odbc")) {
stmt.setFetchSize(rows);
}
rs = stmt.executeQuery(source.getSqlLimited(rows + 1));
// loop over result
while (rows > 0 && rs.next()) {
rows--;
String[] row = new String[source.getColumns()];
for (int idx = 0; idx < source.getColumns(); idx++) {
row[idx] = rs.getString(idx + 1);
}
preview.add(row);
}
}
} catch (SQLException e) {
log.warn("Cant read sql source " + source, e);
} finally {
// close result set, statement, and connection in that order
if (rs != null) {
try {
rs.close();
} catch (SQLException e) {
log.error("ResultSet could not be closed: " + e.getMessage(), e);
}
}
if (stmt != null) {
try {
stmt.close();
} catch (SQLException e) {
log.error("Statement could not be closed: " + e.getMessage(), e);
}
}
if (con != null) {
try {
con.close();
} catch (SQLException e) {
log.error("Connection could not be closed: " + e.getMessage(), e);
}
}
}
return preview;
}
public ClosableReportingIterator<String[]> rowIterator(Source source) throws SourceException {
if (source == null) {
return null;
}
try {
if (source instanceof SqlSource) {
return new SqlRowIterator((SqlSource) source);
}
// both excel and file implement FileSource
return ((FileSource) source).rowIterator();
} catch (Exception e) {
log.error("Exception while reading source " + source.getName(), e);
throw new SourceException("Cant build iterator for source " + source.getName() + " :" + e.getMessage());
}
}
}