package org.gbif.ipt.service.manage; import org.gbif.ipt.model.FileSource; import org.gbif.ipt.model.Resource; import org.gbif.ipt.model.Source; import org.gbif.ipt.service.ImportException; import org.gbif.ipt.service.InvalidFilenameException; import org.gbif.ipt.service.SourceException; import org.gbif.ipt.service.manage.impl.SourceManagerImpl; import org.gbif.utils.file.ClosableReportingIterator; import java.io.File; import java.util.List; import java.util.Set; import javax.annotation.Nullable; import com.google.inject.ImplementedBy; /** * This interface details all methods associated with a source entity. */ @ImplementedBy(SourceManagerImpl.class) public interface SourceManager { /** * Adds one text or excel file as a file source to a resource configuration. * The file will be analyzed to detect the character encoding and delimiters if not given explicitly in a dwc-a. * * @param resource resource * @param file the text or excel source file to be added to this resource * @param sourceName the preferred sourcename. If null the filename will be used * * @return file or excel source that has been added * * @throws ImportException if the file cant be copied or read * @throws org.gbif.ipt.service.InvalidFilenameException if the source filename contained illegal characters */ FileSource add(Resource resource, File file, @Nullable String sourceName) throws ImportException, InvalidFilenameException; /** * Checks if a source is readable and analyzes its file size, number of rows and other source properties which will * be updated. A full analysis might take some time in particular for sql source, so one should use the the quick * full=false one as much as possible. For SQL sources the database connection and number of avilable columns will be * checked. * * @param source the source to analyze * * @return problem message if source is not readable */ String analyze(Source source); /** * Return list of source's column names. * * @param source source * * @return list of column names */ List<String> columns(Source source); /** * Delete source from resource. * * @param resource resource * @param source source * * @return true if the source was deleted */ boolean delete(Resource resource, Source source); /** * Retrieves a set of unique string values used in a given column of a source. * The maximum number of distinct values can be restricted. * * @param source source * @param column column to inspect, zero based numbering as used in the dwc archives * @param maxValues maximun number of distinct values to return. If zero or negative all values will be retrieved. * @param maxRows maximum number of rows to inspect. If zero or negative all rows will be scanned. * * @return unique values found in the column */ Set<String> inspectColumn(Source source, int column, int maxValues, int maxRows) throws SourceException; /** * Return sample rows from the dataset. * * @param source source * @param rows number of rows to return * * @return sample rows from the dataset */ List<String[]> peek(Source source, int rows); /** * Create a ClosableReportingIterator iterator for a source. * * @param source source * * @return a ClosableReportingIterator for a source */ ClosableReportingIterator<String[]> rowIterator(Source source) throws SourceException; }