package tap.data; /* * This file is part of TAPLibrary. * * TAPLibrary is free software: you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * TAPLibrary is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public License * along with TAPLibrary. If not, see <http://www.gnu.org/licenses/>. * * Copyright 2015-2017 - Astronomisches Rechen Institut (ARI) */ import java.io.IOException; import java.io.InputStream; import java.util.NoSuchElementException; import org.xml.sax.SAXParseException; import adql.db.DBType; import tap.TAPException; import tap.metadata.TAPColumn; import tap.metadata.VotType; import tap.metadata.VotType.VotDatatype; import uk.ac.starlink.table.ColumnInfo; import uk.ac.starlink.table.DescribedValue; import uk.ac.starlink.table.StarTable; import uk.ac.starlink.table.StarTableFactory; import uk.ac.starlink.table.TableBuilder; import uk.ac.starlink.table.TableFormatException; import uk.ac.starlink.table.TableSink; /** * <p>{@link TableIterator} which lets iterate over a VOTable input stream using STIL.</p> * * <p>{@link #getColType()} will return TAP type based on the type declared in the VOTable metadata part.</p> * * @author Grégory Mantelet (ARI) * @version 2.1 (03/2017) * @since 2.0 */ public class VOTableIterator implements TableIterator { /** Message of the IOException sent when the streaming is aborted. */ protected static final String STREAM_ABORTED_MESSAGE = "Streaming aborted!"; /** * <p>This class lets consume the metadata and rows of a VOTable document.</p> * * <p> * On the contrary to a usual TableSink, this one will stop after each row until this row has been fetched by {@link VOTableIterator}. * </p> * * <p> * Besides, the metadata returned by StarTable are immediately converted into TAP metadata. If this conversion fails, the error is kept * in metaError, so that the VOTable reading can continue if the fact that metadata are missing is not a problem for the class using the * {@link VOTableIterator}. * </p> * * @author Grégory Mantelet (ARI) * @version 2.1 (03/2017) * @since 2.0 */ protected static class StreamVOTableSink implements TableSink { /** <p>The accepted VOTable metadata, after conversion from StarTable metadata.</p> * <p><i>Note: this may be NULL after the metadata has been read if an error occurred while performing the conversion. * In this case, metaError contains this error.</> */ private TAPColumn[] meta = null; /** The error which happened while converting the StarTable metadata into TAP metadata. */ private DataReadException metaError = null; /** The last accepted row. */ private Object[] pendingRow = null; /** Flag meaning that the end of the stream has been reached * OR if the VOTable reading should be stopped before reading more rows. */ private boolean endReached = false; /** * <p>Stop nicely reading the VOTable.</p> * * <p> * An exception will be thrown to the STILTS class using this TableSink, * but no exception should be thrown to VOTableIterator. * </p> */ public void stop(){ stop(null); } /** * <p>Stop nicely reading the VOTable.</p> * * <p> * An exception will be thrown to the STILTS class using this TableSink, * but no exception should be thrown to VOTableIterator. * </p> * * @param reason Reason why this Sink should be stop. * <i>This should be used in case of external grave error that * should be raised when trying to access data through * VOTableIterator. * Example: a wrong VOTable format.</i> */ public synchronized void stop(final Throwable reason){ // Prevent further attempt to read the input stream: endReached = true; // Set the stop reason (if any): if (reason != null && metaError == null){ // Case: Wrong VOTable format: if (reason instanceof TableFormatException){ // build the most precise error message as possible: String msg = "The input file is not a valid VOTable document!"; if (reason.getCause() != null){ if (reason.getCause() instanceof SAXParseException){ SAXParseException spe = (SAXParseException)reason.getCause(); msg += " Cause: [l." + spe.getLineNumber() + ", c." + spe.getColumnNumber() + "] " + spe.getMessage(); }else if (reason.getCause().getMessage() != null) msg += " Cause: " + reason.getCause().getMessage(); else msg += " Cause: {" + reason.getCause().getClass().getName() + "}"; } // create the exception: metaError = new DataReadException(msg, reason); } // Case: Unknown reason! else if (reason.getMessage() != null && !reason.getMessage().equals(STREAM_ABORTED_MESSAGE)) metaError = new DataReadException("Unexpected error while reading the uploaded VOTable!", reason); } // Stop waiting (=> the reading is aborted): notifyAll(); } @Override public synchronized void acceptMetadata(final StarTable metaTable) throws TableFormatException{ try{ // Convert the StartTable metadata into TAP metadata: meta = extractColMeta(metaTable); }catch(DataReadException dre){ // Save the error ; this error will be throw when a call to getMetadata() will be done: metaError = dre; }finally{ // Free all waiting threads: notifyAll(); } } @Override public synchronized void acceptRow(final Object[] row) throws IOException{ try{ // Wait until the last accepted row has been consumed: while(!endReached && pendingRow != null) wait(); /* If the end has been reached, this is not normal * (because endRows() is always called after acceptRow()...so, it means the iteration has been aborted before the end) * and so the stream reading should be interrupted: */ if (endReached) throw new IOException(STREAM_ABORTED_MESSAGE); // Otherwise, keep the given row: pendingRow = row; /* Security for the cases where a row to accept is NULL. * In such case, pendingRow will be set to NULL and the function getRow() will wait for ever. * This case is not supposed to happen because the caller of acceptRow(...) should not give a NULL row... * ...which should then mean that the end of the stream has been reached. */ if (pendingRow == null) endReached = true; }catch(InterruptedException ie){ /* If the thread has been interrupted, set this TableSink in a state similar to * when the end of the stream has been reached: */ pendingRow = null; endReached = true; }finally{ // In all cases, all waiting threads must be freed: notifyAll(); } } @Override public synchronized void endRows() throws IOException{ try{ // Wait until the last accepted row has been consumed: while(!endReached && pendingRow != null) wait(); }catch(InterruptedException ie){ /* Nothing to do in particular ; the end of the stream will be set anyway. */ }finally{ // No more rows are available: pendingRow = null; // Set the END flag: endReached = true; // Notify all waiting threads that the end has been reached: notifyAll(); } } /** * <p>Get the metadata found in the VOTable.</p> * * <p><i>Note: * This method is blocking until metadata are fully available by this TableSink * or if an error occurred while converting them in TAP metadata. * A Thread interruption will also make this function returning. * </i></p> * * @return The metadata found in the VOTable header. * * @throws DataReadException If the metadata can not be interpreted correctly. */ public synchronized TAPColumn[] getMeta() throws DataReadException{ try{ // Wait until metadata are available, or if an error has occurred while accepting them: while(metaError == null && meta == null) wait(); // If there was an error while interpreting the accepted metadata, throw it: if (metaError != null) throw metaError; // Or if no metadata can be fetched: else if (meta == null || meta.length == 0) throw (metaError = new DataReadException("Unexpected VOTable document: no FIELD can be found!")); // Otherwise, just return the metadata: return meta; }catch(InterruptedException ie){ /* If the thread has been interrupted, set this TableSink in a state similar to * when the end of the stream has been reached: */ endReached = true; /* Return the metadata ; * NULL will be returned if the interruption has occurred before the real reading of the VOTable metadata: */ return meta; }finally{ // In all cases, the waiting threads must be freed: notifyAll(); } } /** * <p>Get the last accepted row.</p> * * <p><i>Note: * This function is blocking until a row has been accepted or the end of the stream has been reached. * A Thread interruption will also make this function returning. * </i></p> * * @return The last accepted row, * or <i>NULL</i> if no more rows are available or if the iteration has been interrupted/canceled. */ public synchronized Object[] getRow(){ try{ // Wait until a row has been accepted or the end has been reached: while(!endReached && pendingRow == null) wait(); // If there is no more rows, just return NULL (meaning for the called "end of stream"): if (endReached && pendingRow == null) return null; /* Otherwise, reset pendingRow to NULL in order to enable the reading of the next row, * and finally return the last accepted row: */ Object[] row = pendingRow; pendingRow = null; return row; }catch(InterruptedException ie){ /* If the thread has been interrupted, set this TableSink in a state similar to * when the end of the stream has been reached: */ endReached = true; // Return NULL, meaning the end of the stream has been reached: return null; }finally{ // In all cases, the waiting threads must be freed: notifyAll(); } } /** * Extract an array of {@link TAPColumn} objects. Each corresponds to one of the columns listed in the given table, * and so corresponds to the metadata of a column. * * @param table {@link StarTable} which contains only the columns' information. * * @return The corresponding list of {@link TAPColumn} objects. * * @throws DataReadException If there is a problem while resolving the field datatype (for instance: unknown datatype, a multi-dimensional array is provided, a bad number format for the arraysize). */ protected TAPColumn[] extractColMeta(final StarTable table) throws DataReadException{ // Count the number columns and initialize the array: TAPColumn[] columns = new TAPColumn[table.getColumnCount()]; // Add all columns meta: for(int i = 0; i < columns.length; i++){ // get the field: ColumnInfo colInfo = table.getColumnInfo(i); // get the datatype: String datatype = getAuxDatumValue(colInfo, "Datatype"); // get the arraysize: String arraysize = ColumnInfo.formatShape(colInfo.getShape()); // get the xtype: String xtype = getAuxDatumValue(colInfo, "xtype"); // Resolve the field type: DBType type; try{ type = resolveVotType(datatype, arraysize, xtype).toTAPType(); }catch(TAPException te){ if (te instanceof DataReadException) throw (DataReadException)te; else throw new DataReadException(te.getMessage(), te); } // build the TAPColumn object: TAPColumn col = new TAPColumn(colInfo.getName(), type, colInfo.getDescription(), colInfo.getUnitString(), colInfo.getUCD(), colInfo.getUtype()); col.setPrincipal(false); col.setIndexed(false); col.setStd(false); // append it to the array: columns[i] = col; } return columns; } /** * Extract the specified auxiliary datum value from the given {@link ColumnInfo}. * * @param colInfo {@link ColumnInfo} from which the auxiliary datum must be extracted. * @param auxDatumName The name of the datum to extract. * * @return The extracted value as String. */ protected String getAuxDatumValue(final ColumnInfo colInfo, final String auxDatumName){ DescribedValue value = colInfo.getAuxDatumByName(auxDatumName); return (value != null) ? value.getValue().toString() : null; } } /** Stream containing the VOTable on which this {@link TableIterator} is iterating. */ protected final InputStream input; /** The StarTable consumer which is used to iterate on each row. */ protected final StreamVOTableSink sink; /** Indicate whether the row iteration has already started. */ protected boolean iterationStarted = false; /** Indicate whether the last row has already been reached. */ protected boolean endReached = false; /** The last read row. Column iteration is done on this array. */ protected Object[] row; /** Index of the last read column (=0 just after {@link #nextRow()} and before {@link #nextCol()}, ={@link #nbCol} after the last column has been read). */ protected int indCol = -1; /** Number of columns available according to the metadata. */ protected int nbCol = 0; /** * Build a TableIterator able to read rows and columns inside the given VOTable input stream. * * @param input Input stream over a VOTable document. * * @throws NullPointerException If NULL is given in parameter. * @throws DataReadException If the given VOTable can not be parsed. */ public VOTableIterator(final InputStream input) throws DataReadException{ // An input stream MUST BE provided: if (input == null) throw new NullPointerException("Missing VOTable document input stream over which to iterate!"); this.input = input; try{ // Set the VOTable builder/interpreter: final TableBuilder tb = (new StarTableFactory()).getTableBuilder("votable"); // Build the TableSink to use: sink = new StreamVOTableSink(); // Initiate the stream process: Thread streamThread = new Thread(){ @Override public void run(){ try{ tb.streamStarTable(input, sink, null); }catch(IOException e){ /* Stop the VOTable sink *(otherwise it may still waiting for a Thread notification to wake it up): */ sink.stop(e); } } }; streamThread.start(); }catch(Exception ex){ throw new DataReadException("Unable to parse/read the given VOTable input stream!", ex); } } @Override public TAPColumn[] getMetadata() throws DataReadException{ return sink.getMeta(); } @Override public boolean nextRow() throws DataReadException{ // If no more rows, return false directly: if (endReached) return false; // Fetch the row: row = sink.getRow(); // Reset the column iteration: if (!iterationStarted){ iterationStarted = true; nbCol = sink.getMeta().length; } indCol = 0; // Tells whether there is more rows or not: endReached = (row == null); return !endReached; } @Override public boolean hasNextCol() throws IllegalStateException, DataReadException{ // Check the read state: checkReadState(); // Determine whether the last column has been reached or not: return (indCol < nbCol); } @Override public Object nextCol() throws NoSuchElementException, IllegalStateException, DataReadException{ // Check the read state and ensure there is still at least one column to read: if (!hasNextCol()) throw new NoSuchElementException("No more field to read!"); // Get the column value: return row[indCol++]; } @Override public DBType getColType() throws IllegalStateException, DataReadException{ // Basically check the read state (for rows iteration): checkReadState(); // Check deeper the read state (for columns iteration): if (indCol <= 0) throw new IllegalStateException("No field has yet been read!"); else if (indCol > nbCol) throw new IllegalStateException("All fields have already been read!"); // Return the column type: return sink.getMeta()[indCol - 1].getDatatype(); } @Override public void close() throws DataReadException{ endReached = true; sink.stop(); // input.close(); // in case sink.stop() is not enough to stop the VOTable reading! } /** * <p>Check the row iteration state. That's to say whether:</p> * <ul> * <li>the row iteration has started = the first row has been read = a first call of {@link #nextRow()} has been done</li> * <li>AND the row iteration is not finished = the last row has been read.</li> * </ul> * @throws IllegalStateException */ protected void checkReadState() throws IllegalStateException{ if (!iterationStarted) throw new IllegalStateException("No row has yet been read!"); else if (endReached) throw new IllegalStateException("End of VOTable file already reached!"); } /** * Resolve a VOTable field type by using the datatype, arraysize and xtype strings as specified in a VOTable document. * * @param datatype Attribute value of VOTable corresponding to the datatype. * @param arraysize Attribute value of VOTable corresponding to the arraysize. * @param xtype Attribute value of VOTable corresponding to the xtype. * * @return The resolved VOTable field type. * * @throws DataReadException If a field datatype is unknown or missing. */ public static VotType resolveVotType(final String datatype, final String arraysize, final String xtype) throws DataReadException{ // If no datatype is specified, return immediately a CHAR(*) type: if (datatype == null || datatype.trim().length() == 0) throw new DataReadException("missing VOTable required field: \"datatype\"!"); // Identify the specified datatype: VotDatatype votdatatype; try{ votdatatype = VotDatatype.valueOf(datatype.toUpperCase()); }catch(IllegalArgumentException iae){ throw new DataReadException("unknown field datatype: \"" + datatype + "\""); } // Build the VOTable type: return new VotType(votdatatype, arraysize, xtype); } }