/*
 *    Geotoolkit.org - An Open Source Java GIS Toolkit
 *    http://www.geotoolkit.org
 *
 *    (C) 2001-2012, Open Source Geospatial Foundation (OSGeo)
 *    (C) 2009-2012, Geomatys
 *
 *    This library is free software; you can redistribute it and/or
 *    modify it under the terms of the GNU Lesser General Public
 *    License as published by the Free Software Foundation;
 *    version 2.1 of the License.
 *
 *    This library is distributed in the hope that it will be useful,
 *    but WITHOUT ANY WARRANTY; without even the implied warranty of
 *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 *    Lesser General Public License for more details.
 */
package org.geotoolkit.image.io;

import java.io.*; // Many imports, including some for javadoc only.
import java.net.URI;
import java.net.URL;
import java.net.URLConnection;
import java.nio.charset.Charset;
import java.nio.file.Path;
import java.util.Set;
import java.util.Locale;
import javax.imageio.spi.ImageReaderSpi;
import javax.imageio.stream.ImageInputStream;
import java.nio.channels.ReadableByteChannel;

import org.geotoolkit.io.LineFormat;
import org.geotoolkit.internal.io.LineReader;
import org.geotoolkit.resources.Vocabulary;


/**
 * Base class for image readers that expect a {@link BufferedReader} input source.
 * "<cite>Text images</cite>" are usually ASCII files where pixels values are actually
 * the geophysical values. This base class provides the following conveniences:
 * <p>
 * <ul>
 *   <li>Get a {@link BufferedReader} from the input types, which may be any type documented
 *       in the {@linkplain StreamImageReader super-class} plus {@link Reader}.</li>
 *   <li>Get a {@link LineFormat} for parsing a whole line as a record. Subclasses can override
 *       this method for parsing text files having non-numeric columns (angles, dates, <i>etc.</i>).</li>
 *   <li>Get the character encoding and the locale (for parsing numbers) from the fields declared
 *       in the {@linkplain Spi Service Provider}. Alternatively, subclasses can also get more
 *       control by overriding the {@link #getCharset(InputStream)} method.</li>
 * </ul>
 *
 * @author Martin Desruisseaux (IRD, Geomatys)
 * @version 3.08
 *
 * @see TextImageWriter
 *
 * @since 3.08 (derived from 1.2)
 * @module
 */
public abstract class TextImageReader extends StreamImageReader {
    /**
     * {@link #input} as a reader, or {@code null} if none.
     *
     * @see #getReader
     */
    private BufferedReader reader;

    /**
     * Constructs a new image reader.
     *
     * @param provider The {@link ImageReaderSpi} that is constructing this object, or {@code null}.
     */
    protected TextImageReader(final Spi provider) {
        super(provider);
    }

    /**
     * Returns the character set to use for decoding the string from the input stream. The default
     * implementation returns the {@linkplain Spi#charset character set} specified to the
     * {@link Spi} object given to this {@code TextImageReader} constructor. Subclasses can
     * override this method if they want to detect the character encoding in some other way.
     *
     * @param  input The input stream.
     * @return The character encoding, or {@code null} for the platform default encoding.
     * @throws IOException If reading from the input stream failed.
     *
     * @see Spi#charset
     */
    protected Charset getCharset(final InputStream input) throws IOException {
        return (originatingProvider instanceof Spi) ? ((Spi) originatingProvider).charset : null;
    }

    /**
     * Returns the locale specified by the provider for the data to be read,
     * or {@code null} if unspecified.
     *
     * @return The locale for the data to be read, or {@code null} if unspecified.
     */
    final Locale getDataLocale() {
        return (originatingProvider instanceof Spi) ? ((Spi) originatingProvider).locale : null;
    }

    /**
     * Returns the line format to use for parsing every line in the input stream. The default
     * implementation creates a new {@link LineFormat} instance using the locale specified by
     * {@link Spi#locale}. Subclasses should override this method if they want more control
     * on the parser to be created.
     *
     * @param  imageIndex the index of the image to be queried.
     * @return The object to use for parsing lines of text.
     * @throws IOException If reading from the input stream failed.
     *
     * @see Spi#locale
     */
    protected LineFormat getLineFormat(final int imageIndex) throws IOException {
        final Locale locale = getDataLocale();
        if (locale != null) {
            return new LineFormat(locale);
        }
        return new LineFormat();
    }

    /**
     * Returns the pad value for missing data, or {@link Double#NaN} if none. The pad value
     * applies to all columns except the one for
     * {@link org.geotoolkit.image.io.plugin.TextRecordImageReader#getColumnX x} and
     * {@link org.geotoolkit.image.io.plugin.TextRecordImageReader#getColumnY y} values, if any.
     * <p>
     * The default implementation returns the pad value specified to the {@link Spi} object given
     * to this {@code TextImageReader} constructor. Subclasses can override this method if they
     * want to detect the pad value in some other way.
     *
     * @param  imageIndex the index of the image to be queried.
     * @return The pad value, or {@link Double#NaN} if none.
     * @throws IOException If reading from the input stream failed.
     *
     * @see Spi#padValue
     */
    protected double getPadValue(final int imageIndex) throws IOException {
        return (originatingProvider instanceof Spi) ? ((Spi) originatingProvider).padValue : Double.NaN;
    }

    /**
     * Returns the {@linkplain #input input} as an {@linkplain BufferedReader buffered reader}.
     * If the input is already a buffered reader, it is returned unchanged. Otherwise this method
     * creates a new {@linkplain LineNumberReader line number reader} from various input types
     * including {@link File}, {@link URL}, {@link URLConnection}, {@link Reader},
     * {@link InputStream} and {@link ImageInputStream}.
     * <p>
     * This method creates a new {@linkplain BufferedReader reader} only when first invoked.
     * All subsequent calls will return the same instance. Consequently, the returned reader
     * should never be closed by the caller. It may be {@linkplain #close closed} automatically
     * when {@link #setInput setInput(...)}, {@link #reset() reset()} or
     * {@link #dispose() dispose()} methods are invoked.
     *
     * @return {@link #getInput} as a {@link BufferedReader}.
     * @throws IllegalStateException if the {@linkplain #input input} is not set.
     * @throws IOException If the input stream can't be created for another reason.
     *
     * @see #getInput
     * @see #getInputStream
     */
    protected BufferedReader getReader() throws IllegalStateException, IOException {
        if (reader == null) {
            final Object input = getInput();
            if (input instanceof BufferedReader) {
                reader = (BufferedReader) input;
                closeOnReset = null; // We don't own the underlying reader, so don't close it.
            } else if (input instanceof Reader) {
                reader = new LineReader((Reader) input);
                closeOnReset = null; // We don't own the underlying reader, so don't close it.
            } else {
                final InputStream stream = getInputStream();
                reader = new LineReader(getInputStreamReader(stream));
                // If the stream was scheduled for closing, close the wrapping reader instead.
                if (closeOnReset == stream) {
                    closeOnReset = reader;
                }
            }
        }
        return reader;
    }

    /**
     * Returns the specified {@link InputStream} as a {@link Reader}. The stream is decoded
     * with the character set returned by {@link #getCharset(InputStream)}, or with the
     * platform default encoding if that method returned {@code null}.
     *
     * @param  stream The input stream to wrap.
     * @return A reader decoding the given stream.
     * @throws IOException If {@link #getCharset(InputStream)} failed.
     */
    final Reader getInputStreamReader(final InputStream stream) throws IOException {
        final Charset charset = getCharset(stream);
        return (charset != null) ? new InputStreamReader(stream, charset) : new InputStreamReader(stream);
    }

    /**
     * Returns {@code true} if the specified line is a comment. This method is invoked automatically
     * during a {@link #read read} operation. The default implementation returns {@code true} if the
     * line is empty or if the first non-whitespace character is {@code '#'}, and {@code false}
     * otherwise. Override this method if comment lines should be determined in a different way.
     * <p>
     * A character is considered whitespace if either {@link Character#isSpaceChar(char)} or
     * {@link Character#isWhitespace(char)} accepts it, so that tabulations as well as
     * non-breaking spaces are skipped.
     *
     * @param  line A line to be parsed.
     * @return {@code true} if the line is a comment and should be ignored, or {@code false} if it
     *         should be parsed.
     */
    protected boolean isComment(final String line) {
        final int length = line.length();
        for (int i=0; i<length; i++) {
            final char c = line.charAt(i);
            // isSpaceChar alone rejects TAB and other control whitespace, while isWhitespace
            // alone rejects non-breaking spaces. Both are tested in order to skip all of them.
            if (!Character.isSpaceChar(c) && !Character.isWhitespace(c)) {
                return (c == '#');
            }
        }
        return true;
    }

    /**
     * Returns a string representation of the current stream position. For example this method
     * may return something like {@code "Line 14 in file HUV18204.asc"}. This method returns
     * {@code null} if the stream position is unknown and no message was given.
     * <p>
     * The file name is available only if the {@linkplain #getInput() input} is a {@link File},
     * a {@link Path} or a {@link URL}. The line number is available only if the reader is a
     * {@link LineNumberReader}.
     *
     * @param  message An optional message to append to the stream position, or {@code null}
     *         if none.
     * @return A string representation of current stream position.
     */
    protected String getPositionString(final String message) {
        final String file;
        final Object input = getInput();
        if (input instanceof File) {
            file = ((File) input).getName();
        } else if (input instanceof Path) {
            // Path is a declared input type; getFileName() is null for a path without elements.
            final Path name = ((Path) input).getFileName();
            file = (name != null) ? name.toString() : null;
        } else if (input instanceof URL) {
            file = ((URL) input).getFile();
        } else {
            file = null;
        }
        final Integer line = (reader instanceof LineNumberReader) ?
                ((LineNumberReader) reader).getLineNumber() : null;

        final Vocabulary resources = Vocabulary.getResources(getLocale());
        final String position;
        if (file != null) {
            if (line != null) {
                position = resources.getString(Vocabulary.Keys.FilePosition_2, file, line);
            } else {
                position = resources.getString(Vocabulary.Keys.File_1, file);
            }
        } else if (line != null) {
            position = resources.getString(Vocabulary.Keys.Line_1, line);
        } else {
            position = null;
        }
        if (position != null) {
            if (message != null) {
                return position + ": " + message;
            } else {
                return position;
            }
        } else {
            return message;
        }
    }

    /**
     * Closes the reader created by {@link #getReader()}. This method does nothing if
     * the reader is the {@linkplain #input input} instance given by the user rather
     * than a reader created by this class from a {@link File} or {@link URL} input.
     *
     * @throws IOException If an error occurred while closing the reader.
     *
     * @see #closeOnReset
     */
    @Override
    protected void close() throws IOException {
        // Forget the cached reader first, then delegate the closing to the super-class.
        reader = null;
        super.close();
    }




    /**
     * Service provider interface (SPI) for {@link TextImageReader}s. This SPI provides additional
     * fields controlling the character encoding ({@link #charset}), the locale to use for parsing
     * numbers, dates or other objects ({@link #locale}) and the value used in place of missing
     * pixel values ({@link #padValue}).
     * <p>
     * By default the {@code charset} and {@code locale} fields are initialized to {@code null},
     * which stands for the platform-dependent character encoding and locale. In addition the
     * {@code padValue} is set to {@link Double#NaN}, which means that there is no pad value. If
     * a subclass wants to fix the encoding, locale and pad value to some format-specific values,
     * it shall specify those values at construction time as in the example below:
     *
     * {@preformat java
     *     public Spi() {
     *         charset  = Charset.forName("ISO-8859-1"); // ISO Latin Alphabet No. 1
     *         locale   = Locale.US;
     *         padValue = -9999;
     *     }
     * }
     *
     * The table below summarizes the initial values.
     * Those values can be modified by subclass constructors.
     * <p>
     * <table border="1">
     *   <tr bgcolor="lightblue">
     *     <th>Field</th>
     *     <th>Value</th>
     *   </tr><tr>
     *     <td>&nbsp;{@link #inputTypes}&nbsp;</td>
     *     <td>&nbsp;{@link String}, {@link File}, {@link Path}, {@link URI}, {@link URL},
     *         {@link URLConnection}, {@link Reader}, {@link InputStream}, {@link ImageInputStream},
     *         {@link ReadableByteChannel}&nbsp;</td>
     *   </tr><tr>
     *     <td>&nbsp;{@link #suffixes}&nbsp;</td>
     *     <td>&nbsp;{@code "txt"}, {@code "TXT"},
     *               {@code "asc"}, {@code "ASC"},
     *               {@code "dat"}, {@code "DAT"}&nbsp;</td>
     *   </tr><tr>
     *     <td>&nbsp;{@link #charset}&nbsp;</td>
     *     <td>&nbsp;{@code null} (stands for the
     *         {@linkplain Charset#defaultCharset() platform default})&nbsp;</td>
     *   </tr><tr>
     *     <td>&nbsp;{@link #locale}&nbsp;</td>
     *     <td>&nbsp;{@code null} (stands for the
     *         {@linkplain Locale#getDefault() platform default})&nbsp;</td>
     *   </tr><tr>
     *     <td>&nbsp;{@link #padValue}&nbsp;</td>
     *     <td>&nbsp;{@link Double#NaN} (stands for no pad-value)&nbsp;</td>
     *   </tr><tr>
     *     <td colspan="2" align="center">See
     *     {@linkplain org.geotoolkit.image.io.SpatialImageReader.Spi super-class javadoc}
     *     for remaining fields</td>
     *   </tr>
     * </table>
     *
     * @author Martin Desruisseaux (IRD, Geomatys)
     * @version 3.07
     *
     * @see TextImageWriter.Spi
     *
     * @since 3.08 (derived from 2.4)
     * @module
     */
    protected abstract static class Spi extends StreamImageReader.Spi {
        /**
         * List of legal input types for {@link TextImageReader}.
         */
        private static final Class<?>[] INPUT_TYPES = new Class<?>[] {
            File.class,
            Path.class,
            URI.class,
            URL.class,
            URLConnection.class,
            Reader.class,
            InputStream.class,
            ImageInputStream.class,
            ReadableByteChannel.class,
            String.class // To be interpreted as file path.
        };

        /**
         * Default list of file suffixes. This list is shared with {@link TextImageWriter}.
         */
        static final String[] SUFFIXES = new String[] {
            "txt", "TXT", "asc", "ASC", "dat", "DAT"
        };

        /**
         * Character encoding, or {@code null} for the default. This field is initially
         * {@code null}, which means to use the platform-dependent encoding. Subclasses
         * shall set a non-null value if the files to be decoded use some specific character
         * encoding.
         *
         * @see TextImageReader#getCharset(InputStream)
         */
        protected Charset charset;

        /**
         * The locale for numbers or dates parsing. For example {@link Locale#US} means that
         * numbers are expected to use a dot for the decimal separator. This field is initially
         * {@code null}, which means that the {@linkplain Locale#getDefault() default locale}
         * will be used.
         *
         * @see TextImageReader#getLineFormat(int)
         */
        protected Locale locale;

        /**
         * The pad value, or {@link Double#NaN} if none. Every occurrence of a pixel value equal
         * to this pad value will be replaced by {@link Double#NaN} during the read operation.
         * Note that this replacement doesn't apply to non-pixel values (for example <var>x</var>,
         * <var>y</var> coordinates in the format read by
         * {@link org.geotoolkit.image.io.plugin.TextRecordImageReader}).
         *
         * @see TextImageReader#getPadValue(int)
         */
        protected double padValue;

        /**
         * Constructs a quasi-blank {@code TextImageReader.Spi}. This constructor initializes
         * the fields as documented in the <a href="#skip-navbar_top">class javadoc</a>. It is
         * up to the subclass to initialize all other instance variables in order to provide
         * working versions of all methods.
         * <p>
         * For efficiency reasons, the above fields are initialized to shared arrays. Subclasses
         * can assign new arrays, but should not modify the default array content.
         */
        protected Spi() {
            inputTypes = INPUT_TYPES;
            suffixes   = SUFFIXES;
            padValue   = Double.NaN;
        }

        /**
         * Returns {@code true} if the supplied source object appears to be of the format
         * supported by this reader. The default implementation tries to parse the first
         * few lines up to 2048 characters, as below:
         *
         * {@preformat java
         *     return canDecodeInput(source, 2048);
         * }
         *
         * @param  source The object (typically an {@link ImageInputStream}) to be decoded.
         * @return {@code true} if the source <em>seems</em> readable.
         * @throws IOException If an error occurred during reading.
         */
        @Override
        public boolean canDecodeInput(final Object source) throws IOException {
            return canDecodeInput(source, 2048);
        }

        /**
         * Returns {@code true} if the supplied source object appears to be of the format
         * supported by this reader. The default implementation tries to parse the first
         * few lines up to the specified number of characters, then gives those lines to
         * the {@link #isValidHeader(Set)} and {@link #isValidContent(double[][])} methods.
         * <p>
         * The default implementation is suitable for
         * {@link org.geotoolkit.image.io.plugin.TextMatrixImageReader}, i.e.
         * it expects only rows for pixel values (no header) and all rows shall have the
         * same length. If this behavior needs to be changed, consider overriding the
         * {@code isValidHeader} and {@code isValidContent} methods.
         *
         * @param  source The object (typically an {@link ImageInputStream}) to be decoded.
         * @param  readAheadLimit Maximum number of characters to read. If this amount is reached
         *         but this method is still unable to make a choice, then it conservatively returns
         *         {@code false}.
         * @return {@code true} if the source <em>seems</em> readable.
         * @throws IOException If an error occurred during reading.
         */
        protected boolean canDecodeInput(final Object source, final int readAheadLimit)
                throws IOException
        {
            final TestReader test = new TestReader(this);
            test.setInput(source);
            try {
                return test.canDecode(readAheadLimit);
            } finally {
                // Always close the temporary reader, whether the test succeeded or failed.
                test.close();
            }
        }

        /**
         * Invoked by {@link #canDecodeInput(Object, int)} for determining if the given header is
         * likely to be valid. This method receives in argument a {@code keywords} set containing
         * the first word of every <cite>header lines</cite> (defined below), converted to upper
         * cases using the {@linkplain #locale} defined in this provider.
         * <p>
         * A <cite>header line</cite> is defined as a line which is not a
         * {@linkplain TextImageReader#isComment(String) comment line}, appears before
         * the first row of pixel values and where the first non-blank character is a
         * {@linkplain Character#isJavaIdentifierStart(char) Java identifier start}.
         * <p>
         * The default implementation returns {@code true} if the given set is empty.
         * In other words, by default no header is allowed in the data file.
         *
         * @param  keywords The first word found in every <cite>header lines</cite>,
         *         converted to upper-case.
         * @return {@code true} if the set of keywords is known to this format.
         *
         * @since 3.07
         */
        protected boolean isValidHeader(final Set<String> keywords) {
            return keywords.isEmpty();
        }

        /**
         * Invoked by {@link #canDecodeInput(Object, int)} for determining if the given rows are
         * likely to be valid. This method receives in argument a {@code rows} array containing
         * the first few lines of data. The number of rows depends on the average row length and
         * the {@code readAheadLimit} argument given to {@code canDecodeInput}.
         * <p>
         * The default implementation returns {@code true} if there is at least one row,
         * every row has the same number of columns and that number of columns is accepted
         * by {@link #isValidColumnCount(int)}.
         *
         * @param  rows The first few rows.
         * @return {@code true} if the given rows seem to have a valid content.
         */
        protected boolean isValidContent(final double[][] rows) {
            if (rows.length == 0) {
                return false;
            }
            final int length = rows[0].length;
            for (int i=1; i<rows.length; i++) {
                if (rows[i].length != length) {
                    return false;
                }
            }
            return isValidColumnCount(length);
        }

        /**
         * Invoked by {@link #isValidContent(double[][])} for determining if the given number
         * of columns is likely to be valid. This method receives in argument the length of
         * every row that was given to {@code isValidContent}, when that length is constant.
         * <p>
         * The default implementation returns {@code true} if the number of columns is greater
         * than zero. Subclasses can override this method if they know the expected number of
         * columns.
         *
         * @param  count The number of columns in the first few rows.
         * @return {@code true} if the given number of columns seems to be valid.
         *
         * @since 3.07
         */
        protected boolean isValidColumnCount(final int count) {
            return count > 0;
        }
    }
}