/*
* Geotoolkit.org - An Open Source Java GIS Toolkit
* http://www.geotoolkit.org
*
* (C) 2001-2012, Open Source Geospatial Foundation (OSGeo)
* (C) 2009-2012, Geomatys
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation;
* version 2.1 of the License.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*/
package org.geotoolkit.image.io;
import java.io.*; // Many imports, including some for javadoc only.
import java.net.URI;
import java.net.URL;
import java.net.URLConnection;
import java.nio.charset.Charset;
import java.nio.file.Path;
import java.util.Set;
import java.util.Locale;
import javax.imageio.spi.ImageReaderSpi;
import javax.imageio.stream.ImageInputStream;
import java.nio.channels.ReadableByteChannel;
import org.geotoolkit.io.LineFormat;
import org.geotoolkit.internal.io.LineReader;
import org.geotoolkit.resources.Vocabulary;
/**
* Base class for image readers that expect a {@link BufferedReader} input source.
* "<cite>Text images</cite>" are usually ASCII files where pixels values are actually
* the geophysical values. This base class provides the following conveniences:
* <p>
* <ul>
* <li>Get a {@link BufferedReader} from the input types, which may be any type documented
* in the {@linkplain StreamImageReader super-class} plus {@link Reader}.</li>
* <li>Get a {@link LineFormat} for parsing a whole line as a record. Subclasses can override
* this method for parsing text files having non-numeric columns (angles, dates, <i>etc.</i>).</li>
* <li>Get the character encoding and the locale (for parsing numbers) from the fields declared
* in the {@linkplain Spi Service Provider}. Alternatively, subclasses can also get more
* control by overriding the {@link #getCharset(InputStream)} method.</li>
* </ul>
*
* @author Martin Desruisseaux (IRD, Geomatys)
* @version 3.08
*
* @see TextImageWriter
*
* @since 3.08 (derived from 1.2)
* @module
*/
public abstract class TextImageReader extends StreamImageReader {
/**
* {@link #input} as a reader, or {@code null} if none.
*
* @see #getReader
*/
private BufferedReader reader;
/**
* Constructs a new image reader.
*
* @param provider The {@link ImageReaderSpi} that is constructing this object, or {@code null}.
*/
protected TextImageReader(final Spi provider) {
// All initialization is delegated to the parent constructor; the reader
// field stays null until getReader() is first invoked.
super(provider);
}
/**
* Returns the character set to use for decoding the string from the input stream. The default
* implementation returns the {@linkplain Spi#charset character set} specified to the
* {@link Spi} object given to this {@code TextImageReader} constructor. Subclasses can
* override this method if they want to detect the character encoding in some other way.
*
* @param input The input stream.
* @return The character encoding, or {@code null} for the platform default encoding.
* @throws IOException If reading from the input stream failed.
*
* @see Spi#charset
*/
protected Charset getCharset(final InputStream input) throws IOException {
    // Only the Spi type declared in this class carries a charset field. With any
    // other provider (or none), fall back to null, i.e. the platform default.
    if (originatingProvider instanceof Spi) {
        final Spi provider = (Spi) originatingProvider;
        return provider.charset;
    }
    return null;
}
/**
* Returns the locale specified by the provider for the data to be read,
* or {@code null} if unspecified.
*
* @return The locale for the data to be read, or {@code null} if unspecified.
*/
final Locale getDataLocale() {
    // The locale is known only when the originating provider is our own Spi type.
    if (!(originatingProvider instanceof Spi)) {
        return null;
    }
    return ((Spi) originatingProvider).locale;
}
/**
* Returns the line format to use for parsing every line in the input stream. The default
* implementation creates a new {@link LineFormat} instance using the locale specified by
* {@link Spi#locale}. Subclasses should override this method if they want more control
* on the parser to be created.
*
* @param imageIndex the index of the image to be queried.
* @return The object to use for parsing lines of text.
* @throws IOException If reading from the input stream failed.
*
* @see Spi#locale
*/
protected LineFormat getLineFormat(final int imageIndex) throws IOException {
    final Locale dataLocale = getDataLocale();
    // No locale specified by the provider: use the no-argument LineFormat constructor.
    return (dataLocale == null) ? new LineFormat() : new LineFormat(dataLocale);
}
/**
* Returns the pad value for missing data, or {@link Double#NaN} if none. The pad value
* applies to all columns except the ones for
* {@link org.geotoolkit.image.io.plugin.TextRecordImageReader#getColumnX x} and
* {@link org.geotoolkit.image.io.plugin.TextRecordImageReader#getColumnY y} values, if any.
* <p>
* The default implementation returns the pad value specified to the {@link Spi} object given
* to this {@code TextImageReader} constructor. Subclasses can override this method if they
* want to detect the pad value in some other way.
*
* @param imageIndex the index of the image to be queried.
* @return The pad value, or {@link Double#NaN} if none.
* @throws IOException If reading from the input stream failed.
*
* @see Spi#padValue
*/
protected double getPadValue(final int imageIndex) throws IOException {
    // Without our own Spi type there is no pad value to report.
    if (!(originatingProvider instanceof Spi)) {
        return Double.NaN;
    }
    return ((Spi) originatingProvider).padValue;
}
/**
* Returns the {@linkplain #input input} as an {@linkplain BufferedReader buffered reader}.
* If the input is already a buffered reader, it is returned unchanged. Otherwise this method
* creates a new {@linkplain LineNumberReader line number reader} from various input types
* including {@link File}, {@link URL}, {@link URLConnection}, {@link Reader},
* {@link InputStream} and {@link ImageInputStream}.
* <p>
* This method creates a new {@linkplain BufferedReader reader} only when first invoked.
* All subsequent calls will return the same instance. Consequently, the returned reader
* should never be closed by the caller. It may be {@linkplain #close closed} automatically
* when {@link #setInput setInput(...)}, {@link #reset() reset()} or {@link #dispose()
* dispose()} methods are invoked.
*
* @return {@link #getInput} as a {@link BufferedReader}.
* @throws IllegalStateException if the {@linkplain #input input} is not set.
* @throws IOException If the input stream can't be created for another reason.
*
* @see #getInput
* @see #getInputStream
*/
protected BufferedReader getReader() throws IllegalStateException, IOException {
    // Fast path: the reader has already been created by a previous call.
    if (reader != null) {
        return reader;
    }
    final Object source = getInput();
    if (source instanceof BufferedReader) {
        // User-supplied buffered reader: use it directly and never close it ourselves.
        reader = (BufferedReader) source;
        closeOnReset = null;
    } else if (source instanceof Reader) {
        // User-supplied plain reader: wrap it, but the underlying reader is still not ours.
        reader = new LineReader((Reader) source);
        closeOnReset = null;
    } else {
        // Any other input type goes through the input stream.
        final InputStream in = getInputStream();
        reader = new LineReader(getInputStreamReader(in));
        // If the stream was scheduled for closing, close the wrapping reader in its place.
        if (closeOnReset == in) {
            closeOnReset = reader;
        }
    }
    return reader;
}
/**
* Returns the specified {@link InputStream} as a {@link Reader}.
*/
final Reader getInputStreamReader(final InputStream stream) throws IOException {
    final Charset encoding = getCharset(stream);
    // A null charset means "platform default", which is what the one-argument
    // InputStreamReader constructor uses.
    if (encoding == null) {
        return new InputStreamReader(stream);
    }
    return new InputStreamReader(stream, encoding);
}
/**
* Returns {@code true} if the specified line is a comment. This method is invoked automatically
* during a {@link #read read} operation. The default implementation returns {@code true} if the
* line is empty or if the first non-whitespace character is {@code '#'}, and {@code false}
* otherwise. Override this method if comment lines should be determined in a different way.
*
* @param line A line to be parsed.
* @return {@code true} if the line is a comment and should be ignored, or {@code false} if it
* should be parsed.
*/
protected boolean isComment(final String line) {
    final int length = line.length();
    for (int i = 0; i < length; i++) {
        final char c = line.charAt(i);
        // Skip every kind of blank character. Character.isSpaceChar alone does not
        // recognize tabs (or other control whitespace), so a tab-indented "# ..." line
        // or a tab-only line would wrongly be reported as data. Character.isWhitespace
        // alone does not recognize no-break spaces, so both tests are combined.
        if (Character.isWhitespace(c) || Character.isSpaceChar(c)) {
            continue;
        }
        // First significant character decides: '#' marks a comment line.
        return c == '#';
    }
    // Empty or blank-only line: nothing to parse, so treat it as a comment.
    return true;
}
/**
* Returns a string representation of the current stream position. For example this method
* may return something like {@code "Line 14 in file HUV18204.asc"}. If the stream position
* is unknown, this method returns the given message alone (which may be {@code null}).
*
* @param message An optional message to append to the stream position, or {@code null}
* if none.
* @return A string representation of current stream position.
*/
protected String getPositionString(final String message) {
    // Derive a file name from the input, for the input types handled here.
    final Object source = getInput();
    String name = null;
    if (source instanceof File) {
        name = ((File) source).getName();
    } else if (source instanceof URL) {
        name = ((URL) source).getFile();
    }

    // The line number is available only if the reader keeps track of it.
    Integer lineNumber = null;
    if (reader instanceof LineNumberReader) {
        lineNumber = ((LineNumberReader) reader).getLineNumber();
    }

    // Format whatever part of the position is known.
    final Vocabulary resources = Vocabulary.getResources(getLocale());
    final String position;
    if (name != null && lineNumber != null) {
        position = resources.getString(Vocabulary.Keys.FilePosition_2, name, lineNumber);
    } else if (name != null) {
        position = resources.getString(Vocabulary.Keys.File_1, name);
    } else if (lineNumber != null) {
        position = resources.getString(Vocabulary.Keys.Line_1, lineNumber);
    } else {
        position = null;
    }

    // Without a position, the message (possibly null) is returned as-is.
    if (position == null) {
        return message;
    }
    return (message != null) ? position + ": " + message : position;
}
/**
* Closes the reader created by {@link #getReader()}. This method does nothing if
* the reader is the {@linkplain #input input} instance given by the user rather
* than a reader created by this class from a {@link File} or {@link URL} input.
*
* @throws IOException If an error occurred while closing the reader.
*
* @see #closeOnReset
*/
@Override
protected void close() throws IOException {
// Forget the cached reader first, so that the next getReader() call builds a new
// one even if super.close() throws.
reader = null;
// NOTE(review): the actual closing is presumably done by the parent class through
// closeOnReset (see getReader()) -- confirm against StreamImageReader.close().
super.close();
}
/**
* Service provider interface (SPI) for {@link TextImageReader}s. This SPI provides additional
* fields controlling the character encoding ({@link #charset}), the locale to use for parsing
* numbers, dates or other objects ({@link #locale}) and the value used in place of missing
* pixel values ({@link #padValue}).
* <p>
* By default the {@code charset} and {@code locale} fields are initialized to {@code null},
* which stands for the platform-dependent character encoding and locale. In addition the
* {@code padValue} is set to {@link Double#NaN}, which means that there is no pad value. If
* a subclass wants to fix the encoding, locale and pad value to some format-specific values,
* it shall specify those values at construction time as in the example below:
*
* {@preformat java
* public Spi() {
* charset = Charset.forName("ISO-8859-1"); // ISO Latin Alphabet No. 1
* locale = Locale.US;
* padValue = -9999;
* }
* }
*
* The table below summarizes the initial values.
* Those values can be modified by subclass constructors.
* <p>
* <table border="1">
* <tr bgcolor="lightblue">
* <th>Field</th>
* <th>Value</th>
* </tr><tr>
* <td> {@link #inputTypes} </td>
* <td> {@link String}, {@link File}, {@link Path}, {@link URI}, {@link URL}, {@link URLConnection},
* {@link Reader}, {@link InputStream}, {@link ImageInputStream},
* {@link ReadableByteChannel} </td>
* </tr><tr>
* <td> {@link #suffixes} </td>
* <td> {@code "txt"}, {@code "TXT"},
* {@code "asc"}, {@code "ASC"},
* {@code "dat"}, {@code "DAT"} </td>
* </tr><tr>
* <td> {@link #charset} </td>
* <td> {@code null} (stands for the
* {@linkplain Charset#defaultCharset() platform default}) </td>
* </tr><tr>
* <td> {@link #locale} </td>
* <td> {@code null} (stands for the
* {@linkplain Locale#getDefault() platform default}) </td>
* </tr><tr>
* <td> {@link #padValue} </td>
* <td> {@link Double#NaN} (stands for no pad-value) </td>
* </tr><tr>
* <td colspan="2" align="center">See
* {@linkplain org.geotoolkit.image.io.SpatialImageReader.Spi super-class javadoc}
* for remaining fields</td>
* </tr>
* </table>
*
* @author Martin Desruisseaux (IRD, Geomatys)
* @version 3.07
*
* @see TextImageWriter.Spi
*
* @since 3.08 (derived from 2.4)
* @module
*/
protected abstract static class Spi extends StreamImageReader.Spi {
/**
* List of legal input types for {@link TextImageReader}.
*/
private static final Class<?>[] INPUT_TYPES = new Class<?>[] {
File.class,
Path.class,
URI.class,
URL.class,
URLConnection.class,
Reader.class,
InputStream.class,
ImageInputStream.class,
ReadableByteChannel.class,
String.class // To be interpreted as file path.
};
/**
* Default list of file suffixes. This list is shared with {@link TextImageWriter}.
*/
static final String[] SUFFIXES = new String[] {
"txt", "TXT", "asc", "ASC", "dat", "DAT"
};
/**
* Character encoding, or {@code null} for the default. This field is initially
* {@code null}, which means to use the platform-dependent encoding. Subclasses
* shall set a non-null value if the files to be decoded use some specific character
* encoding.
*
* @see TextImageReader#getCharset(InputStream)
*/
protected Charset charset;
/**
* The locale for numbers or dates parsing. For example {@link Locale#US} means that
* numbers are expected to use a dot for the decimal separator. This field is initially
* {@code null}, which means that the {@linkplain Locale#getDefault() default locale}
* will be used.
*
* @see TextImageReader#getLineFormat(int)
*/
protected Locale locale;
/**
* The pad value, or {@link Double#NaN} if none. Every occurrence of a pixel value equal
* to this pad value will be replaced by {@link Double#NaN} during the read operation.
* Note that this replacement doesn't apply to non-pixel values (for example <var>x</var>,
* <var>y</var> coordinates in the format read by
* {@link org.geotoolkit.image.io.plugin.TextRecordImageReader}).
*
* @see TextImageReader#getPadValue(int)
*/
protected double padValue;
/**
* Constructs a quasi-blank {@code TextImageReader.Spi}. This constructor initializes
* the fields as documented in the <a href="#skip-navbar_top">class javadoc</a>. It is
* up to the subclass to initialize all other instance variables in order to provide
* working versions of all methods.
* <p>
* For efficiency reasons, the above fields are initialized to shared arrays. Subclasses
* can assign new arrays, but should not modify the default array content.
*/
protected Spi() {
    // No pad value by default.
    padValue   = Double.NaN;
    // The default arrays are shared constants, not copies: subclasses may assign
    // new arrays but should not modify the content of these ones.
    suffixes   = SUFFIXES;
    inputTypes = INPUT_TYPES;
}
/**
* Returns {@code true} if the supplied source object appears to be of the format
* supported by this reader. The default implementation tries to parse the first
* few lines up to 2048 characters, as below:
*
* {@preformat java
* return canDecodeInput(source, 2048);
* }
*
* @param source The object (typically an {@link ImageInputStream}) to be decoded.
* @return {@code true} if the source <em>seems</em> readable.
* @throws IOException If an error occurred during reading.
*/
@Override
public boolean canDecodeInput(final Object source) throws IOException {
    // Default read-ahead limit, in characters, for the probing overload.
    final int defaultReadAheadLimit = 2048;
    return canDecodeInput(source, defaultReadAheadLimit);
}
/**
* Returns {@code true} if the supplied source object appears to be of the format
* supported by this reader. The default implementation tries to parse the first
* few lines up to the specified number of characters, then gives those lines to
* the {@link #isValidHeader(Set)} and {@link #isValidContent(double[][])} methods.
* <p>
* The default implementation is suitable for
* {@link org.geotoolkit.image.io.plugin.TextMatrixImageReader}, i.e.
* it expects only rows for pixel values (no header) and all rows shall have the
* same length. If this behavior needs to be changed, consider overriding the
* {@code isValidHeader} and {@code isValidContent} methods.
*
* @param source The object (typically an {@link ImageInputStream}) to be decoded.
* @param readAheadLimit Maximum number of characters to read. If this amount is reached
* but this method is still unable to make a choice, then it conservatively returns
* {@code false}.
* @return {@code true} if the source <em>seems</em> readable.
* @throws IOException If an error occurred during reading.
*/
protected boolean canDecodeInput(final Object source, final int readAheadLimit)
        throws IOException
{
    // Use a throw-away reader bound to this provider for probing the source.
    final TestReader probe = new TestReader(this);
    probe.setInput(source);
    final boolean decodable;
    try {
        decodable = probe.canDecode(readAheadLimit);
    } finally {
        // Always release the probe, whether the test succeeded or threw.
        probe.close();
    }
    return decodable;
}
/**
* Invoked by {@link #canDecodeInput(Object, int)} for determining if the given header is
* likely to be valid. This method receives in argument a {@code keywords} set containing
* the first word of every <cite>header lines</cite> (defined below), converted to upper
* cases using the {@linkplain #locale} defined in this provider.
* <p>
* A <cite>header line</cite> is defined as a line which is not a
* {@linkplain TextImageReader#isComment(String) comment line}, appears before
* the first row of pixel values and where the first non-blank character is a
* {@linkplain Character#isJavaIdentifierStart(char) Java identifier start}.
* <p>
* The default implementation returns {@code true} if the given set is empty.
* In other words, by default no header is allowed in the data file.
*
* @param keywords The first word found in every <cite>header lines</cite>,
* converted to upper-case.
* @return {@code true} if the set of keywords is known to this format.
*
* @since 3.07
*/
protected boolean isValidHeader(final Set<String> keywords) {
    // By default no header line is accepted: any keyword invalidates the file.
    final boolean noHeaderLine = keywords.isEmpty();
    return noHeaderLine;
}
/**
* Invoked by {@link #canDecodeInput(Object, int)} for determining if the given rows are
* likely to be valid. This method receives in argument a {@code rows} array containing
* the first few lines of data. The number of rows depends on the average row length and
* the {@code readAheadLimit} argument given to {@code canDecodeInput}.
* <p>
* The default implementation returns {@code true} if there is at least one row
* and every row has the same number of columns.
*
* @param rows The first few rows.
* @return {@code true} if the given rows seem to have a valid content.
*/
protected boolean isValidContent(final double[][] rows) {
    // At least one row is needed in order to make a decision.
    if (rows.length == 0) {
        return false;
    }
    // Every row shall have as many columns as the first one.
    final int columnCount = rows[0].length;
    for (final double[] row : rows) {
        if (row.length != columnCount) {
            return false;
        }
    }
    // Rows are consistent; let the subclass-overridable hook judge the column count.
    return isValidColumnCount(columnCount);
}
/**
* Invoked by {@link #isValidContent(double[][])} for determining if the given number
* of columns is likely to be valid. This method receives in argument the length of
* every row that was given to {@code isValidContent}, when that length is constant.
* <p>
* The default implementation returns {@code true} if the number of columns is greater
* than zero. Subclasses can override this method if they know the expected number of
* columns.
*
* @param count The number of columns in the first few rows.
* @return {@code true} if the given number of columns seems to be valid.
*
* @since 3.07
*/
protected boolean isValidColumnCount(final int count) {
    // Any row with at least one column is acceptable by default.
    return count >= 1;
}
}
}