/* * GeoTools - The Open Source Java GIS Toolkit * http://geotools.org * * (C) 2001-2008, Open Source Geospatial Foundation (OSGeo) * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; * version 2.1 of the License. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. */ package org.geotools.image.io.text; import java.io.*; // Many imports, including some for javadoc only. import java.net.URL; import java.net.URLConnection; import java.nio.charset.Charset; import java.util.Locale; import javax.imageio.spi.ImageReaderSpi; import javax.imageio.stream.ImageInputStream; import org.geotools.data.DataUtilities; import org.geotools.io.LineFormat; import org.geotools.image.io.StreamImageReader; import org.geotools.resources.i18n.Vocabulary; import org.geotools.resources.i18n.VocabularyKeys; /** * Base class for text image decoders. "Text images" are usually ASCII files containing pixel * as geophysical values. This base class provides a convenient way to get {@link BufferedReader} * for reading lines. * <p> * {@code TextImageReader} accepts many input types, including {@link File}, {@link URL}, * {@link Reader}, {@link InputStream} and {@link ImageInputStream}. The {@link Spi} provider * automatically advises those input types. The above cited {@code Spi} provided also provides * a convenient way to control the character encoding, with the {@link Spi#charset charset} field. * Developer can gain yet more control on character encoding by overriding the {@link #getCharset} * method. * * @since 2.4 * * @source $URL$ * @version $Id$ * @author Martin Desruisseaux (IRD) */ public abstract class TextImageReader extends StreamImageReader { /** * {@link #input} as a reader, or {@code null} if none. * * @see #getReader */ private BufferedReader reader; /** * Constructs a new image reader. * * @param provider The provider that is invoking this constructor, or {@code null} if none. */ protected TextImageReader(final ImageReaderSpi provider) { super(provider); } /** * Returns the character set to use for decoding the string from the input stream. The default * implementation returns the {@linkplain Spi#charset character set} specified to the * {@link Spi} object given to this {@code TextImageReader} constructor. Subclasses can * override this method if they want to detect the character encoding in some other way. * * @param input The input stream. * @return The character encoding, or {@code null} for the platform default encoding. * @throws IOException If reading from the input stream failed. * * @see Spi#charset */ protected Charset getCharset(final InputStream input) throws IOException { return (originatingProvider instanceof Spi) ? ((Spi)originatingProvider).charset : null; } /** * Returns the line format to use for parsing every lines in the input stream. The default * implementation creates a new {@link LineFormat} instance using the locale specified by * {@link Spi#locale}. Subclasses should override this method if they want more control * on the parser to be created. * * @param imageIndex the index of the image to be queried. * @throws IOException If reading from the input stream failed. * * @see Spi#locale */ protected LineFormat getLineFormat(final int imageIndex) throws IOException { if (originatingProvider instanceof Spi) { final Locale locale = ((Spi) originatingProvider).locale; if (locale != null) { return new LineFormat(locale); } } return new LineFormat(); } /** * Returns the pad value for missing data, or {@link Double#NaN} if none. The pad value will * applies to all columns except the one for {@link TextRecordImageReader#getColumnX x} and * {@link TextRecordImageReader#getColumnY y} values, if any. * <p> * The default implementation returns the pad value specified to the {@link Spi} object given * to this {@code TextImageReader} constructor. Subclasses can override this method if they * want to detect the pad value in some other way. * * @param imageIndex the index of the image to be queried. * @throws IOException If reading from the input stream failed. * * @see Spi#padValue * * @deprecated Should be specified in metadata instead, and implementations should use * {@code SampleConverter}. */ protected double getPadValue(final int imageIndex) throws IOException { return (originatingProvider instanceof Spi) ? ((Spi) originatingProvider).padValue : Double.NaN; } /** * Returns the {@linkplain #input input} as an {@linkplain BufferedReader buffered reader}. * If the input is already a buffered reader, it is returned unchanged. Otherwise this method * creates a new {@linkplain LineNumberReader line number reader} from various input types * including {@link File}, {@link URL}, {@link URLConnection}, {@link Reader}, * {@link InputStream} and {@link ImageInputStream}. * <p> * This method creates a new {@linkplain BufferedReader reader} only when first invoked. * All subsequent calls will returns the same instance. Consequently, the returned reader * should never be closed by the caller. It may be {@linkplain #close closed} automatically * when {@link #setInput setInput(...)}, {@link #reset() reset()} or {@link #dispose() * dispose()} methods are invoked. * * @return {@link #getInput} as a {@link BufferedReader}. * @throws IllegalStateException if the {@linkplain #input input} is not set. * @throws IOException If the input stream can't be created for an other reason. * * @see #getInput * @see #getInputStream */ protected BufferedReader getReader() throws IllegalStateException, IOException { if (reader == null) { final Object input = getInput(); if (input instanceof BufferedReader) { reader = (BufferedReader) input; closeOnReset = null; // We don't own the underlying reader, so don't close it. } else if (input instanceof Reader) { reader = new LineReader((Reader) input); closeOnReset = null; // We don't own the underlying reader, so don't close it. } else { final InputStream stream = getInputStream(); reader = new LineReader(getInputStreamReader(stream)); if (closeOnReset == stream) { closeOnReset = reader; } } } return reader; } /** * Returns the specified {@link InputStream} as a {@link Reader}. */ final Reader getInputStreamReader(final InputStream stream) throws IOException { final Charset charset = getCharset(stream); return (charset != null) ? new InputStreamReader(stream, charset) : new InputStreamReader(stream); } /** * Returns {@code true} if the specified line is a comment. This method is invoked automatically * during a {@link #read read} operation. The default implementation returns {@code true} if the * line is empty or if the first non-whitespace character is {@code '#'}, and {@code false} * otherwise. Override this method if comment lines should be determined in a different way. * * @param line A line to be parsed. * @return {@code true} if the line is a comment and should be ignored, or {@code false} if it * should be parsed. */ protected boolean isComment(final String line) { final int length = line.length(); for (int i=0; i<length; i++) { final char c = line.charAt(i); if (!Character.isSpaceChar(c)) { return (c == '#'); } } return true; } /** * Retourne une approximation du nombre d'octets du flot occupés par les * images {@code fromImage} inclusivement jusqu'à {@code toImage} * exclusivement. L'implémentation par défaut calcule cette longueur en * supposant que toutes les images se divisent la longueur totale du flot * en parts égales. * * @param fromImage Index de la première image à prendre en compte. * @param toImage Index suivant celui de la dernière image à prendre en * compte, ou -1 pour prendre en compte toutes les images * restantes jusqu'à la fin du flot. * @return Le nombre d'octets occupés par les images spécifiés, ou -1 si * cette longueur n'a pas pu être calculée. Si le calcul précis de * cette longueur serait prohibitif, cette méthode est autorisée à * retourner une simple approximation ou même à retourner la longueur * totale du flot. * @throws IOException si une erreur est survenue lors de la lecture du flot. */ final long getStreamLength(final int fromImage, int toImage) throws IOException { long length = getStreamLength(); if (length > 0) { final int numImages = getNumImages(false); if (numImages > 0) { if (toImage == -1) { toImage = numImages; } if (fromImage<0 || fromImage>numImages) { throw new IndexOutOfBoundsException(String.valueOf(fromImage)); } if (toImage<0 || toImage>numImages) { throw new IndexOutOfBoundsException(String.valueOf( toImage)); } if (fromImage > toImage) { throw new IllegalArgumentException(); } return length * (toImage-fromImage) / numImages; } } return length; } /** * Retourne la position du flot spécifié, ou {@code -1} si cette position est * inconnue. Note: la position retournée est <strong>approximative</strong>. * Elle est utile pour afficher un rapport des progrès, mais sans plus. * * @param reader Flot dont on veut connaître la position. * @return Position approximative du flot, ou {@code -1} * si cette position n'a pas pu être obtenue. * @throws IOException si l'opération a échouée. */ static long getStreamPosition(final Reader reader) throws IOException { return (reader instanceof LineReader) ? ((LineReader) reader).getPosition() : -1; } /** * Returns a string representation of the current stream position. For example this method * may returns something like {@code "Line 14 in file HUV18204.asc"}. This method returns * {@code null} if the stream position is unknown. * * @param message An optional message to append to the stream position, or {@code null} * if none. */ protected String getPositionString(final String message) { final String file; final Object input = getInput(); if (input instanceof File) { file = ((File) input).getName(); } else if (input instanceof URL) { file = DataUtilities.urlToFile((URL) input).getPath(); } else { file = null; } final Integer line = (reader instanceof LineNumberReader) ? ((LineNumberReader) reader).getLineNumber() : null; final Vocabulary resources = Vocabulary.getResources(null); final String position; if (file != null) { if (line != null) { position = resources.getString(VocabularyKeys.FILE_POSITION_$2, file, line); } else { position = resources.getString(VocabularyKeys.FILE_$1, file); } } else if (line != null) { position = resources.getString(VocabularyKeys.LINE_$1, line); } else { position = null; } if (position != null) { if (message != null) { return position + ": " + message; } else { return position; } } else { return message; } } /** * Closes the reader created by {@link #getReader()}. This method does nothing if * the reader is the {@linkplain #input input} instance given by the user rather * than a reader created by this class from a {@link File} or {@link URL} input. * * @see #closeOnReset */ @Override protected void close() throws IOException { reader = null; super.close(); } /** * Service provider interface (SPI) for {@link TextImageReader}s. This * SPI provides a convenient way to control the {@link TextImageReader} * character encoding: the {@link #charset} field. For example, many * {@code Spi} subclasses will put the following line in their * constructor: * * <blockquote><pre> * {@link #charset} = Charset.forName("ISO-LATIN-1"); // ISO Latin Alphabet No. 1 (ISO-8859-1) * </pre></blockquote> * * @since 2.4 * @source $URL$ * @version $Id$ * @author Martin Desruisseaux (IRD) */ public static abstract class Spi extends StreamImageReader.Spi { /** * List of legal input types for {@link TextImageReader}. */ private static final Class[] INPUT_TYPES = new Class[] { File.class, URL.class, URLConnection.class, Reader.class, InputStream.class, ImageInputStream.class, String.class // To be interpreted as file path. }; /** * Default list of file extensions. */ private static final String[] EXTENSIONS = new String[] { "txt", "TXT", "asc", "ASC", "dat", "DAT" }; /** * Character encoding, or {@code null} for the default. This field is initially * {@code null}. A value shall be set by subclasses if the files to be decoded * use some specific character encoding. * * @see TextImageReader#getCharset */ protected Charset charset; /** * The locale for numbers or dates parsing. For example {@link Locale#US} means that * numbers are expected to use dot as decimal separator. This field is initially * {@code null}, which means that default locale should be used. * * @see TextImageReader#getLineFormat * @see TextRecordImageReader#parseLine */ protected Locale locale; /** * The pad value, or {@link Double#NaN} if none. Every occurences of pixel value equals * to this pad value will be replaced by {@link Double#NaN} during read operation. Note * that this replacement doesn't apply to non-pixel values (for example <var>x</var>, * <var>y</var> coordinates in some file format). * * @see TextImageReader#getPadValue * @see TextRecordImageReader#parseLine */ protected double padValue; /** * Constructs a quasi-blank {@code TextImageReader.Spi}. It is up to the subclass to * initialize instance variables in order to provide working versions of all methods. * This constructor provides the following defaults: * * <ul> * <li>{@link #inputTypes} = {{@link File}, {@link URL}, {@link URLConnection}, * {@link Reader}, {@link InputStream}, {@link ImageInputStream}, {@link String}}</li> * * <li>{@link #suffixes} = {{@code "txt"}, {@code "asc"}, {@code "dat"}} * (lowercases and uppercases)</li> * * <li>{@link #padValue} = {@link Double#NaN}</li> * </ul> * * For efficienty reasons, the above fields are initialized to shared arrays. Subclasses * can assign new arrays, but should not modify the default array content. */ public Spi() { inputTypes = INPUT_TYPES; suffixes = EXTENSIONS; padValue = Double.NaN; } /** * Returns {@code true} if the supplied source object appears to be of the format * supported by this reader. The default implementation tries to parse the first * few lines up to 1024 characters. * * @param source The object (typically an {@link ImageInputStream}) to be decoded. * @return {@code true} if the source <em>seems</em> readable. * @throws IOException If an error occured during reading. */ public boolean canDecodeInput(final Object source) throws IOException { return canDecodeInput(source, 1024); } /** * Returns {@code true} if the supplied source object appears to be of the format * supported by this reader. The default implementation tries to parse the first * few lines up to the specified number of characters. * * @param source The object (typically an {@link ImageInputStream}) to be decoded. * @param readAheadLimit Maximum number of characters to read. If this amount is reached * but this method still unable to make a choice, then it conservatively returns * {@code false}. * @return {@code true} if the source <em>seems</em> readable. * @throws IOException If an error occured during reading. */ protected boolean canDecodeInput(final Object source, final int readAheadLimit) throws IOException { final TestReader test = new TestReader(this); test.setInput(source); final boolean result = test.canDecode(readAheadLimit); test.close(); return result; } /** * Returns {@code true} if the content of the first few rows seems valid, or {@code false} * otherwise. The number of rows depends on the row length and the {@code readAheadLimit} * argument given to {@link #canDecodeInput(Object,int) canDecodeInput}. * <p> * The default implementation returns {@code true} if there is at least one row * and every row have the same number of columns. */ protected boolean isValidContent(final double[][] rows) { if (rows.length == 0) { return false; } final int length = rows[0].length; for (int i=1; i<rows.length; i++) { if (rows[i].length != length) { return false; } } return isValidColumnCount(length); } /** * Returns {@code true} if the specified row length is valid. If unsure, this methods * can conservatively returns {@code false}. The default implementation always returns * {@code true}. */ boolean isValidColumnCount(final int count) { return true; } } }