DbaseFileReader.java example

Explorer
geotools-old-master
/*
 *    GeoTools - The Open Source Java GIS Toolkit
 *    http://geotools.org
 *
 *    (C) 2002-2008, Open Source Geospatial Foundation (OSGeo)
 *
 *    This library is free software; you can redistribute it and/or
 *    modify it under the terms of the GNU Lesser General Public
 *    License as published by the Free Software Foundation;
 *    version 2.1 of the License.
 *
 *    This library is distributed in the hope that it will be useful,
 *    but WITHOUT ANY WARRANTY; without even the implied warranty of
 *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 *    Lesser General Public License for more details.
 *
 *    This file is based on an original contained in the GISToolkit project:
 *    http://gistoolkit.sourceforge.net/
 */
package org.geotools.data.shapefile.dbf;

import java.io.BufferedInputStream;
import java.io.ByteArrayInputStream;
import java.io.DataInputStream;
import java.io.IOException;
import java.nio.ByteBuffer;
import java.nio.ByteOrder;
import java.nio.channels.FileChannel;
import java.nio.channels.FileChannel.MapMode;
import java.nio.channels.ReadableByteChannel;
import java.nio.charset.Charset;
import java.util.Calendar;
import java.util.Locale;
import java.util.TimeZone;

import org.geotools.data.shapefile.FileReader;
import org.geotools.data.shapefile.ShpFileType;
import org.geotools.data.shapefile.ShpFiles;
import org.geotools.data.shapefile.StreamLogging;
import org.geotools.resources.NIOUtilities;

/**
 * A DbaseFileReader is used to read a dbase III format file. <br>
 * The general use of this class is: <CODE><PRE>
 * 
 * FileChannel in = new FileInputStream("thefile.dbf").getChannel();
 * DbaseFileReader r = new DbaseFileReader(in, false, Charset.defaultCharset());
 * Object[] fields = new Object[r.getHeader().getNumFields()];
 * while (r.hasNext()) {
 *     r.readEntry(fields);
 *     // do stuff
 * }
 * r.close();
 * 
 * </PRE></CODE> For consumers who wish to be a bit more selective with their reading
 * of rows, the Row object has been added. The semantics are the same as using
 * the readEntry method, but remember that the Row object is always the same
 * instance. The values are parsed as they are read, so it pays to copy them
 * out (each call to Row.read() results in an expensive String parse). <br>
 * <b>EACH CALL TO readEntry OR readRow ADVANCES THE FILE!</b><br>
 * An example of using the Row method of reading: <CODE><PRE>
 * 
 * FileChannel in = new FileInputStream("thefile.dbf").getChannel();
 * DbaseFileReader r = new DbaseFileReader(in, false, Charset.defaultCharset());
 * int fields = r.getHeader().getNumFields();
 * while (r.hasNext()) {
 *     DbaseFileReader.Row row = r.readRow();
 *     for (int i = 0; i &lt; fields; i++) {
 *         // do stuff
 *         Foo.bar(row.read(i));
 *     }
 * }
 * r.close();
 * 
 * </PRE></CODE>
 * 
 * @author Ian Schneider, Andrea Aaime
 *
 * @source $URL$
 */
public class DbaseFileReader implements FileReader {

    /**
     * View over the record most recently loaded by the enclosing reader. The
     * reader hands out a single shared instance, so values should be copied
     * out before the reader moves on to another record.
     */
    public final class Row {
        /**
         * Parses and returns one field of the current record.
         *
         * @param column
         *                zero-based index of the field to read
         * @return the parsed value, possibly <code>null</code>
         * @throws IOException
         *                 if the field has an unsupported type
         */
        public Object read(final int column) throws IOException {
            return readObject(fieldOffsets[column], column);
        }

        /**
         * Lists every field of the current record as <code>name: "value"</code>.
         * When a field cannot be read, the message of the failure is printed
         * in place of its value.
         */
        public String toString() {
            final StringBuilder text = new StringBuilder("DBF Row - ");
            final int fieldCount = header.getNumFields();
            for (int field = 0; field < fieldCount; field++) {
                String value;
                try {
                    value = String.valueOf(read(field));
                } catch (final IOException failure) {
                    value = failure.getMessage();
                }
                text.append(header.getFieldName(field));
                text.append(": \"");
                text.append(value);
                text.append("\" ");
            }
            return text.toString();
        }
    }

    /** Header of the dbf file; created and read in init, set to null by close(). */
    DbaseFileHeader header;

    /**
     * Buffer the records are consumed from: either a read-only memory map of
     * the file, or a buffer of one record length that is refilled from the
     * channel.
     */
    ByteBuffer buffer;

    /** Channel the dbf content is read from; closed and nulled by close(). */
    ReadableByteChannel channel;

    /**
     * Body of the current record without the leading deleted flag (record
     * length - 1 bytes); filled by read().
     */
    byte[] bytes;

    /** Field type codes copied from the header, indexed by field number. */
    char[] fieldTypes;

    /** Field lengths in bytes copied from the header, indexed by field number. */
    int[] fieldLengths;
    
    /** Start of each field inside {@link #bytes}: the sum of the lengths of the preceding fields. */
    int[] fieldOffsets;

    /**
     * Starts at one and is incremented by read(), skip() and transferTo();
     * hasNext() compares it against the record count of the header.
     */
    int cnt = 1;

    /** The single Row instance returned by every call to readRow(). */
    Row row;

    /**
     * Whether {@link #buffer} is a memory map; forced to false in init when
     * the channel is not a FileChannel.
     */
    protected boolean useMemoryMappedBuffer;

    /** Set in init to whether the channel is a FileChannel. */
    protected boolean randomAccessEnabled;

    /**
     * Initialised to 0 for a memory mapped buffer and to the header length
     * otherwise; updated by bufferCheck().
     * NOTE(review): appears to be the file offset at which the buffer starts -
     * confirm against subclasses that use it.
     */
    protected long currentOffset = 0;
    /** Opened in init and closed together with the channel. */
    private final StreamLogging streamLogger = new StreamLogging("Dbase File Reader");

    /** Charset used to decode character fields; the platform default when none was given. */
    private Charset stringCharset;
    
    /** True when the charset is ISO-8859-1 or US-ASCII, which enables the fastParse path for strings. */
    private boolean oneBytePerChar;

    /** Calendar used to build the Date values of date and timestamp fields. */
    private Calendar calendar;

    /** Milliseconds in one day; scales the day count of '@' timestamp fields. */
    private final long MILLISECS_PER_DAY = 24*60*60*1000;

    
    /**
     * Creates a reader over the DBF file of the given shapefile set.
     * 
     * @param shapefileFiles
     *                The file set the DBF read channel is obtained from.
     * @param useMemoryMappedBuffer
     *                Whether to memory map the file; only honoured when the
     *                channel turns out to be a FileChannel.
     * @param charset
     *                Charset used to decode character fields, or null for the
     *                platform default.
     * @param timeZone
     *                Time zone of the calendar used to build dates, or null
     *                for the default time zone.
     * @throws IOException
     *                 If an error occurs while initializing.
     */
    public DbaseFileReader(final ShpFiles shapefileFiles,
            final boolean useMemoryMappedBuffer, final Charset charset, final TimeZone timeZone) throws IOException {
        init(shapefileFiles.getReadChannel(ShpFileType.DBF, this), useMemoryMappedBuffer, charset, timeZone);
    }
    
    /**
     * Creates a reader over the DBF file of the given shapefile set, building
     * dates in the default time zone.
     * 
     * @param shapefileFiles
     *                The file set the DBF read channel is obtained from.
     * @param useMemoryMappedBuffer
     *                Whether to memory map the file when possible.
     * @param charset
     *                Charset used to decode character fields, or null for the
     *                platform default.
     * @throws IOException
     *                 If an error occurs while initializing.
     */
    public DbaseFileReader(final ShpFiles shapefileFiles,
            final boolean useMemoryMappedBuffer, final Charset charset) throws IOException {
        this(shapefileFiles, useMemoryMappedBuffer, charset, null);
    }
    
    /**
     * Creates a reader over an already opened channel, building dates in the
     * default time zone.
     * 
     * @param readChannel
     *                The channel to read the dbf content from.
     * @param useMemoryMappedBuffer
     *                Whether to memory map the file; only honoured when the
     *                channel is a FileChannel.
     * @param charset
     *                Charset used to decode character fields, or null for the
     *                platform default.
     * @throws IOException
     *                 If an error occurs while initializing.
     */
    public DbaseFileReader(final ReadableByteChannel readChannel, final boolean useMemoryMappedBuffer,
            final Charset charset) throws IOException {
        this(readChannel, useMemoryMappedBuffer, charset, null);
    }

    /**
     * Creates a reader over an already opened channel.
     * 
     * @param readChannel
     *                The channel to read the dbf content from.
     * @param useMemoryMappedBuffer
     *                Whether to memory map the file; only honoured when the
     *                channel is a FileChannel.
     * @param charset
     *                Charset used to decode character fields, or null for the
     *                platform default.
     * @param timeZone
     *                Time zone of the calendar used to build dates, or null
     *                for the default time zone.
     * @throws IOException
     *                 If an error occurs while initializing.
     */
    public DbaseFileReader(
            final ReadableByteChannel readChannel,
            final boolean useMemoryMappedBuffer,
            final Charset charset,
            final TimeZone timeZone) throws IOException {
        init(readChannel, useMemoryMappedBuffer, charset, timeZone);
    }

    /**
     * Shared constructor logic: stores the channel, resolves charset and time
     * zone defaults, reads the header, prepares the record buffer and caches
     * the per-field type, length and offset lookups.
     * 
     * @param dbfChannel
     *                The channel to read the dbf content from.
     * @param useMemoryMappedBuffer
     *                Whether to memory map the file; ignored (and reset to
     *                false) when the channel is not a FileChannel.
     * @param charset
     *                Charset used to decode character fields, or null for the
     *                platform default.
     * @param timeZone
     *                Time zone of the calendar used to build dates, or null
     *                for the default time zone.
     * @throws IOException
     *                 If the header cannot be read or the file cannot be
     *                 mapped.
     */
    private void init(final ReadableByteChannel dbfChannel, final boolean useMemoryMappedBuffer,
            final Charset charset, final TimeZone timeZone) throws IOException {
        this.channel = dbfChannel;
        this.stringCharset = charset == null ? Charset.defaultCharset() : charset;
        final TimeZone calTimeZone = timeZone == null ? TimeZone.getDefault() : timeZone;
        this.calendar = Calendar.getInstance(calTimeZone, Locale.US);

        this.useMemoryMappedBuffer = useMemoryMappedBuffer;
        this.randomAccessEnabled = (channel instanceof FileChannel);
        streamLogger.open();
        header = new DbaseFileHeader();

        // create the ByteBuffer
        // if we have a FileChannel, lets map it
        if (channel instanceof FileChannel && this.useMemoryMappedBuffer) {
            final FileChannel fc = (FileChannel) channel;
            // The mapping always starts at offset 0, so its size depends on the
            // file size alone; a single mapping cannot exceed Integer.MAX_VALUE
            // bytes, larger files are remapped later by bufferCheck().
            final long mapSize = Math.min(fc.size(), (long) Integer.MAX_VALUE);
            buffer = fc.map(FileChannel.MapMode.READ_ONLY, 0, mapSize);
            buffer.position((int) fc.position());
            header.readHeader(buffer);

            this.currentOffset = 0;
        } else {
            // Force useMemoryMappedBuffer to false
            this.useMemoryMappedBuffer = false;
            header.readHeader(channel, charset);
            // Some other type of channel: use a buffer holding exactly one record
            buffer = NIOUtilities.allocate(header.getRecordLength());
            // fill it and make it readable
            fill(buffer, channel);
            buffer.flip();
            this.currentOffset = header.getHeaderLength();
        }

        // The entire file is in little endian
        buffer.order(ByteOrder.LITTLE_ENDIAN);

        // Set up some buffers and lookups for efficiency
        final int numFields = header.getNumFields();
        fieldTypes = new char[numFields];
        fieldLengths = new int[numFields];
        fieldOffsets = new int[numFields];
        for (int i = 0; i < numFields; i++) {
            fieldTypes[i] = header.getFieldType(i);
            fieldLengths[i] = header.getFieldLength(i);
            if (i > 0) {
                // a field starts where the previous one ends
                fieldOffsets[i] = fieldOffsets[i - 1] + header.getFieldLength(i - 1);
            }
        }
        // the record body excludes the one byte deleted flag
        bytes = new byte[header.getRecordLength() - 1];

        // check if we are working with a one-byte-per-char Charset
        final String cname = stringCharset.name();
        oneBytePerChar = "ISO-8859-1".equals(cname) || "US-ASCII".equals(cname);

        row = new Row();
    }

    /**
     * Reads from the channel until the buffer is full or the channel reports
     * end of stream. On end of stream the buffer limit is pulled back to the
     * current position.
     * 
     * @param buffer
     *                The buffer to fill.
     * @param channel
     *                The channel to read from.
     * @return The result of the last read (-1 on end of stream), or the
     *         remaining space, that is 0, when the buffer was already full.
     * @throws IOException
     *                 If reading from the channel fails.
     */
    protected int fill(final ByteBuffer buffer, final ReadableByteChannel channel)
            throws IOException {
        int lastRead = buffer.remaining();
        // a read may legitimately return 0, so only -1 ends the loop early
        while (buffer.hasRemaining()) {
            lastRead = channel.read(buffer);
            if (lastRead == -1) {
                buffer.limit(buffer.position());
                break;
            }
        }
        return lastRead;
    }

    /**
     * Makes sure the buffer holds at least one full record before a read.
     * <p>
     * For a channel backed buffer the unread bytes are compacted to the front
     * and the buffer is refilled. For a memory mapped buffer, which covers at
     * most Integer.MAX_VALUE bytes, a new window is mapped starting at the
     * next unread byte. If the current window already reaches the end of the
     * file the buffer is left untouched.
     * 
     * @throws IOException
     *                 If refilling or remapping fails.
     */
    private void bufferCheck() throws IOException {
        final int recordLength = header.getRecordLength();
        if (buffer.remaining() >= recordLength) {
            return;
        }

        if (useMemoryMappedBuffer) {
            // we're dealing with a DBF bigger than a single mapping
            final FileChannel fc = (FileChannel) channel;
            final long fileSize = fc.size();
            // currentOffset is where the current window starts in the file;
            // the arithmetic is done in long since these values exceed int range
            final long nextByte = currentOffset + buffer.position();
            final long windowEnd = nextByte + buffer.remaining();
            if (windowEnd >= fileSize) {
                // nothing left in the file beyond what is already mapped
                return;
            }
            // never ask for more than the file holds: a read-only mapping
            // cannot extend past the end of the file
            final long mapSize = Math.min(fileSize - nextByte, (long) Integer.MAX_VALUE);
            NIOUtilities.clean(buffer);
            buffer = fc.map(MapMode.READ_ONLY, nextByte, mapSize);
            // a fresh mapping starts out big endian, restore the file's order
            buffer.order(ByteOrder.LITTLE_ENDIAN);
            currentOffset = nextByte;
        } else {
            // compact the remaining data and read again
            this.currentOffset += buffer.position();
            buffer.compact();
            fill(buffer, channel);
            buffer.position(0);
        }
    }

    /**
     * Returns the header of this file. The header is read when the reader is
     * created.
     * 
     * @return The header of this file, or null once the reader has been
     *         closed.
     */
    public DbaseFileHeader getHeader() {
        return this.header;
    }

    /**
     * Clean up all resources associated with this reader.<B>Highly recommended.</B>
     * <p>
     * The buffer is released and the internal references are cleared even
     * when closing the channel fails. After this call the reader can no
     * longer be used.
     * 
     * @throws IOException
     *                 If closing the channel fails.
     */
    public void close() throws IOException {
        try {
            if (channel != null && channel.isOpen()) {
                channel.close();
                streamLogger.close();
            }
        } finally {
            // runs on the failure path too, so a mapped buffer is never leaked
            if (buffer != null) {
                NIOUtilities.clean(buffer, useMemoryMappedBuffer);
            }

            buffer = null;
            channel = null;
            bytes = null;
            header = null;
            row = null;
        }
    }

    /**
     * Tells whether another record can be read, by comparing the running
     * record counter with the record count stored in the header.
     * 
     * @return True if more records exist, false otherwise.
     */
    public boolean hasNext() {
        final int pastLastRecord = header.getNumRecords() + 1;
        return cnt < pastLastRecord;
    }

    /**
     * Reads the next record into a freshly allocated array holding one value
     * per field.
     * 
     * @throws IOException
     *                 If an error occurs.
     * @return A new array of values.
     */
    public Object[] readEntry() throws IOException {
        final Object[] values = new Object[header.getNumFields()];
        return readEntry(values);
    }

    /**
     * Advances to the next record and returns the shared Row view over it.
     * The same Row instance is returned by every call.
     * 
     * @throws IOException
     *                 If an error occurs.
     * @return The Row giving access to the fields of the record just read.
     */
    public Row readRow() throws IOException {
        this.read();
        return this.row;
    }

    /**
     * Skip the next record. Records flagged as deleted are stepped over as
     * well and do not count as the skipped record.
     * 
     * @throws IOException
     *                 If an error occurs.
     */
    public void skip() throws IOException {
        char deletedFlag;
        do {
            bufferCheck();

            // the first byte of every record is the deleted flag
            deletedFlag = (char) buffer.get();

            // jump over the rest of the record; the flag byte is already consumed
            final int recordBody = header.getRecordLength() - 1;
            buffer.position(buffer.position() + recordBody);
        } while (deletedFlag == '*');
        cnt++;
    }

    /**
     * Reads the next record and stores its field values in the given array,
     * the first field going to index <code>offset</code>.
     * 
     * @param entry
     *                The array to copy into.
     * @param offset
     *                The index of the array receiving the first field.
     * @throws IOException
     *                 If an error occurs.
     * @throws ArrayIndexOutOfBoundsException
     *                 If the array has no room for all fields from offset on.
     * @return The same array passed in.
     */
    public Object[] readEntry(final Object[] entry, final int offset)
            throws IOException {
        final int available = entry.length - offset;
        if (available < header.getNumFields()) {
            throw new ArrayIndexOutOfBoundsException();
        }

        read();

        final int fieldCount = header.getNumFields();
        int target = offset;
        for (int field = 0; field < fieldCount; field++, target++) {
            entry[target] = readObject(fieldOffsets[field], field);
        }

        return entry;
    }
    
    /**
     * Parses one field of the current record. Call {@link #read()} first to
     * load a record, and again each time the next record is wanted.
     * 
     * @param fieldNum
     *                The field number to be read (zero based)
     * @throws IOException
     *                 If an error occurs.
     * @return The value of the field
     */
    public Object readField(final int fieldNum)
            throws IOException {
        final int start = fieldOffsets[fieldNum];
        return readObject(start, fieldNum);
    }

    /**
     * Transfer, by bytes, the next record to the writer. The whole record,
     * deleted flag included, is copied as is.
     * 
     * @param writer
     *                The writer whose channel receives the record.
     * @throws IOException
     *                 If reading or writing fails.
     */
    public void transferTo(final DbaseFileWriter writer) throws IOException {
        bufferCheck();
        // expose exactly one record to the write
        buffer.limit(buffer.position() + header.getRecordLength());
        try {
            // a channel write may be partial, keep going until the record is out
            // NOTE(review): assumes writer.channel is a blocking channel - confirm
            while (buffer.hasRemaining()) {
                writer.channel.write(buffer);
            }
        } finally {
            // restore the limit even when the write fails
            buffer.limit(buffer.capacity());
        }

        cnt++;
    }

    /**
     * Reads the next record into memory. You need to use this directly when reading only
     * a subset of the fields using {@link #readField(int)}.
     * <p>
     * Records flagged as deleted are stepped over in full, so the record
     * loaded is the next one that is not deleted.
     * 
     * @throws IOException
     *                 If an error occurs.
     */
    public void read() throws IOException {
        // size of a record without its one byte deleted flag
        final int recordBody = header.getRecordLength() - 1;

        boolean foundRecord = false;
        while (!foundRecord) {

            bufferCheck();

            // read the deleted flag
            final char deleted = (char) buffer.get();
            if (deleted == '*') {
                // step over the body of the deleted record, otherwise its
                // data bytes would be taken for the flag of the next record
                buffer.position(buffer.position() + recordBody);
                continue;
            }

            // copy the record body; this advances the buffer to the next record
            buffer.limit(buffer.position() + recordBody);
            buffer.get(bytes);
            buffer.limit(buffer.capacity());

            foundRecord = true;
        }

        cnt++;
    }

    /**
     * Reads the next record and stores its field values at the start of the
     * given array.
     * 
     * @param entry
     *                The array to copy into.
     * @throws IOException
     *                 If an error occurs.
     * @return The same array passed in.
     */
    public Object[] readEntry(final Object[] entry) throws IOException {
        final int firstIndex = 0;
        return readEntry(entry, firstIndex);
    }
    /**
     * Parses one field of the record currently held in {@link #bytes}.
     * 
     * @param fieldOffset
     *                Start of the field inside the record body.
     * @param fieldNum
     *                Zero based number of the field, used to look up its type
     *                and length.
     * @return The parsed value: Boolean, String, Date, Integer, Long or
     *         Double depending on the field type, or null for empty and null
     *         values and for zero length fields.
     * @throws IOException
     *                 If the field type is not supported.
     */
    private Object readObject(final int fieldOffset, final int fieldNum)
            throws IOException {
        final char type = fieldTypes[fieldNum];
        final int fieldLen = fieldLengths[fieldNum];
        if (fieldLen <= 0) {
            return null;
        }

        switch (type) {
        // (L)logical (T,t,F,f,Y,y,N,n)
        case 'l':
        case 'L':
            return readLogical(fieldOffset);
        // (C)character (String)
        case 'c':
        case 'C':
            return readCharacter(fieldOffset, fieldLen);
        // (D)date (Date)
        case 'd':
        case 'D':
            return readDate(fieldOffset);
        // (@) Timestamp (Date)
        case '@':
            return readTimestamp(fieldOffset);
        // (N)umeric (Integer, Long or Double)
        case 'n':
        case 'N':
            // numbers that begin with '*' are considered null
            if (bytes[fieldOffset] == '*') {
                return null;
            }
            final Object integral = readIntegral(fieldOffset, fieldLen, fieldNum);
            // whatever is not a valid integral value is read as floating point
            return integral != null ? integral : readDouble(fieldOffset, fieldLen);
        // (F)loating point number
        case 'f':
        case 'F':
            if (bytes[fieldOffset] == '*') {
                return null;
            }
            return readDouble(fieldOffset, fieldLen);
        default:
            throw new IOException("Invalid field type : " + type);
        }
    }

    /** Maps T/t/Y/y to TRUE and F/f/N/n to FALSE; any other content is read as null. */
    private Object readLogical(final int fieldOffset) {
        final char c = (char) bytes[fieldOffset];
        switch (c) {
        case 't':
        case 'T':
        case 'Y':
        case 'y':
            return Boolean.TRUE;
        case 'f':
        case 'F':
        case 'N':
        case 'n':
            return Boolean.FALSE;
        default:
            // 0x20 should be interpreted as null, but we're going to be a bit more lax
            return null;
        }
    }

    /** Decodes a character field and trims it; a field starting with a NUL byte is read as null. */
    private Object readCharacter(final int fieldOffset, final int fieldLen) throws IOException {
        if (bytes[fieldOffset] == '\0') {
            return null;
        }
        if (oneBytePerChar) {
            return fastParse(bytes, fieldOffset, fieldLen).trim();
        }
        return new String(bytes, fieldOffset, fieldLen, stringCharset.name()).trim();
    }

    /** Parses a YYYYMMDD date; eight '0' characters or unparsable digits are read as null. */
    private Object readDate(final int fieldOffset) {
        boolean allZeros = true;
        for (int i = 0; i < 8 && allZeros; i++) {
            allZeros = bytes[fieldOffset + i] == '0';
        }
        if (allZeros) {
            return null;
        }
        try {
            final int year = Integer.parseInt(fastParse(bytes, fieldOffset, 4));
            // Calendar months are zero based
            final int month = Integer.parseInt(fastParse(bytes, fieldOffset + 4, 2)) - 1;
            final int day = Integer.parseInt(fastParse(bytes, fieldOffset + 6, 2));
            calendar.clear();
            calendar.set(Calendar.YEAR, year);
            calendar.set(Calendar.MONTH, month);
            calendar.set(Calendar.DAY_OF_MONTH, day);
            return calendar.getTime();
        } catch (final NumberFormatException nfe) {
            // todo: use progresslistener, this isn't a grave error.
            return null;
        }
    }

    /** Decodes an '@' timestamp: a little endian day count followed by a little endian millisecond count. */
    private Object readTimestamp(final int fieldOffset) {
        final ByteBuffer raw = ByteBuffer.wrap(bytes, fieldOffset, 8).order(ByteOrder.LITTLE_ENDIAN);
        final int days = raw.getInt();
        final int time = raw.getInt();

        calendar.setTimeInMillis(days * MILLISECS_PER_DAY + DbaseFileHeader.MILLIS_SINCE_4713 + time);
        return calendar.getTime();
    }

    /**
     * Reads a numeric field as Integer or Long according to the field class
     * of the header, widening from Integer to Long when the value does not
     * fit. Returns null when the field is not integral or cannot be parsed.
     */
    private Object readIntegral(final int fieldOffset, final int fieldLen, final int fieldNum) {
        final String string = fastParse(bytes, fieldOffset, fieldLen).trim();
        Class<?> clazz = header.getFieldClass(fieldNum);
        if (clazz == Integer.class) {
            try {
                return Integer.parseInt(string);
            } catch (final NumberFormatException e) {
                // try to parse as long...
                clazz = Long.class;
            }
        }
        if (clazz == Long.class) {
            try {
                return Long.parseLong(string);
            } catch (final NumberFormatException e2) {
                // not integral after all, the caller reads it as a double
            }
        }
        return null;
    }

    /** Reads a field as Double; content that cannot be parsed yields a zero Double. */
    private Object readDouble(final int fieldOffset, final int fieldLen) {
        try {
            return Double.parseDouble(fastParse(bytes, fieldOffset, fieldLen));
        } catch (final NumberFormatException e) {
            // okay, now whatever we got was truly indigestible. Lets go
            // with a zero Double.
            return Double.valueOf(0.0);
        }
    }
    
    /**
     * Converts a slice of a byte array to a String, taking every byte as one
     * character with the same unsigned value. This avoids a charset decoder,
     * which was found to be slower.
     * 
     * @param bytes
     *                The array holding the text.
     * @param fieldOffset
     *                Index of the first byte to convert.
     * @param fieldLen
     *                Number of bytes to convert.
     * @return The resulting String, fieldLen characters long.
     */
    String fastParse(final byte[] bytes, final int fieldOffset, final int fieldLen) {
        final char[] decoded = new char[fieldLen];
        int source = fieldOffset;
        for (int target = 0; target < decoded.length; target++, source++) {
            // mask first so that bytes above 0x7F do not turn into negative values
            decoded[target] = (char) (bytes[source] & 0xFF);
        }
        return String.valueOf(decoded);
    }

    /**
     * Prints the header and all records of a dbf file to standard output.
     * 
     * @param args
     *                The first element is passed to ShpFiles to locate the
     *                file to dump.
     * @throws Exception
     *                 If the file cannot be read.
     */
    public static void main(final String[] args) throws Exception {
        final DbaseFileReader reader = new DbaseFileReader(new ShpFiles(args[0]),
                false, Charset.forName("ISO-8859-1"), null);
        try {
            System.out.println(reader.getHeader());
            int r = 0;
            while (reader.hasNext()) {
                System.out.println(++r + ","
                        + java.util.Arrays.asList(reader.readEntry()));
            }
        } finally {
            // release the channel even when reading fails half way
            reader.close();
        }
    }

    /**
     * Identifies this reader by the fully qualified name of its class.
     * 
     * @return The name of the runtime class of this reader.
     */
    public String id() {
        final Class<?> type = getClass();
        return type.getName();
    }

}