/*
 *    Geotoolkit - An Open Source Java GIS Toolkit
 *    http://www.geotoolkit.org
 *
 *    (C) 2002-2008, Open Source Geospatial Foundation (OSGeo)
 *    (C) 2010, Geomatys
 *
 *    This library is free software; you can redistribute it and/or
 *    modify it under the terms of the GNU Lesser General Public
 *    License as published by the Free Software Foundation;
 *    version 2.1 of the License.
 *
 *    This library is distributed in the hope that it will be useful,
 *    but WITHOUT ANY WARRANTY; without even the implied warranty of
 *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 *    Lesser General Public License for more details.
 *
 *    This file is based on an original contained in the GISToolkit project:
 *    http://gistoolkit.sourceforge.net/
 */
package org.geotoolkit.data.dbf;

import com.vividsolutions.jts.geom.Geometry;

import java.io.EOFException;
import java.io.IOException;
import java.math.BigInteger;
import java.nio.ByteBuffer;
import java.nio.ByteOrder;
import java.nio.channels.ReadableByteChannel;
import java.nio.channels.WritableByteChannel;
import java.util.ArrayList;
import java.util.Calendar;
import java.util.Date;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import java.util.logging.Level;
import java.util.logging.Logger;

import org.apache.sis.feature.SingleAttributeTypeBuilder;
import org.apache.sis.feature.FeatureExt;
import org.apache.sis.internal.feature.AttributeConvention;
import org.apache.sis.util.logging.Logging;
import org.opengis.feature.AttributeType;
import org.opengis.feature.FeatureType;
import org.opengis.feature.PropertyType;

/**
 * Class to represent the header of a Dbase III file.
* * Creation date: (5/15/2001 5:15:30 PM) * * @module */ public class DbaseFileHeader { private static final Logger LOGGER = Logging.getLogger("org.geotoolkit.data.shapefile"); // Constant for the size of a record private static final int FILE_DESCRIPTOR_SIZE = 32; // type of the file, must be 03h private static final byte MAGIC = 0x03; private static final int MINIMUM_HEADER = 33; // Date the file was last updated. private Date date = new Date(); private int recordCnt = 0; private int fieldCnt = 0; // set this to a default length of 1, which is enough for one "space" // character which signifies an empty record private int recordLength = 1; // set this to a flagged value so if no fields are added before the write, // we know to adjust the headerLength to MINIMUM_HEADER private int headerLength = -1; private int largestFieldSize = 0; // collection of header records. // lets start out with a zero-length array, just in case private DbaseField[] fields = new DbaseField[0]; private void read(final ByteBuffer buffer, final ReadableByteChannel channel) throws IOException { while (buffer.remaining() > 0) { if (channel.read(buffer) == -1) { throw new EOFException("Premature end of file"); } } } /** * Determine the most appropriate Java Class for representing the data in * the field. * * <PRE> * All packages are java.lang unless otherwise specified. * C (Character) -> String * N (Numeric) -> Integer or Double (depends on field's decimal count) * F (Floating) -> Double * L (Logical) -> Boolean * D (Date) -> java.util.Date * Unknown -> String * </PRE> * * @param i * The index of the field, from 0 to * <CODE>getNumFields() - 1</CODE> . * @return A Class which closely represents the dbase field type. 
*/
    public Class getFieldClass(final int i) {
        final DbaseField f = fields[i];
        return getFieldClass(f.fieldType, f.decimalCount, f.fieldLength);
    }

    /**
     * Map a dBase field type code to the Java class used for its values.
     * Numeric ('N') fields with no decimals map to Integer for short fields
     * and Long otherwise; any unrecognized code falls back to String.
     */
    private static Class getFieldClass(final char fieldType,
            final int decimalCount, final int fieldLength) {
        switch (fieldType) {
            case 'N':
                if (decimalCount != 0) {
                    return Double.class;
                }
                return (fieldLength < 10) ? Integer.class : Long.class;
            case 'F':
                return Double.class;
            case 'L':
                return Boolean.class;
            case 'D':
                return Date.class;
            case 'C': // characters map to String, as does anything unknown
            default:
                return String.class;
        }
    }

    /** Direct access to the field descriptor at the given index. */
    DbaseField getField(final int index) {
        return fields[index];
    }

    /**
     * Add a column to this DbaseFileHeader. The type is one of (C N L or D)
     * character, number, logical(true/false), or date. The Field length is the
     * total length in bytes reserved for this column. The decimal count only
     * applies to numbers(N), and floating point values (F), and refers to the
     * number of characters to reserve after the decimal point.
     * <B>Don't expect miracles from this...</B>
     *
     * <PRE>
     * Field Type MaxLength
     * ---------- ---------
     * C          254
     * D          8
     * F          20
     * N          18
     * </PRE>
     *
     * @param inFieldName The name of the new field, must be less than 10
     *        characters or it gets truncated.
     * @param inFieldType A character representing the dBase field, ( see
     *        above ). Case insensitive.
     * @param inFieldLength The length of the field, in bytes ( see above )
     * @param inDecimalCount For numeric fields, the number of decimal places
     *        to track.
     * @throws DbaseFileException If the type is not recognized.
*/ public void addColumn(String inFieldName, char inFieldType, int inFieldLength, int inDecimalCount) throws DbaseFileException { if (inFieldLength <= 0) { throw new DbaseFileException("field length <= 0"); } if (fields == null) { fields = new DbaseField[0]; } int tempLength = 1; // the length is used for the offset, and there is a // * for deleted as the first byte final DbaseField[] tempFieldDescriptors = new DbaseField[fields.length + 1]; for (int i = 0; i < fields.length; i++) { tempFieldDescriptors[i] = DbaseField.create(fields[i],tempLength); tempLength += fields[i].fieldLength; } // set the field name if (inFieldName == null) { inFieldName = "NoName"; } // Fix for GEOT-42, ArcExplorer will not handle field names > 10 chars // Sorry folks. if (inFieldName.length() > 10) { inFieldName = inFieldName.substring(0, 10); if (LOGGER.isLoggable(Level.WARNING)) { LOGGER.log(Level.WARNING, "FieldName {0} is longer than 10 characters, truncating to {1}", new Object[]{inFieldName, inFieldName}); } } // the field type if ((inFieldType == 'C') || (inFieldType == 'c')) { inFieldType = 'C'; if (inFieldLength > 254) { if (LOGGER.isLoggable(Level.FINE)) { LOGGER.log(Level.FINE, "Field Length for {0} set to {1} Which is longer than 254, not consistent with dbase III", new Object[]{inFieldName, inFieldLength}); } } } else if ((inFieldType == 'S') || (inFieldType == 's')) { inFieldType = 'C'; if (LOGGER.isLoggable(Level.WARNING)) { LOGGER.log(Level.WARNING, "Field type for {0} set to S which is flat out wrong people!, I am setting this to C, in the hopes you meant character.", inFieldName); } if (inFieldLength > 254) { if (LOGGER.isLoggable(Level.FINE)) { LOGGER.log(Level.FINE, "Field Length for {0} set to {1} Which is longer than 254, not consistent with dbase III", new Object[]{inFieldName, inFieldLength}); } } inFieldLength = 8; } else if ((inFieldType == 'D') || (inFieldType == 'd')) { inFieldType = 'D'; if (inFieldLength != 8) { if (LOGGER.isLoggable(Level.FINE)) { 
LOGGER.log(Level.FINE, "Field Length for {0} set to {1} Setting to 8 digets YYYYMMDD", new Object[]{inFieldName, inFieldLength}); } } inFieldLength = 8; } else if ((inFieldType == 'F') || (inFieldType == 'f')) { inFieldType = 'F'; if (inFieldLength > 20) { if (LOGGER.isLoggable(Level.FINE)) { LOGGER.log(Level.FINE, "Field Length for {0} set to {1} Preserving length, but should be set to Max of 20 not valid for dbase IV, and UP specification, not present in dbaseIII.", new Object[]{inFieldName, inFieldLength}); } } } else if ((inFieldType == 'N') || (inFieldType == 'n')) { inFieldType = 'N'; if (inFieldLength > 18) { if (LOGGER.isLoggable(Level.FINE)) { LOGGER.log(Level.FINE, "Field Length for {0} set to {1} Preserving length, but should be set to Max of 18 for dbase III specification.", new Object[]{inFieldName, inFieldLength}); } } if (inDecimalCount < 0) { if (LOGGER.isLoggable(Level.FINE)) { LOGGER.log(Level.FINE, "Field Decimal Position for {0} set to {1} Setting to 0 no decimal data will be saved.", new Object[]{inFieldName, inDecimalCount}); } inDecimalCount = 0; } if (inDecimalCount > inFieldLength - 1) { if (LOGGER.isLoggable(Level.WARNING)) { LOGGER.log(Level.WARNING, "Field Decimal Position for {0} set to {1} Setting to {2} no non decimal data will be saved.", new Object[]{inFieldName, inDecimalCount, inFieldLength - 1}); } inDecimalCount = inFieldLength - 1; } } else if ((inFieldType == 'L') || (inFieldType == 'l')) { inFieldType = 'L'; if (inFieldLength != 1) { if (LOGGER.isLoggable(Level.FINE)) { LOGGER.log(Level.FINE, "Field Length for {0} set to {1} Setting to length of 1 for logical fields.", new Object[]{inFieldName, inFieldLength}); } } inFieldLength = 1; } else { throw new DbaseFileException("Undefined field type " + inFieldType + " For column " + inFieldName); } try { tempFieldDescriptors[fields.length] = DbaseField.create( inFieldName, inFieldType, tempLength, inFieldLength, inDecimalCount, getFieldClass(inFieldType, inDecimalCount, 
inFieldLength)); } catch (IOException ex) { throw new DbaseFileException("Invalid field declaration", ex); } // the length of a record tempLength = tempLength + tempFieldDescriptors[fields.length].fieldLength; // set the new fields. fields = tempFieldDescriptors; fieldCnt = fields.length; headerLength = MINIMUM_HEADER + 32 * fields.length; recordLength = tempLength; } /** * Remove a column from this DbaseFileHeader. * * @todo This is really ugly, don't know who wrote it, but it needs fixin... * @param inFieldName * The name of the field, will ignore case and trim. * @return index of the removed column, -1 if no found */ public int removeColumn(final String inFieldName) throws IOException { int retCol = -1; int tempLength = 1; DbaseField[] tempFieldDescriptors = new DbaseField[fields.length - 1]; for (int i = 0, j = 0; i < fields.length; i++) { if (!inFieldName.equalsIgnoreCase(fields[i].fieldName.trim())) { // if this is the last field and we still haven't found the // named field if (i == j && i == fields.length - 1) { System.err.println("Could not find a field named '" + inFieldName + "' for removal"); return retCol; } tempFieldDescriptors[j] = DbaseField.create( fields[i].fieldName, fields[i].fieldType, tempLength, fields[i].fieldLength, fields[i].decimalCount, fields[i].clazz); tempLength += tempFieldDescriptors[j].fieldLength; // only increment j on non-matching fields j++; } else { retCol = i; } } // set the new fields. fields = tempFieldDescriptors; headerLength = 33 + 32 * fields.length; recordLength = tempLength; return retCol; } /** * Returns the field length in bytes. * * @param inIndex * The field index. * @return The length in bytes. */ public int getFieldLength(final int inIndex) { return fields[inIndex].fieldLength; } /** * Get the decimal count of this field. * location of the decimal point within the field. * * @param inIndex * The field index. * @return The decimal count. 
*/
    public int getFieldDecimalCount(final int inIndex) {
        return fields[inIndex].decimalCount;
    }

    /**
     * Get the field name.
     *
     * @param inIndex The field index.
     * @return The name of the field.
     */
    public String getFieldName(final int inIndex) {
        return fields[inIndex].fieldName;
    }

    /**
     * Get the character class of the field.
     *
     * @param inIndex The field index.
     * @return The dbase character representing this field.
     */
    public char getFieldType(final int inIndex) {
        return fields[inIndex].fieldType;
    }

    /**
     * Get the field offset from the record start position.
     * First field will start at 1, the first byte is for the deleted flag.
     *
     * @param inIndex The field index.
     * @return field offset
     */
    public int getFieldOffset(final int inIndex) {
        int offset = 1; // byte 0 of each record holds the deleted flag
        for (int x = 0; x < inIndex; x++) {
            offset += fields[x].fieldLength;
        }
        return offset;
    }

    /**
     * Get the date this file was last updated.
     *
     * @return The Date last modified.
     */
    public Date getLastUpdateDate() {
        return date;
    }

    /**
     * Return the number of fields in the records.
     *
     * @return The number of fields in this table.
     */
    public int getNumFields() {
        return fields.length;
    }

    /**
     * Return the number of records in the file.
     *
     * @return The number of records in this table.
     */
    public int getNumRecords() {
        return recordCnt;
    }

    /**
     * Get the length of the records in bytes.
     *
     * @return The number of bytes per record.
     */
    public int getRecordLength() {
        return recordLength;
    }

    /**
     * Get the length of the header.
     *
     * @return The length of the header in bytes.
     */
    public int getHeaderLength() {
        return headerLength;
    }

    /**
     * Read the header data from the DBF file.
     *
     * @param channel A readable byte channel. If you have an InputStream you
     *        need to use, you can call
     *        java.nio.Channels.getChannel(InputStream in).
     * @throws IOException If errors occur while reading.
*/
    public void readHeader(final ReadableByteChannel channel) throws IOException {
        // we'll read in chunks of 1K
        ByteBuffer in = ByteBuffer.allocateDirect(1024);
        // do this or GO CRAZY: ByteBuffers come preset to BIG_ENDIAN,
        // but the DBF header is little-endian.
        in.order(ByteOrder.LITTLE_ENDIAN);

        // only want to read first 10 bytes: magic, update date,
        // record count and header length.
        in.limit(10);
        read(in, channel);
        in.position(0);

        // type of file.
        byte magic = in.get();
        if (magic != MAGIC) {
            throw new IOException("Unsupported DBF file Type "
                    + Integer.toHexString(magic));
        }

        // parse the update date information, stored as three bytes: YY MM DD.
        int tempUpdateYear = in.get();
        int tempUpdateMonth = in.get();
        int tempUpdateDay = in.get();
        // ouch, Y2K uncompliant: the single year byte is pivoted at 90 —
        // values above 90 become 19xx, the rest 20xx.
        if (tempUpdateYear > 90) {
            tempUpdateYear = tempUpdateYear + 1900;
        } else {
            tempUpdateYear = tempUpdateYear + 2000;
        }
        Calendar c = Calendar.getInstance();
        c.set(Calendar.YEAR, tempUpdateYear);
        c.set(Calendar.MONTH, tempUpdateMonth - 1); // Calendar months are 0-based
        c.set(Calendar.DATE, tempUpdateDay);
        date = c.getTime();

        // read the number of records.
        recordCnt = in.getInt();

        // read the length of the header structure:
        // unsigned little-endian short — mask out the byte and or it with the
        // shifted 2nd byte.
        headerLength = (in.get() & 0xff) | ((in.get() & 0xff) << 8);

        // if the header is bigger than our 1K, reallocate
        if (headerLength > in.capacity()) {
            in = ByteBuffer.allocateDirect(headerLength - 10);
        }
        in.limit(headerLength - 10);
        in.position(0);
        read(in, channel);
        in.position(0);

        // read the length of a record (unsigned little-endian short again).
        recordLength = (in.get() & 0xff) | ((in.get() & 0xff) << 8);

        // skip the reserved bytes in the header.
        in.position(in.position() + 20);

        // calculate the number of Fields in the header
        fieldCnt = (headerLength - FILE_DESCRIPTOR_SIZE - 1)
                / FILE_DESCRIPTOR_SIZE;

        // read all of the header records
        final Set<String> names = new HashSet<String>();
        final List<DbaseField> lfields = new ArrayList<DbaseField>();
        for (int i = 0; i < fieldCnt; i++) {
            // read the field name: 11 bytes, zero-terminated/padded.
            byte[] buffer = new byte[11];
            in.get(buffer);
            // NOTE(review): decoded with the platform default charset —
            // confirm whether a fixed charset should be used for field names.
            String name = new String(buffer);
            int nullPoint = name.indexOf(0);
            if (nullPoint != -1) {
                name = name.substring(0, nullPoint);
            }
            String fieldName = name.trim();
            // duplicate names are disambiguated with a numeric suffix
            // (name2, name3, ...)
            if(names.contains(fieldName)){
                int inc = 2;
                while(names.contains(fieldName+inc)){
                    inc++;
                }
                LOGGER.log(Level.INFO,
                        "DBF : multiple fields for name {0} , one will be exposed as {1}{2}",
                        new Object[]{fieldName, fieldName, inc});
                fieldName = fieldName+inc;
            }

            // read the field type
            char fieldType = (char) in.get();

            // read the field data address, offset from the start of the record.
            int fieldDataAddress = in.getInt();

            // read the field length in bytes, stored as an unsigned byte.
            int length = (int) in.get();
            if (length < 0) {
                length = length + 256;
            }
            int fieldLength = length;
            if (length > largestFieldSize) {
                largestFieldSize = length;
            }

            // read the field decimal count in bytes
            int decimalCount = (int) in.get();

            // reserved bytes of this field descriptor.
            in.position(in.position() + 14);

            // some broken shapefiles have 0-length attributes. The reference
            // implementation (ArcExplorer 2.0, built with MapObjects) just
            // ignores them.
            final DbaseField field = DbaseField.create(fieldName, fieldType,
                    fieldDataAddress, fieldLength, decimalCount,
                    getFieldClass(fieldType, decimalCount, fieldLength));
            if (field.fieldLength > 0) {
                lfields.add(field);
            }
            names.add(fieldName);
        }

        // Last byte is a marker for the end of the field definitions.
        in.position(in.position() + 1);

        fields = new DbaseField[lfields.size()];
        fields = (DbaseField[]) lfields.toArray(fields);
    }

    /**
     * Get the largest field size of this table.
* @return The largest field size in bytes.
 */
    public int getLargestFieldSize() {
        return largestFieldSize;
    }

    /**
     * Set the number of records in the file.
     *
     * @param inNumRecords The number of records.
     */
    public void setNumRecords(final int inNumRecords) {
        recordCnt = inNumRecords;
    }

    /**
     * Create the list of matching attribute descriptors from header informations.
     *
     * @param namespace namespace used for the created attribute type names
     * @return List of AttributeType, one per field of this header
     */
    public List<AttributeType> createDescriptors(final String namespace){
        final int nbFields = getNumFields();
        final SingleAttributeTypeBuilder atb = new SingleAttributeTypeBuilder();
        final List<AttributeType> attributes = new ArrayList<>(nbFields);
        for(int i=0; i<nbFields; i++){
            final String name = getFieldName(i);
            final Class attributeClass = getFieldClass(i);
            final int length = getFieldLength(i);
            atb.reset();
            atb.setName(namespace, name);
            atb.setValueClass(attributeClass);
            atb.setLength(length);
            attributes.add(atb.build());
        }
        return attributes;
    }

    /**
     * Write the header data to the DBF file.
     *
     * @param out A channel to write to. If you have an OutputStream you can
     *        obtain the correct channel by using
     *        java.nio.Channels.newChannel(OutputStream out).
     * @throws IOException If errors occur.
     */
    public void writeHeader(final WritableByteChannel out) throws IOException {
        // take care of the annoying case where no records have been added...
        if (headerLength == -1) {
            headerLength = MINIMUM_HEADER;
        }
        ByteBuffer buffer = ByteBuffer.allocateDirect(headerLength);
        buffer.order(ByteOrder.LITTLE_ENDIAN);

        // write the output file type.
        buffer.put((byte) MAGIC);

        // write the last-update date as three single bytes: YY MM DD.
        Calendar c = Calendar.getInstance();
        c.setTime(new Date());
        buffer.put((byte) (c.get(Calendar.YEAR) % 100));
        buffer.put((byte) (c.get(Calendar.MONTH) + 1));
        buffer.put((byte) (c.get(Calendar.DAY_OF_MONTH)));

        // write the number of records in the datafile.
        buffer.putInt(recordCnt);

        // write the length of the header structure.
        buffer.putShort((short) headerLength);

        // write the length of a record.
        buffer.putShort((short) recordLength);

        // skip the 20 reserved bytes in the header (left zeroed).
        buffer.position(buffer.position() + 20);

        // write all of the header records
        int tempOffset = 0;
        for (int i = 0; i < fields.length; i++) {
            // write the field name, zero-padded to 11 bytes.
            for (int j = 0; j < 11; j++) {
                if (fields[i].fieldName.length() > j) {
                    buffer.put((byte) fields[i].fieldName.charAt(j));
                } else {
                    buffer.put((byte) 0);
                }
            }

            // write the field type
            buffer.put((byte) fields[i].fieldType);

            // write the field data address, offset from the start of the
            // record.
            buffer.putInt(tempOffset);
            tempOffset += fields[i].fieldLength;

            // write the length of the field.
            buffer.put((byte) fields[i].fieldLength);

            // write the decimal count.
            buffer.put((byte) fields[i].decimalCount);

            // skip the 14 reserved bytes of this field descriptor.
            buffer.position(buffer.position() + 14);
        }

        // write the end of the field definitions marker
        buffer.put((byte) 0x0D);

        buffer.position(0);
        int r = buffer.remaining();
        while ((r -= out.write(buffer)) > 0) {
            // loop until the whole buffer has been written
        }
    }

    /**
     * Get a simple representation of this header.
     *
     * @return A String representing the state of the header.
     */
    @Override
    public String toString() {
        final StringBuilder fs = new StringBuilder();
        fs.append("DB3 Header\n Date : ").append(date)
                .append("\n Records : ").append(recordCnt)
                .append("\n Fields : ").append(fieldCnt)
                .append('\n');
        for (DbaseField f : fields) {
            fs.append(f.fieldName).append(' ').append(f.fieldType).append(' ')
                    .append(f.fieldLength).append(' ').append(f.decimalCount)
                    .append(' ').append(f.fieldDataAddress).append('\n');
        }
        return fs.toString();
    }

    /**
     * Attempt to create a DbaseFileHeader for the FeatureType. Note, we cannot
     * set the number of records until the write has completed.
     *
     * @param featureType the feature type whose attributes describe the columns
     * @return a header with one column per supported non-geometry attribute
     * @throws IOException if a column cannot be created
* @throws DbaseFileException if a column type cannot be mapped
     */
    public static DbaseFileHeader createDbaseHeader(final FeatureType featureType)
            throws IOException, DbaseFileException {
        final DbaseFileHeader header = new DbaseFileHeader();
        for (PropertyType type : featureType.getProperties(true)) {
            // skip properties part of the convention
            if (AttributeConvention.contains(type.getName())) {
                continue;
            }
            final AttributeType att = (AttributeType) type;
            final Class<?> binding = att.getValueClass();
            final String name = type.getName().tip().toString();
            Integer maxLength = FeatureExt.getLengthCharacteristic(att);
            if (maxLength == null) {
                maxLength = 255;
            }
            if (binding == Integer.class || binding == Short.class
                    || binding == Byte.class) {
                header.addColumn(name, 'N', Math.min(maxLength, 9), 0);
            } else if (binding == Long.class) {
                header.addColumn(name, 'N', Math.min(maxLength, 19), 0);
            } else if (binding == BigInteger.class) {
                header.addColumn(name, 'N', Math.min(maxLength, 33), 0);
            } else if (Number.class.isAssignableFrom(binding)) {
                final int len = Math.min(maxLength, 33);
                header.addColumn(name, 'N', len, Math.max(len - 2, 0));
            } else if (java.util.Date.class.isAssignableFrom(binding)) {
                header.addColumn(name, 'D', maxLength, 0);
            } else if (binding == Boolean.class) {
                header.addColumn(name, 'L', 1, 0);
            } else if (CharSequence.class.isAssignableFrom(binding)) {
                // Possible fix for GEOT-42 : ArcExplorer doesn't like 0 length
                // ensure that maxLength is at least 1
                header.addColumn(name, 'C', Math.min(254, maxLength), 0);
            } else if (Geometry.class.isAssignableFrom(binding)) {
                // geometries are stored in the .shp file, not in the .dbf
                continue;
            } else {
                // fallback : write as string
                header.addColumn(name, 'C', Math.min(254, maxLength), 0);
            }
        }
        return header;
    }
}