// **********************************************************************
//
// <copyright>
//
//  BBN Technologies
//  10 Moulton Street
//  Cambridge, MA 02138
//  (617) 873-8000
//
//  Copyright (C) BBNT Solutions LLC. All rights reserved.
//
// </copyright>
// **********************************************************************
//
// $Source: /cvs/distapps/openmap/src/openmap/com/bbn/openmap/dataAccess/shape/DbfFile.java,v $
// $RCSfile: DbfFile.java,v $
// $Revision: 1.4 $
// $Date: 2009/02/05 18:46:11 $
// $Author: dietrick $
//
// **********************************************************************

package com.bbn.openmap.dataAccess.shape;

import java.io.EOFException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.OutputStream;
import java.net.URL;
import java.text.DecimalFormatSymbols;
import java.text.ParseException;
import java.util.ArrayList;
import java.util.List;
import java.util.Locale;

import com.bbn.openmap.dataAccess.shape.output.DbfOutputStream;
import com.bbn.openmap.io.BinaryBufferedFile;
import com.bbn.openmap.io.BinaryFile;
import com.bbn.openmap.io.FormatException;
import com.bbn.openmap.util.ArgParser;
import com.bbn.openmap.util.Debug;
import com.bbn.openmap.util.FileUtils;

/**
 * An extension to DbfTableModel that handles reading only certain records when
 * needed, in conjunction with some spatial indexing mechanism. Can be told
 * which columns to read and which to skip, so unnecessary/unused information
 * isn't held in memory.
 *
 * @author dietrick
 */
public class DbfFile extends DbfTableModel {

    private static final long serialVersionUID = 1L;

    /** Number of records in the file, as stated in the dbf header. */
    protected int _rowCount;
    /** Length in bytes of one record, as stated in the dbf header. */
    protected int _recordLength;
    /** Length in bytes of the dbf header, i.e. the offset of record 0. */
    protected int _headerLength;

    /**
     * This _columnMask variable is an array that contains either a
     * Boolean.TRUE for the index of a column that should be read, or an
     * Integer object that contains the byte length of the column entry, so
     * the reader will know how many bytes to skip for columns being ignored.
     * The _lengths array contents are modified to reflect the lengths of only
     * the columns being read when the column mask is set, as are the _names,
     * _types, and _decimalCounts arrays. The order and length of those arrays
     * match the order of the Boolean.TRUE objects in the _columnMask array.
     */
    protected Object[] _columnMask = null;

    protected java.text.DecimalFormat df;
    protected BinaryFile bf;

    /**
     * Creates an empty DbfFile whose number formatter uses English decimal
     * symbols, regardless of the default locale.
     */
    protected DbfFile() {
        df = new java.text.DecimalFormat();
        DecimalFormatSymbols dfs = new DecimalFormatSymbols(Locale.ENGLISH);
        df.setDecimalFormatSymbols(dfs);
    }

    /**
     * Creates a blank DbfTableModel
     *
     * @param columnCount The number of columns this model will manage
     */
    public DbfFile(int columnCount) {
        this();
        _columnCount = columnCount;
        _records = new ArrayList<List<Object>>();
        _lengths = new int[columnCount];
        _decimalCounts = new byte[columnCount];
        _types = new byte[columnCount];
        _names = new String[columnCount];
    }

    /**
     * Creates a DbfFile on top of the given input and reads the dbf header
     * from it. No record data is read.
     *
     * @param bf the input to read the dbf file from.
     * @throws EOFException
     * @throws FormatException if the header can't be read completely.
     * @throws IOException
     */
    public DbfFile(BinaryFile bf) throws EOFException, FormatException,
            IOException {
        this();
        setBinaryFile(bf);
    }

    /**
     * Reads the dbf header from the given input and resets the column
     * metadata (names, types, lengths, decimal counts, column count) to
     * describe every column in the file. Because the metadata is reset, any
     * column mask previously set is cleared as well; otherwise the mask and
     * the metadata would disagree.
     *
     * @param bf the input to read the header from.
     * @throws FormatException if the input ends before the header is
     *         complete, or the header length is too small to be valid.
     * @throws IOException
     */
    public void readHeader(BinaryFile bf) throws FormatException, IOException {
        try {
            bf.seek(0);
            bf.byteOrder(false);
            /* byte description = */bf.read();
            /* byte year = */bf.read();
            /* byte month = */bf.read();
            /* byte day = */bf.read();
            _rowCount = bf.readInteger();
            // Both lengths are unsigned 16 bit values in the file; mask so
            // that values above 32767 don't turn negative.
            _headerLength = bf.readShort() & 0xFFFF;
            _recordLength = bf.readShort() & 0xFFFF;
            _columnCount = (_headerLength - 32 - 1) / 32;
            if (_columnCount < 0) {
                throw new FormatException("DbfFile: invalid header length "
                        + _headerLength);
            }
            bf.skipBytes(20);

            _names = new String[_columnCount];
            _types = new byte[_columnCount];
            _lengths = new int[_columnCount];
            _decimalCounts = new byte[_columnCount];

            for (int n = 0; n <= _columnCount - 1; n++) {
                // 32 bytes for each column
                _names[n] = bf.readFixedLengthString(11);

                //
                // Some TIGER dbf files from ESRI have nulls
                // in the column names. Delete them.
                //
                int ix = _names[n].indexOf((char) 0);
                if (ix > 0) {
                    _names[n] = _names[n].substring(0, ix);
                }

                _types[n] = (byte) bf.read();
                bf.skipBytes(4);
                _lengths[n] = bf.readUnsigned();
                _decimalCounts[n] = (byte) bf.read();
                bf.skipBytes(14);
            }

            // The metadata now describes all columns in the file, so a mask
            // built against the previous metadata is no longer valid.
            _columnMask = null;

            if (DEBUG && _headerLength != bf.getFilePointer()) {
                Debug.output("DbfFile: Header length specified in file doesn't match current pointer location");
            }
        } catch (EOFException eofe) {
            FormatException fe = new FormatException(eofe.getMessage());
            fe.initCause(eofe);
            throw fe;
        }
    }

    /**
     * Tells the BinaryFile input reader to close, releasing the file pointer.
     * Will automatically reopen if necessary.
     */
    public void close() {
        if (bf != null) {
            try {
                bf.close();
            } catch (IOException e) {
                if (Debug.debugging("shape")) {
                    e.printStackTrace();
                }
            }
        }
    }

    /**
     * Reads the data and puts data in an ArrayList of records.
     */
    public void readData() throws IOException, FormatException {
        readData(0, _rowCount);
    }

    /**
     * Read in a set of records from the dbf file, starting at the provided
     * index and continuing for the provided count. Any records previously
     * held are replaced.
     *
     * @param startingRecordIndex 0 is the first record index. Negative values
     *        are treated as 0.
     * @param numRecordsToRead the number of records to read. A negative
     *        value, or a value reaching past the end of the file, means
     *        "all records from startingRecordIndex to the end".
     * @throws IOException
     * @throws FormatException
     */
    public void readData(int startingRecordIndex, int numRecordsToRead)
            throws IOException, FormatException {
        if (startingRecordIndex < 0) {
            startingRecordIndex = 0;
        }

        // Never negative, even when the starting index is past the last
        // record.
        int available = Math.max(0, _rowCount - startingRecordIndex);
        if (numRecordsToRead < 0 || numRecordsToRead > available) {
            numRecordsToRead = available;
        }

        _records = new ArrayList<List<Object>>(numRecordsToRead);

        // numRecordsToRead is a count, not an end index.
        int endIndex = startingRecordIndex + numRecordsToRead;
        for (int r = startingRecordIndex; r < endIndex; r++) {
            _records.add(getRecordData(r));
        }
    }

    /**
     * Fetches the record data for the given index.
     *
     * @param index the index of the data, starting at 0 for the first record.
     * @return List containing Strings and Numbers for the dbf entry for the
     *         record.
     * @throws IOException if no BinaryFile has been set, or reading fails.
     * @throws FormatException
     */
    public List<Object> getRecordData(int index) throws IOException,
            FormatException {

        if (bf == null) {
            throw new IOException("DbfFile not set with valid BinaryFile.");
        }

        // Compute the offset as a long so large files don't overflow int.
        bf.seek(_headerLength + (long) index * _recordLength);
        /* int deleteFlag = */bf.read();

        int columnCount = _columnCount;
        if (_columnMask != null) {
            columnCount = _columnMask.length;
        }

        // Here, even with the columnMask, the _columnCount is the target
        // number of columns to be stored out of the dbf file. The
        // _columnMask.length is the number of columns actually in the file
        // (if that array is not null).
        ArrayList<Object> record = new ArrayList<Object>(_columnCount);
        int targetColumnIndex = 0;
        for (int c = 0; c <= columnCount - 1; c++) {
            if (_columnMask == null || isKept(_columnMask[c])) {
                int length = _lengths[targetColumnIndex];
                if (length == -1) {
                    length = 255;
                }
                int type = _types[targetColumnIndex];
                int numDecSpaces = _decimalCounts[targetColumnIndex];
                df.setMaximumFractionDigits(numDecSpaces);
                String cell = bf.readFixedLengthString(length).trim();
                Object obj = cell;
                try {
                    obj = getObjectForType(cell, type, df, length);
                } catch (ParseException pe) {
                    // Don't need to do anything, obj == cell;
                }
                record.add(targetColumnIndex, obj);
                targetColumnIndex++;
            } else {
                // Masked-out column: the mask entry holds its byte length.
                bf.skipBytes(((Integer) _columnMask[c]).intValue());
            }
        }

        return record;
    }

    /**
     * Clear the record information from memory.
     */
    public void clearRecords() {
        if (_records != null) {
            _records.clear();
        }
    }

    /**
     * Create another DbfTableModel with the same structure as this one (number
     * of columns, column names, lengths and decimal counts).
     */
    public DbfTableModel headerClone() {
        int size = getColumnCount();
        DbfFile dtm = new DbfFile(size);
        for (int i = 0; i < size; i++) {
            dtm.setColumnName(i, this.getColumnName(i));
            dtm.setDecimalCount(i, this.getDecimalCount(i));
            dtm.setLength(i, this.getLength(i));
            dtm.setType(i, this.getType(i));
        }
        return dtm;
    }

    /**
     * Creates a DbfTableModel for a given .dbf file
     *
     * @param dbf The url of the file to retrieve.
     * @return The DbfTableModel, null if there is a problem.
     */
    public static DbfTableModel getDbfTableModel(URL dbf) {
        return getDbfTableModel(dbf.toString());
    }

    /**
     * Creates a DbfTableModel for a given .dbf file. Only the header is read;
     * the underlying file is closed again before returning.
     *
     * @param dbf The path of the file to retrieve.
     * @return The DbfTableModel, null if there is a problem.
     */
    public static DbfTableModel getDbfTableModel(String dbf) {
        DbfFile model = null;
        BinaryFile input = null;
        try {
            input = new BinaryBufferedFile(dbf);
            model = new DbfFile(input);
            model.close();
        } catch (Exception exception) {
            if (Debug.debugging("shape")) {
                Debug.error("problem loading DBF file" + exception.getMessage());
            }
            // The file was opened but the header could not be read; nobody
            // else holds the input, so release it here.
            if (model == null && input != null) {
                try {
                    input.close();
                } catch (IOException ignored) {
                    // Best effort, the load has already failed.
                }
            }
        }
        return model;
    }

    /**
     * Command line entry point: displays a dbf file in a GUI, or, when a
     * target is given, writes the (optionally column-masked) contents to a
     * new dbf file.
     */
    public static void main(String[] args) {
        Debug.init();

        ArgParser ap = new ArgParser("DbfFile");
        ap.add("columns", "Print field header information.");
        ap.add("mask", "Only show listed columns", -1);
        ap.add("source", "The dbf file to read.", 1);
        ap.add("target", "The dbf file to write, use with mask to remove columns into new dbf file.", 1);
        ap.add("num", "Specify the number of records to read and display (handy for large dbf files)", 1);

        if (!ap.parse(args)) {
            ap.printUsage();
            System.exit(0);
        }

        String source = null;
        String target = null;
        double num = Double.MAX_VALUE;

        String[] ags = ap.getArgValues("source");
        if (ags != null) {
            source = ags[0];
        } else {
            source = FileUtils.getFilePathToOpenFromUser("Choose DBF file");
            if (source == null) {
                System.exit(0);
            }
        }

        ags = ap.getArgValues("target");
        if (ags != null) {
            target = ags[0];
        }

        boolean readData = ap.getArgValues("columns") == null;

        if (!readData) {
            num = 0;
        } else {
            ags = ap.getArgValues("num");
            if (ags != null) {
                try {
                    num = Double.parseDouble(ags[0]);
                } catch (NumberFormatException nfe) {
                    // Not a number: keep the default, which reads
                    // everything.
                }
            }
        }

        String[] columnMask = ap.getArgValues("mask");

        try {
            DbfFile dtm = (DbfFile) DbfFile.getDbfTableModel(source);

            if (dtm == null) {
                System.out.println("Problem reading " + source);
                System.exit(-1);
            } else {

                // The mask applies whenever column names were listed; it is
                // independent of the "columns" flag.
                if (columnMask != null) {
                    dtm.setColumnMask(columnMask);
                }

                dtm.readData(0, (int) num);

                if (target != null) {
                    OutputStream os = new FileOutputStream(target);
                    try {
                        DbfOutputStream dos = new DbfOutputStream(os);
                        dos.writeModel(dtm);
                    } finally {
                        os.close();
                    }
                } else {
                    dtm.setWritable(true);
                    dtm.exitOnClose = true;
                    // Use the resolved source path; args may be empty when
                    // the file was picked from the chooser dialog.
                    dtm.showGUI(source, MODIFY_ROW_MASK | MODIFY_COLUMN_MASK
                            | SAVE_MASK);
                }
            }
        } catch (Exception e) {
            Debug.error(e.getMessage());
            e.printStackTrace();
        }
    }

    /**
     * Checks the _columnMask Object[] and looks for Boolean.TRUE objects,
     * indicating a column that should be used. Returns a boolean[] with trues
     * in the indexes for those columns. If no column mask is set, every
     * column is used, so an all-true array with one entry per column is
     * returned.
     *
     * @return boolean[] representing columns, true values should be used.
     */
    public boolean[] getColumnMask() {
        if (_columnMask == null) {
            boolean[] allColumns = new boolean[Math.max(0, _columnCount)];
            java.util.Arrays.fill(allColumns, true);
            return allColumns;
        }

        boolean[] columnMask = new boolean[_columnMask.length];
        for (int i = 0; i < _columnMask.length; i++) {
            columnMask[i] = isKept(_columnMask[i]);
        }
        return columnMask;
    }

    /**
     * Given a boolean[] where trues mark columns to keep, a _columnMask
     * Object[] is set on this object with Boolean.TRUE objects in that array
     * for the trues, and Integer objects representing the lengths of the false
     * columns. The lengths are used when reading the dbf file, so it's known
     * how many bytes to skip for that column.
     * <p>
     * The column metadata must describe all columns of the file when this is
     * called. A mask shorter than the column count is accepted; the columns
     * it doesn't cover are skipped. A null mask, or one longer than the
     * column count, is ignored.
     *
     * @param mask trues for the columns to keep.
     */
    protected void createColumnMaskArray(boolean[] mask) {
        if (mask == null || mask.length > _columnCount) {
            return;
        }

        // Always build a full-width mask, so that every column in the file
        // has either a keep marker or a skip length.
        Object[] columnMask = new Object[_columnCount];
        for (int i = 0; i < _columnCount; i++) {
            if (i < mask.length && mask[i]) {
                columnMask[i] = Boolean.TRUE;
            } else {
                columnMask[i] = Integer.valueOf(_lengths[i]);
            }
        }

        _columnMask = columnMask;
        resolveColumns();
    }

    /**
     * Limit which columns are read from the dbf file using a boolean array
     * corresponding to the columns. For indexes in the array marked true, those
     * columns will be read. If the column mask has already been set, the dbf
     * file header will be re-read to reset the metadata for the file.
     *
     * @param mask trues for the columns to read.
     */
    public void setColumnMask(boolean[] mask) {
        restoreFullHeader();
        createColumnMaskArray(mask);
    }

    /**
     * Limit which columns are read from the dbf file using the column names. If
     * the column mask has already been set, the dbf file header will be re-read
     * to reset the metadata for the file.
     *
     * @param columnNames names of the columns to read, case is ignored.
     */
    public void setColumnMask(String[] columnNames) {
        restoreFullHeader();

        if (columnNames != null && _names != null) {
            boolean[] mask = new boolean[_names.length];
            for (int j = 0; j < _names.length; j++) {
                for (int i = 0; i < columnNames.length; i++) {
                    if (_names[j].equalsIgnoreCase(columnNames[i])) {
                        mask[j] = true;
                        break;
                    }
                }
            }

            createColumnMaskArray(mask);
        }
    }

    /**
     * Re-reads the header when a column mask is in place, so the metadata
     * covers all columns of the file again before a new mask is applied.
     */
    private void restoreFullHeader() {
        try {
            if (_columnMask != null) {
                readHeader(bf);
            }
        } catch (Exception e) {
            Debug.error("problem setting column mask for DbfFile"
                    + e.getMessage());
        }
    }

    /**
     * Tells whether a _columnMask entry marks a column that is read.
     */
    private static boolean isKept(Object maskEntry) {
        return Boolean.TRUE.equals(maskEntry);
    }

    /**
     * Sets the metadata for the dbf file to match the current _columnMask
     * settings. Records already held in memory are reduced to the kept
     * columns. A held record whose width doesn't match the column count in
     * effect before the mask was applied can't be mapped onto the new
     * columns and is dropped; use readData to load it again.
     */
    protected void resolveColumns() {
        if (_columnMask == null || _columnMask.length != _columnCount) {
            return;
        }

        final int oldColumnCount = _columnCount;

        int newColumnCount = 0;
        for (int i = 0; i < _columnMask.length; i++) {
            if (isKept(_columnMask[i])) {
                newColumnCount++;
            }
        }

        int[] lengths = new int[newColumnCount];
        byte[] decimalCounts = new byte[newColumnCount];
        byte[] types = new byte[newColumnCount];
        String[] names = new String[newColumnCount];

        int newIndex = 0;
        for (int i = 0; i < _columnMask.length; i++) {
            if (isKept(_columnMask[i])) {
                lengths[newIndex] = _lengths[i];
                decimalCounts[newIndex] = _decimalCounts[i];
                types[newIndex] = _types[i];
                names[newIndex] = _names[i];
                newIndex++;
            }
        }

        ArrayList<List<Object>> records = null;
        if (_records != null) {
            records = new ArrayList<List<Object>>(_records.size());
            // _records holds one list per row, so the mask has to be applied
            // to the cells inside each row.
            for (List<Object> row : _records) {
                if (row == null || row.size() != oldColumnCount) {
                    continue;
                }
                List<Object> keptCells = new ArrayList<Object>(newColumnCount);
                for (int i = 0; i < oldColumnCount; i++) {
                    if (isKept(_columnMask[i])) {
                        keptCells.add(row.get(i));
                    }
                }
                records.add(keptCells);
            }
        }

        _lengths = lengths;
        _decimalCounts = decimalCounts;
        _types = types;
        _names = names;
        _columnCount = newColumnCount;
        if (records != null) {
            _records = records;
        }
    }

    public int getHeaderLength() {
        return _headerLength;
    }

    public void setHeaderLength(int length) {
        _headerLength = length;
    }

    public int getRecordLength() {
        return _recordLength;
    }

    public void setRecordLength(int length) {
        _recordLength = length;
    }

    /**
     * @return the number of records in the file, as stated in the header or
     *         set with setRowCount.
     */
    public int getRowCount() {
        return _rowCount;
    }

    public void setRowCount(int count) {
        _rowCount = count;
    }

    public BinaryFile getBinaryFile() {
        return bf;
    }

    /**
     * Sets the input for this DbfFile and reads the dbf header from it.
     *
     * @param bf the input to read the dbf file from.
     * @throws EOFException
     * @throws FormatException if the header can't be read completely.
     * @throws IOException
     */
    public void setBinaryFile(BinaryFile bf) throws EOFException,
            FormatException, IOException {
        this.bf = bf;
        readHeader(bf);
    }

    public java.text.DecimalFormat getDecimalFormat() {
        return df;
    }

    public void setDecimalFormat(java.text.DecimalFormat df) {
        this.df = df;
    }
}