package org.gbif.dwca.tools; import org.gbif.dwc.terms.DwcTerm; import org.gbif.dwca.io.Archive; import org.gbif.dwca.io.ArchiveFactory; import org.gbif.dwca.io.UnsupportedArchiveException; import org.gbif.dwca.record.Record; import org.gbif.dwca.record.StarRecord; import java.io.File; import java.io.IOException; import java.util.ArrayList; import java.util.List; public class ArchiveScanner { public static void main(String[] args) throws IOException, UnsupportedArchiveException { if (args.length < 1) { System.out.println( "Please specify a Darwin Core Archive folder or single Darwin Core CSV file as the first argument.\nAn optional second integer argument lets you specify the number of records to view, a third the offset to skip."); System.exit(0); } File archiveFile = new File(args[0]); if (!archiveFile.exists()) { System.out.println("Cannot find archive file: " + archiveFile); System.exit(0); } int maxNumbers = 0; if (args.length > 1) { try { maxNumbers = Integer.parseInt(args[1]); } catch (NumberFormatException ignored) { System.out.println("Second argument should an integer defining maxNumbers"); } } int offset = 0; if (args.length > 2) { try { offset = Integer.parseInt(args[2]); } catch (NumberFormatException ignored) { System.out.println("Third offset argument is no integer"); } } System.out.println("Opening archive: " + archiveFile.getAbsolutePath()); Archive arch = ArchiveFactory.openArchive(archiveFile); if (arch.getCore() == null) { System.out.println("Cannot locate the core data file"); System.exit(0); } System.out.println("Core file(s) found: " + arch.getCore().getLocations()); System.out.println("Core row type: " + arch.getCore().getRowType()); System.out.println("Core identifier column: " + arch.getCore().getId().getIndex()); List<DwcTerm> terms = new ArrayList<DwcTerm>(); terms.add(DwcTerm.scientificName); terms.add(DwcTerm.taxonRank); terms.add(DwcTerm.parentNameUsageID); terms.add(DwcTerm.acceptedNameUsageID); terms.add(DwcTerm.taxonomicStatus); terms.add(DwcTerm.nomenclaturalStatus); terms.add(DwcTerm.kingdom); terms.add(DwcTerm.family); for (DwcTerm t : terms) { if (!arch.getCore().hasTerm(t)) { System.out.println("Cannot locate term " + t); } } System.out.println("Number of extensions " + arch.getExtensions().size()); // count records int i = 0; for (StarRecord rec : arch) { i++; } System.out.println("Archive contains " + i + " core records."); // show some records i = 0; if (maxNumbers > 0) { for (StarRecord rec : arch) { i++; if (i > offset + maxNumbers) { break; } if (offset < i) { System.out.println("record " + i + ": " + rec); for (Record erec : rec) { System.out.println(" : " + erec); } } } } } }