/*
* To change this license header, choose License Headers in Project Properties.
* To change this template file, choose Tools | Templates
* and open the template in the editor.
*/
package edu.harvard.iq.dataverse.ingest.metadataextraction.impl.plugins.fits;
import edu.harvard.iq.dataverse.ingest.metadataextraction.*;
import edu.harvard.iq.dataverse.ingest.metadataextraction.spi.*;
import java.io.BufferedInputStream;
import java.io.FileInputStream;
import java.io.BufferedReader;
import java.io.InputStreamReader;
import java.io.IOException;
import java.io.File;
import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.Map;
import java.util.HashMap;
import java.util.Set;
import java.util.HashSet;
import java.util.List;
import java.util.ArrayList;
import java.util.Calendar;
import java.util.Date;
import java.util.Properties;
import java.util.logging.Logger;
import nom.tam.fits.BasicHDU;
import nom.tam.fits.Fits;
import nom.tam.fits.FitsException;
import nom.tam.fits.Header;
import nom.tam.fits.HeaderCard;
import nom.tam.fits.ImageHDU;
import nom.tam.fits.TableHDU;
import nom.tam.fits.UndefinedHDU;
/**
*
* @author Leonid Andreev
*/
public class FITSFileMetadataExtractor extends FileMetadataExtractor {
// Package-level logger shared by all instances of this extractor.
private static Logger dbgLog = Logger.getLogger(FITSFileMetadataExtractor.class.getPackage().getName());
// Effective (possibly config-file-overridden) keys; populated by readConfig().
private Map<String, Integer> recognizedFitsMetadataKeys = null;
// the integer value in the map is reserved for the type of the metadata
// keyword and configuration options.
private Map<String, Integer> recognizedFitsColumnKeys = null;
// these are the column-level metadata keys; these are defined as XXXXn in
// the "FITS Standard, Appendix C" document; for example, "TTYPEn", meaning
// that the Header section of the table HDU will contain the keys TTYPE1,
// TTYPE2, ... TTYPEN - where N is the number of columns.
private Map<String, String> indexableFitsMetaKeys = null;
// This map defines the names of the keys under which they will be indexed
// and made searchable in the application
// Hard-coded fallback configuration, used when no config file is found
// (see readConfig()):
private static final Map<String, Integer> defaultRecognizedFitsMetadataKeys = new HashMap<String, Integer>();
// the integer value in the map is reserved for the type of the metadata
// keyword; it's not being used as of now.
private static final Map<String, Integer> defaultRecognizedFitsColumnKeys = new HashMap<String, Integer>();
// these are the column-level metadata keys; these are defined as XXXXn in
// the "FITS Standard, Appendix C" document; for example, "TTYPEn", meaning
// that the Header section of the table HDU will contain the keys TTYPE1,
// TTYPE2, ... TTYPEN - where N is the number of columns.
private static final Map<String, String> defaultIndexableFitsMetaKeys = new HashMap<String, String>();
// This map defines the names of the keys under which they will be indexed
// and made searchable in the application
// Leading tokens of the tab-separated config file entries (see readConfig()):
private static final String CONFIG_TOKEN_META_KEY = "RECOGNIZED_META_KEY";
private static final String CONFIG_TOKEN_COLUMN_KEY = "RECOGNIZED_COLUMN_KEY";
// Name of the Dataverse metadata block the extracted fields belong to:
private static final String ASTROPHYSICS_BLOCK_NAME = "astrophysics";
// Field type codes stored as the values of the recognized-key maps:
private static final int FIELD_TYPE_TEXT = 0;
private static final int FIELD_TYPE_DATE = 1;
private static final int FIELD_TYPE_FLOAT = 2;
// Names of the target fields in the astrophysics metadata block:
private static final String ATTRIBUTE_TYPE = "astroType";
private static final String ATTRIBUTE_FACILITY = "astroFacility";
private static final String ATTRIBUTE_INSTRUMENT = "astroInstrument";
private static final String ATTRIBUTE_OBJECT = "astroObject";
private static final String ATTRIBUTE_START_TIME = "coverage.Temporal.StartTime";
private static final String ATTRIBUTE_STOP_TIME = "coverage.Temporal.StopTime";
private static final String ATTRIBUTE_COVERAGE_SPATIAL = "coverage.Spatial";
static {
dbgLog.fine("FITS plugin: loading the default configuration values;");
// The following fields have been dropped from the configuration
// map, not because we are not interested in them anymore - but
// because they are now *mandatory*, i.e. non-configurable.
// We will be checking for the "telescope", "instrument", etc.
// fields on all files and HDUs:
// -- 4.0 beta
//defaultRecognizedFitsMetadataKeys.put("TELESCOP", 0);
//defaultRecognizedFitsMetadataKeys.put("INSTRUME", 0);
//defaultRecognizedFitsMetadataKeys.put("NAXIS", 0);
//defaultRecognizedFitsMetadataKeys.put("DATE-OBS", FIELD_TYPE_DATE);
// both coverage.Temporal.StartTime and .EndTime are derived from
// the DATE-OBS values; extra rules apply (coded further down)
//defaultRecognizedFitsMetadataKeys.put("OBJECT", FIELD_TYPE_TEXT);
//defaultRecognizedFitsMetadataKeys.put("CRVAL1", FIELD_TYPE_TEXT);
//defaultRecognizedFitsMetadataKeys.put("CRVAL2", FIELD_TYPE_TEXT);
//defaultRecognizedFitsMetadataKeys.put("EXPTIME", FIELD_TYPE_DATE);
//defaultIndexableFitsMetaKeys.put("DATE-OBS", "coverage.Temporal.StartTime");
//defaultIndexableFitsMetaKeys.put("DATE-OBS", "coverage.Temporal.StopTime");
//defaultIndexableFitsMetaKeys.put("NAXIS", "naxis");
//defaultIndexableFitsMetaKeys.put("OBJECT", "astroObject");
//defaultIndexableFitsMetaKeys.put("CRVAL1", "coverage.Spatial");
//defaultIndexableFitsMetaKeys.put("CRVAL2", "coverage.Spatial");
// Optional, configurable fields:
defaultRecognizedFitsMetadataKeys.put("FILTER", FIELD_TYPE_TEXT);
defaultRecognizedFitsMetadataKeys.put("CD1_1", FIELD_TYPE_FLOAT);
defaultRecognizedFitsMetadataKeys.put("CDELT", FIELD_TYPE_FLOAT);
// And the mapping to the corresponding values in the
// metadata block:
// (per 4.0 beta implementation, the names below must match
// the names of the fields in the corresponding metadata block!)
defaultIndexableFitsMetaKeys.put("TELESCOP", ATTRIBUTE_FACILITY);
defaultIndexableFitsMetaKeys.put("INSTRUME", ATTRIBUTE_INSTRUMENT);
defaultIndexableFitsMetaKeys.put("FILTER", "coverage.Spectral.Bandpass");
defaultIndexableFitsMetaKeys.put("CD1_1", "resolution.Spatial");
defaultIndexableFitsMetaKeys.put("CDELT", "resolution.Spatial");
defaultIndexableFitsMetaKeys.put("EXPTIME", "resolution.Temporal");
// The following fields have been dropped from the configuration
// in 4.0 beta because we are not interested in them
// any longer:
//defaultRecognizedFitsMetadataKeys.put("EQUINOX", 0);
//defaultIndexableFitsMetaKeys.put("EQUINOX", "Equinox");
//defaultRecognizedFitsMetadataKeys.put("DATE", 0);
//defaultRecognizedFitsMetadataKeys.put("ORIGIN", 0);
//defaultRecognizedFitsMetadataKeys.put("AUTHOR", 0);
//defaultRecognizedFitsMetadataKeys.put("REFERENC", 0);
//defaultRecognizedFitsMetadataKeys.put("COMMENT", 0);
//defaultRecognizedFitsMetadataKeys.put("HISTORY", 0);
//defaultRecognizedFitsMetadataKeys.put("OBSERVER", 0);
//defaultRecognizedFitsMetadataKeys.put("EXTNAME", 0);
//defaultRecognizedFitsColumnKeys.put("TTYPE", 1);
//defaultRecognizedFitsColumnKeys.put("TCOMM", 0);
//defaultRecognizedFitsColumnKeys.put("TUCD", 0);
//defaultRecognizedFitsMetadataKeys.put("CUNIT", 0);
//defaultIndexableFitsMetaKeys.put("DATE", "Date");
//defaultIndexableFitsMetaKeys.put("ORIGIN", "Origin");
//defaultIndexableFitsMetaKeys.put("AUTHOR", "Author");
//defaultIndexableFitsMetaKeys.put("REFERENC", "Reference");
//defaultIndexableFitsMetaKeys.put("COMMENT", "Comment");
//defaultIndexableFitsMetaKeys.put("HISTORY", "History");
//defaultIndexableFitsMetaKeys.put("OBSERVER", "Observer");
//defaultIndexableFitsMetaKeys.put("EXTNAME", "Extension-Name");
//defaultIndexableFitsMetaKeys.put("TTYPE", "Column-Label");
//defaultIndexableFitsMetaKeys.put("TCOMM", "Column-Comment");
//defaultIndexableFitsMetaKeys.put("TUCD", "Column-UCD");
//defaultIndexableFitsMetaKeys.put("CUNIT", "cunit");
}
//private static final String METADATA_SUMMARY = "FILE_METADATA_SUMMARY_INFO";
//private static final String OPTION_PREFIX_SEARCHABLE = "PREFIXSEARCH";
// Per-HDU type labels (assigned while scanning individual HDUs):
private static final String HDU_TYPE_IMAGE = "Image";
private static final String HDU_TYPE_IMAGE_CUBE = "Cube";
private static final String HDU_TYPE_TABLE = "Table";
private static final String HDU_TYPE_UNDEF = "Undefined";
private static final String HDU_TYPE_UNKNOWN = "Unknown";
// Whole-file type labels (derived from the per-HDU types above):
private static final String FILE_TYPE_IMAGE = "Image";
private static final String FILE_TYPE_MOSAIC = "Mosaic";
private static final String FILE_TYPE_CUBE = "Cube";
private static final String FILE_TYPE_TABLE = "Table";
private static final String FILE_TYPE_SPECTRUM = "Spectrum";
// Recognized date formats, for extracting temporal values:
// NOTE(review): SimpleDateFormat is not thread-safe, and these shared
// static instances are also mutated (setLenient(false)) inside ingest().
// Concurrent ingests could therefore corrupt date parsing; consider
// per-call instances or java.time.DateTimeFormatter. -- confirm usage
// is single-threaded before relying on this.
private static SimpleDateFormat[] DATE_FORMATS = new SimpleDateFormat[] {
new SimpleDateFormat("yyyy-MM-dd"),
new SimpleDateFormat("dd-MM-yy")
};
private static SimpleDateFormat[] TIME_FORMATS = new SimpleDateFormat[] {
new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss.SSS"),
new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss")
};
/**
* Constructs a <code>FITSFileMetadataExtractor</code> instance with a
* <code>FITSFileMetadataExtractorSpi</code> object.
*
* @param originator a <code>FITSFileMetadataExtractorSpi</code> object;
*                   passed through to the superclass.
*/
public FITSFileMetadataExtractor(FileMetadataExtractorSpi originator) {
super(originator);
}
/**
* Constructs a <code>FITSFileMetadataExtractor</code> without an SPI
* originator (used, e.g., by the {@code main()} test driver below).
*/
public FITSFileMetadataExtractor() {
super(null);
}
/**
* Reads a FITS file from the supplied stream, HDU by HDU, and extracts
* the metadata values recognized by the current configuration (see
* {@link #readConfig()}) into a {@code FileMetadataIngest} object.
* <p>
* In addition to the configurable keys, a number of mandatory fields
* (telescope, instrument, object, spatial coordinates, observation
* start/stop times) are always checked on every HDU. The file-level
* "type" attribute is derived from the mix of HDU types encountered.
*
* @param stream buffered stream positioned at the start of a FITS file
* @return the populated ingest object (astrophysics metadata block)
* @throws IOException if the stream cannot be opened as FITS, or an
*         HDU cannot be read
*/
public FileMetadataIngest ingest (BufferedInputStream stream) throws IOException{
dbgLog.fine("Attempting to read FITS file;");
// Accumulates field name -> set of extracted values:
Map<String, Set<String>> fitsMetaMap = new HashMap<>();
// Earliest start / latest end observation times seen across all HDUs:
Date minDate = null;
Date maxDate = null;
String startObsTime = "";
String stopObsTime = "";
FileMetadataIngest ingest = new FileMetadataIngest();
ingest.setMetadataBlockName(ASTROPHYSICS_BLOCK_NAME);
Fits fitsFile = null;
try {
fitsFile = new Fits (stream);
} catch (FitsException fEx) {
// NOTE(review): the FitsException cause is dropped here (only its
// message is kept); consider new IOException(msg, fEx).
throw new IOException ("Failed to open FITS stream; "+fEx.getMessage());
}
if (fitsFile == null) {
throw new IOException ("Failed to open FITS stream; null Fits object");
}
// (Re)load the key configuration before processing:
readConfig();
BasicHDU hdu = null;
int i = 0;
// Counters of the HDU types encountered, used for file typing below:
int nTableHDUs = 0;
int nImageHDUs = 0;
int nUndefHDUs = 0;
int nAxis = 0;
Set<String> metadataKeys = new HashSet<String>();
Set<String> columnKeys = new HashSet<String>();
List<String> hduTypes = new ArrayList<String>();
List<String> hduNames = new ArrayList<String>();
try {
fitsMetaMap.put(ATTRIBUTE_TYPE, new HashSet<String>());
// Iterate over every HDU in the file:
while ((hdu = fitsFile.readHDU()) != null) {
dbgLog.fine("reading HDU number " + i);
hduNames.add("[UNNAMED]");
Header hduHeader = hdu.getHeader();
if (hdu instanceof ImageHDU) {
dbgLog.fine("this is an image HDU");
nAxis = hduHeader.getIntValue("NAXIS");
dbgLog.fine("NAXIS (directly from header): "+nAxis);
if (nAxis > 0) {
metadataKeys.add("NAXIS");
if (nAxis > 1) {
nImageHDUs++;
if (nAxis > 2) {
hduTypes.add(HDU_TYPE_IMAGE_CUBE);
} else {
// Check for type Spectrum:
hduTypes.add(HDU_TYPE_IMAGE);
}
}
} else {
hduTypes.add(HDU_TYPE_UNKNOWN);
}
} else if (hdu instanceof TableHDU) {
dbgLog.fine("this is a table HDU");
nTableHDUs++;
hduTypes.add(HDU_TYPE_TABLE);
} else if (hdu instanceof UndefinedHDU) {
dbgLog.fine("this is an undefined HDU");
nUndefHDUs++;
hduTypes.add(HDU_TYPE_UNDEF);
} else {
dbgLog.fine("this is an UKNOWN HDU");
hduTypes.add(HDU_TYPE_UNKNOWN);
}
i++;
// Standard HDU attributes that we always check:
// (first non-null value wins; later HDUs don't overwrite it)
if (fitsMetaMap.get(ATTRIBUTE_FACILITY) == null) {
String hduTelescope = hdu.getTelescope();
if (hduTelescope != null) {
fitsMetaMap.put(ATTRIBUTE_FACILITY, new HashSet<String>());
fitsMetaMap.get(ATTRIBUTE_FACILITY).add(hduTelescope);
metadataKeys.add("TELESCOP");
}
}
if (fitsMetaMap.get(ATTRIBUTE_INSTRUMENT) == null) {
String hduInstrument = hdu.getInstrument();
if (hduInstrument != null) {
fitsMetaMap.put(ATTRIBUTE_INSTRUMENT, new HashSet<String>());
fitsMetaMap.get(ATTRIBUTE_INSTRUMENT).add(hduInstrument);
metadataKeys.add("INSTRUME");
}
}
/*
* Spatial coordinates: we just use CRVAL1 and CRVAL2,
* X and Y coordinates of the center pixel, if available:
* (getDoubleValue returns 0.0 when the header key is absent,
* so (0.0, 0.0) coordinates are indistinguishable from "missing"
* and are skipped)
*/
double crval1Float = hduHeader.getDoubleValue("CRVAL1");
double crval2Float = hduHeader.getDoubleValue("CRVAL2");
dbgLog.fine("CRVAL1: "+crval1Float);
dbgLog.fine("CRVAL2: "+crval2Float);
if (crval1Float != 0.0 || crval2Float != 0.0) {
if (fitsMetaMap.get(ATTRIBUTE_COVERAGE_SPATIAL) == null) {
fitsMetaMap.put(ATTRIBUTE_COVERAGE_SPATIAL, new HashSet<String>());
}
fitsMetaMap.get(ATTRIBUTE_COVERAGE_SPATIAL).add("("+crval1Float+" "+crval2Float+")");
metadataKeys.add("CRVAL1");
metadataKeys.add("CRVAL2");
}
/*
* Special treatment for the OBJECT value:
* (fall back on TARGNAME when OBJECT is absent/empty)
*/
String objectString = hduHeader.getStringValue("OBJECT");
if (objectString != null && !objectString.equals("")) {
metadataKeys.add("OBJECT");
} else {
objectString = hduHeader.getStringValue("TARGNAME");
if (objectString != null && !objectString.equals("")) {
metadataKeys.add("TARGNAME");
}
}
if (objectString != null && !objectString.equals("")) {
if (fitsMetaMap.get(ATTRIBUTE_OBJECT) == null) {
fitsMetaMap.put(ATTRIBUTE_OBJECT, new HashSet<String>());
}
fitsMetaMap.get(ATTRIBUTE_OBJECT).add(objectString);
}
/*
* Let's try to determine the start and end date/time for this
* HDU. HDUs can have their own, differend end and start times;
* for the start time of the whole file we'll select the min.
* of the individual HDU start times, and the max. for end time.
*/
// The standard header key is "DATE-OBS" - but all these
// hacky variants below are common too, so we'll go through
// them all:
String obsDateString = hduHeader.getStringValue("DATE-OBS");
if (obsDateString != null && !obsDateString.equals("")) {
metadataKeys.add("DATE-OBS");
} else {
obsDateString = hduHeader.getStringValue("DATE_OBS");
if (obsDateString != null && !obsDateString.equals("")) {
metadataKeys.add("DATE_OBS");
} else {
obsDateString = hduHeader.getStringValue("OBS-DATE");
if (obsDateString != null && !obsDateString.equals("")) {
metadataKeys.add("OBS-DATE");
}
}
}
// TODO:
// see if it's easier to replace this with getObservationDate()
// on the HDU - ?
// DONE: No, getObservationDate() is bad news. all it does
// on the inside is
// return new FitsDate(myHeader.getStringValue("DATE-OBS")).toDate();
// -- which adds all the complications you'd expect, adding
// a time zone to the strings that didn't have any (shifting
// the value by a seemingly random number of hours), etc.
if (obsDateString != null) {
Date startDate = null;
Date endDate = null;
String startDateFormatted = null;
String endDateFormatted = null;
// We'll try to parse it, first as a full date-time string:
// replace all slashes with dashes:
obsDateString = obsDateString.replace('/', '-');
for (SimpleDateFormat format : TIME_FORMATS) {
format.setLenient(false);
try {
startDate = format.parse(obsDateString);
dbgLog.fine("Valid date string: " + obsDateString + ", format: " + format.toPattern() + ", resulting date: "+startDate+", formatted resulting date: "+TIME_FORMATS[0].format(startDate));
startDateFormatted = format.format(startDate);
//startDateFormatted = obsDateString;
break;
} catch (ParseException ex) {
startDate = null;
}
// Alternative method:
// We could truncate the string to the point where the parser
// stopped; e.g., if our format was yyyy-mm-dd and the
// string was "2014-05-07T14:52:01" we'll truncate the
// string to "2014-05-07".
/*
ParsePosition pos = new ParsePosition(0);
startDate = format.parse(obsDateString, pos);
if (startDate == null) {
continue;
}
if (pos.getIndex() != obsDateString.length()) {
obsDateString = obsDateString.substring(0, pos.getIndex());
}
dbgLog.fine("Valid date: " + obsDateString + ", format: " + format.toPattern());
break;
*/
}
// if that didn't work, we'll try parsing the string as a
// date only:
if (startDate == null) {
for (SimpleDateFormat format : DATE_FORMATS) {
format.setLenient(false);
try {
startDate = format.parse(obsDateString);
dbgLog.info("Valid date string: " + obsDateString + ", format: " + format.toPattern() + ", resulting date: "+startDate+", formatted resulting date: "+DATE_FORMATS[0].format(startDate));
//startDateFormatted = format.format(startDate);
startDateFormatted = DATE_FORMATS[0].format(startDate);
break;
} catch (ParseException ex) {
startDate = null;
}
}
// if that worked, let's see if we have the time value
// stored separately - in "TIME-OBS":
if (startDate != null) {
String obsTimeString = hduHeader.getStringValue("TIME-OBS");
Date startDateTime = null;
if (obsTimeString != null && !obsTimeString.equals("")) {
// Combine the date and time parts into a single
// ISO-style string and re-parse:
String newObsDateString = DATE_FORMATS[0].format(startDate) + "T" + obsTimeString;
for (SimpleDateFormat format : TIME_FORMATS) {
format.setLenient(false);
try {
startDateTime = format.parse(newObsDateString);
dbgLog.fine("Valid date obtained by combining obs date and time: " + newObsDateString + ", format: " + format.toPattern() + ", resulting date: "+startDateTime+", formatted resulting date: "+TIME_FORMATS[0].format(startDateTime));
//startDateFormatted = TIME_FORMATS[0].format(startDateTime);
//startDateFormatted = newObsDateString;
startDateFormatted = format.format(startDateTime);
break;
} catch (ParseException ex) {
startDateTime = null;
}
}
if (startDateTime != null) {
startDate = startDateTime;
}
}
}
}
if (startDate != null) {
dbgLog.fine("Let's try and calculate the end date...");
// Check if it's the min. start date value we've got so far:
if (minDate == null) {
minDate = startDate;
startObsTime = startDateFormatted;
} else if (startDate.before(minDate)) {
minDate = startDate;
startObsTime = startDateFormatted;
}
// Stop/end dates:
endDate = startDate;
endDateFormatted = startDateFormatted;
// Check if we have the EXPTIME stored, that would allow us
// to recalculate the end time:
// getDoubleValue isn't advertised to throw any exceptions;
// so I'm going to assume that it just returns 0
// if there's not such header value, or if the value
// is not a valid double. (their document does say that
// it returns 0.0 "if not found"; but what does it return
// if the value of the header is "foo"?")
double expTimeValue = hduHeader.getDoubleValue("EXPTIME");
if (expTimeValue != 0.0) {
// end time = start time + exposure duration (seconds):
long expTimeInMillis = (long) (expTimeValue * 1000);
dbgLog.fine("EXPTIME in MILLISECONDS: " + expTimeInMillis);
Calendar endDateCal = Calendar.getInstance();
endDateCal.setTime(endDate);
long endTimeInMillis = endDateCal.getTimeInMillis() + expTimeInMillis;
dbgLog.fine("END TIME in MILLISECONDS: " + endTimeInMillis);
endDateCal.setTimeInMillis(endTimeInMillis);
endDate = endDateCal.getTime();
// Use the millisecond-precision format only when the end
// time has a fractional-second component:
if ((endTimeInMillis / 1000) * 1000 != endTimeInMillis) {
endDateFormatted = TIME_FORMATS[0].format(endDate);
} else {
endDateFormatted = TIME_FORMATS[1].format(endDate);
}
// While we are at it, we will also populate the
// Resolution.Temporal field, where EXPTIME
// maps in the Astro metadata block:
String indexableKeyExpTime = getIndexableMetaKey("EXPTIME");
if (fitsMetaMap.get(indexableKeyExpTime) == null) {
fitsMetaMap.put(indexableKeyExpTime, new HashSet<String>());
}
fitsMetaMap.get(indexableKeyExpTime).add(Double.toString(expTimeValue));
metadataKeys.add("EXPTIME");
}
// Check if it's the max. end date value so far:
if (maxDate == null) {
maxDate = endDate;
stopObsTime = endDateFormatted;
} else if (endDate.after(maxDate)) {
maxDate = endDate;
stopObsTime = endDateFormatted;
}
}
}
/* TODO:
* use the Axes values for determining if this is a spectrum:
*/
if (hdu.getAxes() != null) {
for (int j = 0; j < hdu.getAxes().length; j++) {
int nAxisN = hdu.getAxes()[j];
metadataKeys.add("NAXIS"+j);
dbgLog.fine("NAXIS"+j+" value: "+nAxisN);
}
} else {
dbgLog.fine("NULL Axes array.");
}
// Process individual header cards:
// (this also advances the stream past the header, so that
// readHDU() can pick up the next HDU)
HeaderCard headerCard = null;
int j = 0;
while ((headerCard = hduHeader.nextCard()) != null) {
String headerKey = headerCard.getKey();
String headerValue = headerCard.getValue();
String headerComment = headerCard.getComment();
dbgLog.fine("Processing header key: "+headerKey);
dbgLog.fine("Value: "+headerValue);
boolean recognized = false;
if (headerKey != null) {
/*
if (i > 1 && headerKey.equals("EXTNAME")) {
hduNames.set(i-2, headerValue);
} */
if (isRecognizedKey(headerKey)) {
dbgLog.fine("recognized key: " + headerKey);
recognized = true;
metadataKeys.add(headerKey);
} /*else if (isRecognizedColumnKey(headerKey)) {
dbgLog.fine("recognized column key: " + headerKey);
recognized = true;
//columnKeys.add(getTrimmedColumnKey(headerKey));
columnKeys.add(headerKey);
}*/
}
if (recognized) {
// Store the value under the indexable (metadata block) name,
// falling back on the raw header key:
String indexableKey =
getIndexableMetaKey(headerKey) != null ?
getIndexableMetaKey(headerKey) :
headerKey;
if (headerValue != null) {
dbgLog.fine("value: " + headerValue);
if (fitsMetaMap.get(indexableKey) == null) {
fitsMetaMap.put(indexableKey, new HashSet<String>());
}
// if the key is supposed to be a FLOAT, we only
// want to process it if the value actually validates
// as a float:
// TODO: make sure all other values that may be
// be expected to parse as certain formats/types are
// also validated!
// -- L.A. 4.0 beta
if (isRecognizedFloatKey(headerKey)) {
try {
Double.parseDouble(headerValue);
fitsMetaMap.get(indexableKey).add(headerValue);
} catch (Exception e) {}
} else {
fitsMetaMap.get(indexableKey).add(headerValue);
}
} else if (headerKey.equals("COMMENT") && headerComment != null) {
dbgLog.fine("comment: " + headerComment);
if (fitsMetaMap.get(indexableKey) == null) {
fitsMetaMap.put(indexableKey, new HashSet<String>());
}
fitsMetaMap.get(indexableKey).add(headerComment);
} else {
dbgLog.fine("value is null");
}
}
j++;
}
dbgLog.fine ("processed "+j+" cards total;");
// not sure this is legit: hdu.skipData(fitsFile.getStream());
// the following is legit, but seemingly unnecessary:
// Data fitsData = hdu.getData();
// dbgLog.info ("data size: "+fitsData.getSize());
// TODO: confirm memory use implications of reading the Data
// section vs. skipping it explicitly vs. not doing anything. :)
// -- L.A. June 1 2014.
dbgLog.fine("total size of the HDU is "+hdu.getSize());
}
} catch (FitsException fEx) {
// NOTE(review): the FitsException cause is dropped here as well.
throw new IOException("Failed to read HDU number " + i);
}
dbgLog.fine ("processed "+i+" HDUs total;");
int n = fitsFile.getNumberOfHDUs();
if (n != i) {
// (the message says "cards", but this actually compares the
// number of HDUs processed against the library's HDU count)
dbgLog.fine("WARNING: mismatch between the number of cards processed and reported!");
}
dbgLog.fine("Total (current) number of HDUs: "+n);
// Make final decisions on the "type(s)" of the file we have just
// processed:
String imageFileType = determineImageFileType (nImageHDUs, hduTypes);
if (imageFileType != null) {
fitsMetaMap.get(ATTRIBUTE_TYPE).add(imageFileType);
}
if (fitsMetaMap.get(ATTRIBUTE_TYPE).isEmpty()) {
String tableFileType = determineTableFileType (nTableHDUs, hduTypes);
if (tableFileType != null) {
fitsMetaMap.get(ATTRIBUTE_TYPE).add(tableFileType);
}
}
if (n == 1 && fitsMetaMap.get(ATTRIBUTE_TYPE).isEmpty()) {
// If there's only 1 (primary) HDU in the file, we'll make sure
// the file type is set to (at least) "image" - even if we skipped
// that HDU because it looked empty:
fitsMetaMap.get(ATTRIBUTE_TYPE).add(FILE_TYPE_IMAGE);
}
// Final post-processing.
// Some values are derived from the collected fields
// (for example, the coverage.temporal.StopTime is the min.
// of all the collected OBS-DATE values).
// Specific rules are applied below:
// start time and and stop time:
if (!startObsTime.equals("")) {
fitsMetaMap.put(ATTRIBUTE_START_TIME, new HashSet<String>());
fitsMetaMap.get(ATTRIBUTE_START_TIME).add(startObsTime);
}
if (!stopObsTime.equals("")) {
fitsMetaMap.put(ATTRIBUTE_STOP_TIME, new HashSet<String>());
fitsMetaMap.get(ATTRIBUTE_STOP_TIME).add(stopObsTime);
}
// TODO:
// Numeric fields should also be validated!
// -- L.A. 4.0 beta
String metadataSummary = createMetadataSummary (n, nTableHDUs, nImageHDUs, nUndefHDUs, metadataKeys); //, columnKeys, hduNames, fitsMetaMap.get("Column-Label"));
ingest.setMetadataMap(fitsMetaMap);
ingest.setMetadataSummary(metadataSummary);
//return fitsMetaMap;
return ingest;
}
/**
 * Initializes the field configuration maps.
 *
 * Attempts to read the configuration file in the domain config directory;
 * if the file is not available (or no valid entries can be read from it),
 * the hard-coded default configuration is used instead. Note that a
 * partial read followed by an I/O error also falls back on the defaults,
 * so the maps never end up as a mix of file and default entries.
 */
private void readConfig () {
    String domainRoot = System.getProperty("com.sun.aas.instanceRoot");
    dbgLog.fine("PROPERTY: com.sun.aas.instanceRoot=" + domainRoot);
    if (domainRoot == null || domainRoot.equals("")) {
        // Fall back on the standard glassfish domain location:
        domainRoot = "/usr/local/glassfish4/glassfish/domains/domain1";
    }
    int nConfiguredKeys = 0;
    String configFileName = domainRoot + "/config/fits.conf_DONOTREAD";
    File configFile = new File(configFileName);
    dbgLog.fine("FITS plugin: checking for the config file: " + configFileName);
    if (configFile.exists()) {
        recognizedFitsMetadataKeys = new HashMap<String, Integer>();
        recognizedFitsColumnKeys = new HashMap<String, Integer>();
        indexableFitsMetaKeys = new HashMap<String, String>();
        dbgLog.fine("FITS plugin: attempting to read the config file: " + configFileName);
        try (BufferedReader configFileReader =
                new BufferedReader(new InputStreamReader(new FileInputStream(configFile)))) {
            String line;
            while ((line = configFileReader.readLine()) != null) {
                // lines that start with "#" are comments; we skip them.
                if (line.indexOf('#') == 0) {
                    continue;
                }
                String[] configTokens = line.split("\t", -2);
                if (configTokens == null || configTokens.length < 2) {
                    continue;
                }
                if (configTokens[0].equalsIgnoreCase(CONFIG_TOKEN_META_KEY)) {
                    nConfiguredKeys += parseConfiguredKey(configTokens, recognizedFitsMetadataKeys, "meta");
                } else if (configTokens[0].equalsIgnoreCase(CONFIG_TOKEN_COLUMN_KEY)) {
                    nConfiguredKeys += parseConfiguredKey(configTokens, recognizedFitsColumnKeys, "column");
                }
            }
            if (nConfiguredKeys == 0) {
                dbgLog.warning("FITS plugin: parsed the config file successfully; " +
                        "but no metadata fields found. will proceed with the " +
                        "default configuration.");
            }
        } catch (IOException ioex) {
            dbgLog.warning("FITS plugin: Caught an exception reading "
                    + "the configuration file; will proceed with the "
                    + "default configuration.");
            // We may have already read some values from the config
            // file, before the exception was encountered. We don't want
            // to end up with a mix of that partial configuration and the
            // hard-coded defaults; resetting the counter to 0 causes the
            // default maps to replace everything below.
            nConfiguredKeys = 0;
        }
    } else {
        dbgLog.fine("FITS plugin: no config file; will proceed with "
                + "the default configuration.");
    }
    // If no config file/no keys in the config file, this is the default
    // configuration we'll be using:
    if (nConfiguredKeys == 0) {
        recognizedFitsMetadataKeys = defaultRecognizedFitsMetadataKeys;
        recognizedFitsColumnKeys = defaultRecognizedFitsColumnKeys;
        indexableFitsMetaKeys = defaultIndexableFitsMetaKeys;
    }
}

/**
 * Parses one recognized-key config entry (already tokenized) into the
 * supplied key map, plus the shared indexable-name map.
 *
 * @param configTokens tab-separated tokens of the config line
 *                     (token 1: FITS key; optional token 2: index name)
 * @param keyMap       destination map (metadata or column keys)
 * @param entryType    "meta" or "column" - used in log messages only
 * @return 1 if a key was configured, 0 if the entry was empty/malformed
 */
private int parseConfiguredKey(String[] configTokens, Map<String, Integer> keyMap, String entryType) {
    if (configTokens[1] == null || configTokens[1].equals("")) {
        dbgLog.warning("FITS plugin: empty (or malformed) " + entryType + " key entry in the config file.");
        return 0;
    }
    dbgLog.fine("FITS plugin: found " + entryType + " key config entry for " + configTokens[1]);
    keyMap.put(configTokens[1], 0);
    if (configTokens.length > 2 && configTokens[2] != null
            && !(configTokens[2].equals(""))) {
        indexableFitsMetaKeys.put(configTokens[1], configTokens[2]);
    } else {
        // No explicit index name - index under the FITS key itself:
        dbgLog.fine("FITS plugin: (warning) no index name specified for " + configTokens[1]);
        indexableFitsMetaKeys.put(configTokens[1], configTokens[1]);
    }
    return 1;
}
/**
 * Derives the whole-file "type" value from the image HDUs encountered.
 *
 * @param nImageHDUs number of image HDUs found in the file
 * @param hduTypes   per-HDU type labels collected during the scan
 * @return "Cube" if any HDU is an image cube, "Mosaic" for multiple
 *         image HDUs, "Image" for a single one, or null if the file
 *         contains no image HDUs
 */
private String determineImageFileType (int nImageHDUs, List<String> hduTypes) {
    if (nImageHDUs <= 0) {
        // No image HDUs - the file doesn't get an image-based type.
        return null;
    }
    // A single cube HDU makes the entire file a "Cube":
    for (String hduType : hduTypes) {
        if (HDU_TYPE_IMAGE_CUBE.equals(hduType)) {
            return FILE_TYPE_CUBE;
        }
    }
    // Multiple plain images -> "Mosaic"; otherwise just "Image":
    return nImageHDUs > 1 ? FILE_TYPE_MOSAIC : FILE_TYPE_IMAGE;
}
/**
 * Derives the whole-file "type" value from the table HDUs encountered.
 *
 * @param nTableHDUs number of table HDUs found in the file
 * @param hduTypes   per-HDU type labels (currently unused here)
 * @return "Table" if the file contains at least one table HDU, else null
 */
private String determineTableFileType (int nTableHDUs, List<String> hduTypes) {
    return nTableHDUs > 0 ? FILE_TYPE_TABLE : null;
}
/**
 * Reports whether the given FITS header key is one of the recognized
 * (configured) metadata keys.
 *
 * @param key raw FITS header key
 * @return true if the key is present in the recognized-keys map
 */
private boolean isRecognizedKey (String key) {
    return recognizedFitsMetadataKeys.containsKey(key);
}
/**
 * Reports whether the given FITS header key is recognized AND configured
 * with the FLOAT field type (its value must validate as a double before
 * being stored).
 *
 * @param key raw FITS header key
 * @return true if the key is recognized and typed as FIELD_TYPE_FLOAT
 */
private boolean isRecognizedFloatKey (String key) {
    Integer fieldType = recognizedFitsMetadataKeys.get(key);
    return fieldType != null && fieldType.intValue() == FIELD_TYPE_FLOAT;
}
/**
 * Reports whether the given key is a recognized column-level key, i.e. a
 * configured "XXXXn"-style key (such as TTYPE1, TTYPE2, ...) whose
 * trailing digits have been stripped for the lookup.
 *
 * @param key raw FITS header key
 * @return true if the key ends in a digit and its trimmed form is in the
 *         recognized column-keys map
 */
private boolean isRecognizedColumnKey (String key) {
    return key.matches(".*[0-9]$")
            && recognizedFitsColumnKeys.containsKey(getTrimmedColumnKey(key));
}
/**
 * Maps a raw FITS header key to the name under which its value is
 * indexed in the metadata block.
 *
 * @param key raw FITS header key
 * @return the configured indexable name, or null if the key is not
 *         recognized (neither as a metadata key nor a column key)
 */
private String getIndexableMetaKey (String key) {
    if (isRecognizedKey(key)) {
        return indexableFitsMetaKeys.get(key);
    }
    if (isRecognizedColumnKey(key)) {
        return indexableFitsMetaKeys.get(getTrimmedColumnKey(key));
    }
    return null;
}
/**
 * Strips the trailing digits from a column-style FITS key (e.g.
 * "TTYPE12" -> "TTYPE").
 *
 * @param key raw FITS header key (may be null)
 * @return the key without its trailing digits, or null for null input
 */
private String getTrimmedColumnKey (String key) {
    return key == null ? null : key.replaceFirst("[0-9]+$", "");
}
/**
 * Builds the human-readable summary of the file's structure and the
 * recognized metadata keys that were found.
 *
 * (Uses a StringBuilder instead of repeated String.concat calls, which
 * were accidentally quadratic in the number of keys; the produced text
 * is unchanged.)
 *
 * @param nHDU         total number of HDUs in the file
 * @param nTableHDUs   number of table HDUs
 * @param nImageHDUs   number of image HDUs
 * @param nUndefHDUs   number of undefined HDUs
 * @param metadataKeys the recognized header keys that were encountered
 * @return multi-line summary text
 */
private String createMetadataSummary (int nHDU, int nTableHDUs, int nImageHDUs, int nUndefHDUs, Set<String> metadataKeys) { //, Set<String> columnKeys, List<String> hduNames, Set<String> columnNames) {
    StringBuilder summary = new StringBuilder();
    if (nHDU > 1) {
        summary.append("FITS file, ").append(nHDU).append(" HDUs total:\n");
        summary.append("The primary HDU; ");
        if (nTableHDUs > 0) {
            summary.append(nTableHDUs).append(" Table HDU(s) ");
            //summary.append("(column names: "+StringUtils.join(columnNames, ", ")+"); ");
        }
        if (nImageHDUs > 0) {
            summary.append(nImageHDUs).append(" Image HDU(s); ");
        }
        if (nUndefHDUs > 0) {
            summary.append(nUndefHDUs).append(" undefined HDU(s); ");
        }
        summary.append("\n");
        //summary.append("HDU names: "+StringUtils.join(hduNames, ", ")+"; ");
    } else {
        summary.append("This is a FITS file with 1 (primary) HDU.\n");
    }
    if (metadataKeys != null && metadataKeys.size() > 0) {
        summary.append("The following recognized metadata keys " +
                "have been found in the FITS file:\n");
        for (String key : metadataKeys) {
            summary.append(key).append("; ");
        }
        summary.append("\n");
    }
    /*
     * Per feedback from Gus: it's not necessary to list the column keys.
     *
    if (columnKeys != null && columnKeys.size() > 0) {
        summary.append("In addition, the following column keys "+
                "have been found in the table HDUs: \n");
        for (String key : columnKeys) {
            summary.append(key).append("; ");
        }
        summary.append("\n");
    }
    */
    return summary.toString();
}
/**
 * Counts how many entries in the given type list equal the given token.
 *
 * @param typeList  list of per-HDU type labels (may be null)
 * @param typeToken type label to count (null/empty yields 0)
 * @return the number of matching entries, 0 for null/empty inputs
 */
private int typeCount (List<String> typeList, String typeToken) {
    if (typeToken == null || typeToken.isEmpty() || typeList == null) {
        return 0;
    }
    int matches = 0;
    for (String entry : typeList) {
        if (typeToken.equals(entry)) {
            matches++;
        }
    }
    return matches;
}
/**
 * Returns the format name this extractor handles: the SPI provider's
 * first declared format name when available, "fits" otherwise.
 *
 * @return the format name
 * @throws IOException declared by the superclass contract
 */
@Override
public String getFormatName() throws IOException {
    return originatingProvider != null
            ? originatingProvider.getFormatNames()[0]
            : "fits";
}
/**
 * main() method, for testing
 * usage: java edu.harvard.iq.dataverse.ingest.metadataextraction.impl.plugins.fits.FITSFileMetadataExtractor testfile.fits
 * make sure the CLASSPATH contains fits.jar.
 *
 * @param args args[0] is the path of the FITS file to ingest
 */
public static void main(String[] args) {
    // Guard against a missing argument (previously an
    // ArrayIndexOutOfBoundsException):
    if (args.length < 1) {
        System.out.println("usage: java edu.harvard.iq.dataverse.ingest.metadataextraction.impl.plugins.fits.FITSFileMetadataExtractor testfile.fits");
        return;
    }
    FileMetadataIngest fitsIngest = null;
    // try-with-resources closes the stream (it was previously leaked):
    try (BufferedInputStream fitsStream = new BufferedInputStream(new FileInputStream(args[0]))) {
        FITSFileMetadataExtractor fitsIngester = new FITSFileMetadataExtractor();
        fitsIngest = fitsIngester.ingest(fitsStream);
    } catch (IOException ex) {
        System.out.println(ex.getMessage());
        // Bail out - previously the code fell through and dereferenced
        // the null metadata map, throwing an NPE on top of the error:
        return;
    }
    Map<String, Set<String>> fitsMetadata = fitsIngest.getMetadataMap();
    if (fitsMetadata != null) {
        for (String mKey : fitsMetadata.keySet()) {
            //if (mKey.equals(METADATA_SUMMARY)) {
            //    continue;
            //}
            Set<String> mValues = fitsMetadata.get(mKey);
            System.out.println("key: " + mKey);
            if (mValues != null) {
                for (String mValue : mValues) {
                    if (mValue != null) {
                        System.out.println("value: " + mValue);
                    } else {
                        System.out.println("value is null");
                    }
                }
            }
        }
    }
    if (fitsIngest.getMetadataSummary() != null) {
        System.out.println("\nFITS Metadata summary: \n" + fitsIngest.getMetadataSummary());
    }
}
}