/*
* EDUnits Copyright 2009, NOAA.
* See the LICENSE.txt file in this file's directory.
*/
package ucar.nc2.ogc.erddap.util;
import com.google.common.math.DoubleMath;
import java.io.IOException;
import java.io.InputStream;
import java.util.HashMap;
/**
* This class has static methods to convert units from one standard to another.
*/
public class ErddapEDUnits {
/**
* UDUNITS and UCUM support metric prefixes.
*/
public static String metricName[] = {
"yotta", "zetta", "exa", "peta", "tera",
"giga", "mega", "kilo", "hecto", "deka",
"deci", "centi", "milli", "micro", "nano",
"pico", "femto", "atto", "zepto", "yocto",
"µ",};
public static String metricAcronym[] = {
"Y", "Z", "E", "P", "T",
"G", "M", "k", "h", "da",
"d", "c", "m", "u", "n",
"p", "f", "a", "z", "y",
"u"};
public static int nMetric = metricName.length;
/**
* UCUM supports power-of-two prefixes, but UDUNITS doesn't.
*/
public static String twoAcronym[] = {
"Ki", "Mi", "Gi", "Ti"};
public static String twoValue[] = {
"1024", "1048576", "1073741824", "1.099511627776e12"};
public static int nTwo = twoAcronym.length;
private static final HashMap<String, String> udHashMap;
private static final HashMap<String, String> ucHashMap;
static {
try ( InputStream udunitsToUcumStream = ErddapEDUnits.class.getResourceAsStream("UdunitsToUcum.properties");
InputStream ucumToUdunitsStream = ErddapEDUnits.class.getResourceAsStream("UcumToUdunits.properties")) {
udHashMap = getHashMapStringString(udunitsToUcumStream, "ISO-8859-1");
ucHashMap = getHashMapStringString(ucumToUdunitsStream, "ISO-8859-1");
} catch (IOException e) {
throw new ExceptionInInitializerError(e);
}
}
/**
* This converts UDUnits to UCUM.
* <br>UDUnits: http://www.unidata.ucar.edu/software/udunits/udunits-1/etc/udunits.dat
* http://www.unidata.ucar.edu/software/udunits/udunits-2/udunits2.html
* I worked with v 2.1.9
* <br>UCUM: http://unitsofmeasure.org/ucum.html
* I worked with Version: 1.8, $Revision: 28894 $
* <p/>
* <p>UDUnits supports lots of aliases (short and long)
* and plurals (usually by adding 's').
* These all get reduced to UCUM's short and canonical-only units.
* <p/>
* <p>Notes:
* <ul>
* <li>This method is a strictly case sensitive.
* The only UDUnits that should be capitalized (other than acronyms) are
* Btu, Gregorian..., Julian..., PI.
* <br>The only UDUnits that may be capitalized are
* Celsius, Fahrenheit, Kelvin, Rankine.
* <li>For "10 to the", UCUM allows 10* or 10^. This method uses 10^.
* <li>NTU becomes {ntu}.
* <li>PSU or psu becomes {psu}.
* </ul>
* <p/>
* return the UDUnits converted to UCUM.
* null returns null. "" returns "".
* throws Exception if trouble.
*/
public static String udunitsToUcum(String udunits) {
if (udunits == null) {
return null;
}
//is it a point in time? e.g., seconds since 1970-01-01T00:00:00T
int sincePo = udunits.indexOf(" since ");
if (sincePo > 0) {
try {
//test if really appropriate
double baf[] = ErddapCalendar2.getTimeBaseAndFactor(udunits); //throws exception if trouble
//use 'factor', since it is more forgiving than udunitsToUcum converter
String u;
if (DoubleMath.fuzzyEquals(baf[1], 0.001, 1e-6)) { // Can't simply do "baf[1] == 0.001".
u = "ms";
} else if (baf[1] == 1) {
u = "s";
} else if (baf[1] == ErddapCalendar2.SECONDS_PER_MINUTE) {
u = "min";
} else if (baf[1] == ErddapCalendar2.SECONDS_PER_HOUR) {
u = "h";
} else if (baf[1] == ErddapCalendar2.SECONDS_PER_DAY) {
u = "d";
} else if (baf[1] == 30 * ErddapCalendar2.SECONDS_PER_DAY) { // mo_j ?
u = "mo";
} else if (baf[1] == 360 * ErddapCalendar2.SECONDS_PER_DAY) { // a_j ?
u = "a";
} else {
u = udunitsToUcum(udunits.substring(0, sincePo)); //shouldn't happen, but weeks? microsec?
}
//make "s{since 1970-01-01T00:00:00T}
return u + "{" + udunits.substring(sincePo + 1) + "}";
} catch (Exception e) {
}
}
//parse udunits and build ucum, till done
StringBuilder ucum = new StringBuilder();
int udLength = udunits.length();
int po = 0; //po is next position to be read
while (po < udLength) {
char ch = udunits.charAt(po);
//letter
if (isUdunitsLetter(ch)) { //includes 'µ' and '°'
//find contiguous letters|_|digit (no '-')
int po2 = po + 1;
while (po2 < udLength &&
(isUdunitsLetter(udunits.charAt(po2)) || udunits.charAt(po2) == '_' ||
ErddapString2.isDigit(udunits.charAt(po2)))) {
po2++;
}
String tUdunits = udunits.substring(po, po2);
po = po2;
//some udunits have internal digits, but none end in digits
//if it ends in digits, treat as exponent
//find contiguous digits at end
int firstDigit = tUdunits.length();
while (firstDigit >= 1 && ErddapString2.isDigit(tUdunits.charAt(firstDigit - 1))) {
firstDigit--;
}
String exponent = tUdunits.substring(firstDigit);
tUdunits = tUdunits.substring(0, firstDigit);
String tUcum = oneUdunitsToUcum(tUdunits);
//deal with PER -> /
if (tUcum.equals("/")) {
char lastUcum = ucum.length() == 0 ? '\u0000' : ucum.charAt(ucum.length() - 1);
if (lastUcum == '/') {
ucum.setCharAt(ucum.length() - 1, '.'); //2 '/' cancel out
} else if (lastUcum == '.') {
ucum.setCharAt(ucum.length() - 1, '/'); // '/' replaces '.'
} else {
ucum.append('/');
}
} else {
ucum.append(tUcum);
}
//add the exponent
ucum.append(exponent);
//catch -exponent as a number below
continue;
}
//number
if (ch == '-' || ErddapString2.isDigit(ch)) {
//find contiguous digits
int po2 = po + 1;
while (po2 < udLength && ErddapString2.isDigit(udunits.charAt(po2))) {
po2++;
}
//decimal place + digit (not just .=multiplication)
boolean hasDot = false;
if (po2 < udLength - 1 && udunits.charAt(po2) == '.' && ErddapString2.isDigit(udunits.charAt(po2 + 1))) {
hasDot = true;
po2 += 2;
while (po2 < udLength && ErddapString2.isDigit(udunits.charAt(po2))) {
po2++;
}
}
//exponent? e- or e{digit}
boolean hasE = false;
if (po2 < udLength - 1 && Character.toLowerCase(udunits.charAt(po2)) == 'e' &&
(udunits.charAt(po2 + 1) == '-' || ErddapString2.isDigit(udunits.charAt(po2 + 1)))) {
hasE = true;
po2 += 2;
while (po2 < udLength && ErddapString2.isDigit(udunits.charAt(po2))) {
po2++;
}
}
String num = udunits.substring(po, po2);
po = po2;
//convert floating point to rational number
if (hasDot || hasE) {
int rational[] = ErddapString2.toRational(ErddapString2.parseDouble(num));
if (rational[1] == Integer.MAX_VALUE) {
ucum.append(num); //ignore the trouble !!! ???
} else if (rational[1] == 0) //includes {0, 0}
{
ucum.append(rational[0]);
} else {
ucum.append(rational[0]).append(".10^").append(rational[1]);
}
} else {
//just copy num
ucum.append(num);
}
continue;
}
//space or . or · (183) (multiplication)
if (ch == ' ' || ch == '.' || ch == 183) {
char lastUcum = ucum.length() == 0 ? '\u0000' : ucum.charAt(ucum.length() - 1);
if (lastUcum == '/' || lastUcum == '.') {
//if last token was / or ., do nothing
} else {
ucum.append('.');
}
po++;
continue;
}
// * (multiplication * or exponent **)
if (ch == '*') {
po++;
if (po < udLength && udunits.charAt(po) == '*') {
ucum.append('^'); // exponent: ** -> ^
po++;
} else {
char lastUcum = ucum.length() == 0 ? '\u0000' : ucum.charAt(ucum.length() - 1);
if (lastUcum == '/' || lastUcum == '.') {
//if last token was / or ., do nothing
} else {
ucum.append('.');
}
}
continue;
}
// /
if (ch == '/') {
po++;
char lastUcum = ucum.length() == 0 ? '\u0000' : ucum.charAt(ucum.length() - 1);
if (lastUcum == '/') {
ucum.setCharAt(ucum.length() - 1, '.'); // 2 '/' cancel out
} else if (lastUcum == '.') {
ucum.setCharAt(ucum.length() - 1, '/'); // '/' replaces '.'
} else {
ucum.append('/');
}
continue;
}
// "
if (ch == '\"') {
po++;
ucum.append("''");
continue;
}
//otherwise, punctuation. copy it
ucum.append(ch);
po++;
}
return ucum.toString();
}
private static boolean isUdunitsLetter(char ch) {
return ErddapString2.isLetter(ch) || ch == 'µ' || ch == '°';
}
/**
* This converts one udunits term (perhaps with metric prefix(es)) to the corresponding ucum string.
* If udunits is just metric prefix(es), this returns the prefix acronym(s) with "{count}" as suffix
* (e.g., dkilo returns dk{count}).
* If this can't completely convert udunits, it returns the original udunits
* (e.g., kiloBobs remains kiloBobs (to avoid 'exact' becoming 'ect' ).
*/
private static String oneUdunitsToUcum(String udunits) {
//repeatedly pull off start of udunits and build ucum, till done
String oldUdunits = udunits;
StringBuilder ucum = new StringBuilder();
MAIN:
while (true) {
//try to find udunits in hashMap
String tUcum = udHashMap.get(udunits);
if (tUcum != null) {
//success! done!
ucum.append(tUcum);
return ucum.toString();
}
//try to separate out a metricName prefix (e.g., "kilo")
for (int p = 0; p < nMetric; p++) {
if (udunits.startsWith(metricName[p])) {
udunits = udunits.substring(metricName[p].length());
ucum.append(metricAcronym[p]);
if (udunits.length() == 0) {
ucum.append("{count}");
return ucum.toString();
}
continue MAIN;
}
}
//try to separate out a metricAcronym prefix (e.g., "k")
for (int p = 0; p < nMetric; p++) {
if (udunits.startsWith(metricAcronym[p])) {
udunits = udunits.substring(metricAcronym[p].length());
ucum.append(metricAcronym[p]);
if (udunits.length() == 0) {
ucum.append("{count}");
return ucum.toString();
}
continue MAIN;
}
}
return oldUdunits;
}
}
/**
* This converts UCUM to UDUnits.
* <br>UDUnits: http://www.unidata.ucar.edu/software/udunits/udunits-1/etc/udunits.dat
* http://www.unidata.ucar.edu/software/udunits/udunits-2/udunits2.html
* <br>UCUM: http://unitsofmeasure.org/ucum.html
* <p/>
* <p>UCUM tends to be short, canonical-only, and strict.
* Many UCUM units are the same in UDUnits.
* <p/>
* <p>UDUnits supports lots of aliases (short and long)
* and plurals (usually by adding 's').
* This tries to convert UCUM to a short, common UDUNIT units.
* <p/>
* <p>Problems:
* <ul>
* <li> UCUM has only "deg", no concept of degree_east|north|true|true.
* </ul>
* <p/>
* <p>Notes:
* <ul>
* <li>This method is a strictly case sensitive.
* <li>For "10 to the", UCUM allows 10* or 10^. This method uses 10^.
* <li>{ntu} becomes NTU.
* <li>{psu} becomes PSU.
* </ul>
* <p/>
* return the UCUM converted to UDUNITS.
* null returns null. "" returns "".
*/
public static String ucumToUdunits(String ucum) {
if (ucum == null) {
return null;
}
StringBuilder udunits = new StringBuilder();
int ucLength = ucum.length();
if (ucLength == 0) {
return "";
}
//is it a time point? e.g., s{since 1970-01-01T00:00:00T}
if (ucum.charAt(ucLength - 1) == '}' && //quick reject
ucum.indexOf('}') == ucLength - 1) { //reasonably quick reject
int sincePo = ucum.indexOf("{since ");
if (sincePo > 0) {
//is first part an atomic ucum unit?
String tUdunits = ucHashMap.get(ucum.substring(0, sincePo));
if (tUdunits != null) {
return tUdunits + " " + ucum.substring(sincePo + 1, ucLength - 1);
}
}
}
//parse ucum and build udunits, till done
int po = 0; //po is next position to be read
while (po < ucLength) {
char ch = ucum.charAt(po);
//letter
if (isUcumLetter(ch)) { //includes [, ], {, }, 'µ' and "'"
//find contiguous letters|_|digit (no '-')
int po2 = po + 1;
while (po2 < ucLength &&
(isUcumLetter(ucum.charAt(po2)) || ucum.charAt(po2) == '_' ||
ErddapString2.isDigit(ucum.charAt(po2)))) {
po2++;
}
String tUcum = ucum.substring(po, po2);
po = po2;
//some ucum have internal digits, but none end in digits
//if it ends in digits, treat as exponent
//find contiguous digits at end
int firstDigit = tUcum.length();
while (firstDigit >= 1 && ErddapString2.isDigit(tUcum.charAt(firstDigit - 1))) {
firstDigit--;
}
String exponent = tUcum.substring(firstDigit);
tUcum = tUcum.substring(0, firstDigit);
String tUdunits = oneUcumToUdunits(tUcum);
//deal with PER -> /
if (tUdunits.equals("/")) {
char lastUdunits = udunits.length() == 0 ? '\u0000' : udunits.charAt(udunits.length() - 1);
if (lastUdunits == '/') {
udunits.setCharAt(udunits.length() - 1, '.'); //2 '/' cancel out
} else if (lastUdunits == '.') {
udunits.setCharAt(udunits.length() - 1, '/'); // '/' replaces '.'
} else {
udunits.append('/');
}
} else {
udunits.append(tUdunits);
}
//add the exponent
udunits.append(exponent);
//catch -exponent as a number below
continue;
}
//number
if (ch == '-' || ErddapString2.isDigit(ch)) {
//find contiguous digits
int po2 = po + 1;
while (po2 < ucLength && ErddapString2.isDigit(ucum.charAt(po2))) {
po2++;
}
// ^- or ^{digit}
if (po2 < ucLength - 1 && Character.toLowerCase(ucum.charAt(po2)) == '^' &&
(ucum.charAt(po2 + 1) == '-' || ErddapString2.isDigit(ucum.charAt(po2 + 1)))) {
po2 += 2;
while (po2 < ucLength && ErddapString2.isDigit(ucum.charAt(po2))) {
po2++;
}
}
String num = ucum.substring(po, po2);
po = po2;
udunits.append(num);
continue;
}
// .
if (ch == '.') {
po++;
udunits.append(' '); // ' ' is more common than '.' in udunits
continue;
}
// *
if (ch == '*') {
po++;
udunits.append('^');
continue;
}
// ' ''
if (ch == '\'') {
po++;
if (po < ucLength && ucum.charAt(po) == '\'') {
udunits.append("arc_second");
po++;
} else {
udunits.append("arc_minute");
}
continue;
}
//otherwise, punctuation. copy it
// / (division), " doesn't occur,
udunits.append(ch);
po++;
}
return udunits.toString();
}
private static boolean isUcumLetter(char ch) {
return ErddapString2.isLetter(ch) ||
ch == '[' || ch == ']' ||
ch == '{' || ch == '}' ||
ch == 'µ' || ch == '\'';
}
/**
* This converts one ucum term (perhaps with metric prefix(es))
* ( to the corresponding udunits string.
* If ucum is just metric prefix(es), this returns the metric prefix
* acronym(s) with "{count}" as suffix (e.g., dkilo returns dk{count}).
* If this can't completely convert ucum, it returns the original ucum
* (e.g., kiloBobs remains kiloBobs (to avoid 'exact' becoming 'ect' ).
*/
private static String oneUcumToUdunits(String ucum) {
//repeatedly pull off start of ucum and build udunits, till done
String oldUcum = ucum;
StringBuilder udunits = new StringBuilder();
MAIN:
while (true) {
//try to find ucum in hashMap
String tUdunits = ucHashMap.get(ucum);
if (tUdunits != null) {
//success! done!
udunits.append(tUdunits);
return udunits.toString();
}
//try to separate out a metricAcronym prefix (e.g., "k")
for (int p = 0; p < nMetric; p++) {
if (ucum.startsWith(metricAcronym[p])) {
ucum = ucum.substring(metricAcronym[p].length());
udunits.append(metricAcronym[p]);
if (ucum.length() == 0) {
udunits.append("{count}");
return udunits.toString();
}
continue MAIN;
}
}
//try to separate out a twoAcronym prefix (e.g., "Ki")
for (int p = 0; p < nTwo; p++) {
if (ucum.startsWith(twoAcronym[p])) {
ucum = ucum.substring(twoAcronym[p].length());
char udch = udunits.length() > 0 ? udunits.charAt(udunits.length() - 1) : '\u0000';
if (udch != '\u0000' && udch != '.' && udch != '/') {
udunits.append('.');
}
if (ucum.length() == 0) {
udunits.append("{count}");
return udunits.toString();
}
udunits.append(twoValue[p]).append(".");
continue MAIN;
}
}
//ends in comment? try to just convert the beginning
int po1 = oldUcum.lastIndexOf('{');
if (po1 > 0 && oldUcum.endsWith("}")) {
return oneUcumToUdunits(oldUcum.substring(0, po1)) + oldUcum.substring(po1);
}
return oldUcum;
}
}
/**
* Reads the contents of {@code inputStream} into a HashMap. Each key-value pair in the input will result in an
* entry in the map.
* <p/>
* The specified stream remains open after this method returns.
*
* @param inputStream a stream with line-based, key-value pairs.
* @param charset the name of a supported {@link java.nio.charset.Charset charset}.
* @return a HashMap initialized from the stream.
* @throws java.io.IOException if an I/O error occurs
*/
public static HashMap<String, String> getHashMapStringString(InputStream inputStream, String charset)
throws IOException {
HashMap<String, String> ht = new HashMap<>();
ErddapStringArray sa = ErddapStringArray.fromInputStream(inputStream, charset);
int n = sa.size();
int i = 0;
while (i < n) {
String s = sa.get(i++);
if (s.startsWith("#")) {
continue;
}
while (i < n && s.endsWith("\\")) {
s = s.substring(0, s.length() - 1) + sa.get(i++);
}
int po = s.indexOf('=');
if (po < 0) {
continue;
}
//new String: so not linked to big source file's text
ht.put(s.substring(0, po).trim(), s.substring(po + 1).trim());
}
return ht;
}
}