/*
* Copyright 1999-2002 Carnegie Mellon University.
* Portions Copyright 2002 Sun Microsystems, Inc.
* Portions Copyright 2002 Mitsubishi Electric Research Laboratories.
* All Rights Reserved. Use is subject to license terms.
*
* See the file "license.terms" for information on usage and
* redistribution of this file, and for a DISCLAIMER OF ALL
* WARRANTIES.
*
*/
package edu.cmu.sphinx.frontend.util;
import edu.cmu.sphinx.frontend.DoubleData;
import edu.cmu.sphinx.frontend.FloatData;
import edu.cmu.sphinx.util.Utilities;
import javax.sound.sampled.*;
import java.io.FileOutputStream;
import java.io.IOException;
import java.text.DecimalFormat;
/** Defines utility methods for manipulating data values. */
public class DataUtil {
private static final int HEXADECIMAL = 1;
private static final int SCIENTIFIC = 2;
private static final int DECIMAL = 3;
/** DecimalFormat object to be used by all the methods. */
private static final DecimalFormat format = new DecimalFormat();
private static final int decimalIntegerDigits = 10;
private static final int decimalFractionDigits = 5;
private static final int floatScientificFractionDigits = 8;
private static final int doubleScientificFractionDigits = 8;
/** The number format to be used by *ArrayToString() methods. The default is scientific. */
private static int dumpFormat = SCIENTIFIC;
/**
* Static initialization of dumpFormat
*/
static {
String formatProperty = System.getProperty("frontend.util.dumpformat",
"SCIENTIFIC");
if (formatProperty.compareToIgnoreCase("DECIMAL") == 0) {
dumpFormat = DECIMAL;
} else if (formatProperty.compareToIgnoreCase("HEXADECIMAL") == 0) {
dumpFormat = HEXADECIMAL;
} else if (formatProperty.compareToIgnoreCase("SCIENTIFIC") == 0) {
dumpFormat = SCIENTIFIC;
}
}
/** Uninstantiable class. */
private DataUtil() {
}
/**
* Converts a byte array into a short array. Since a byte is 8-bits, and a short is 16-bits, the returned short
* array will be half in length than the byte array. If the length of the byte array is odd, the length of the short
* array will be <code>(byteArray.length - 1)/2</code>, i.e., the last byte is discarded.
*
* @param byteArray a byte array
* @param offset which byte to start from
* @param length how many bytes to convert
* @return a short array, or <code>null</code> if byteArray is of zero length
* @throws java.lang.ArrayIndexOutOfBoundsException if index goes out of bounds
*
*/
public static short[] byteToShortArray
(byte[] byteArray, int offset, int length)
throws ArrayIndexOutOfBoundsException {
if (0 < length && (offset + length) <= byteArray.length) {
int shortLength = length / 2;
short[] shortArray = new short[shortLength];
int temp;
for (int i = offset, j = 0; j < shortLength;
j++, temp = 0x00000000) {
temp = byteArray[i++] << 8;
temp |= 0x000000FF & byteArray[i++];
shortArray[j] = (short) temp;
}
return shortArray;
} else {
throw new ArrayIndexOutOfBoundsException
("offset: " + offset + ", length: " + length
+ ", array length: " + byteArray.length);
}
}
/**
* Converts a big-endian byte array into an array of doubles. Each consecutive bytes in the byte array are converted
* into a double, and becomes the next element in the double array. The size of the returned array is
* (length/bytesPerValue). Currently, only 1 byte (8-bit) or 2 bytes (16-bit) samples are supported.
*
* @param byteArray a byte array
* @param offset which byte to start from
* @param length how many bytes to convert
* @param bytesPerValue the number of bytes per value
* @param signedData whether the data is signed
* @return a double array, or <code>null</code> if byteArray is of zero length
* @throws java.lang.ArrayIndexOutOfBoundsException if index goes out of bounds
*
*/
public static double[] bytesToValues(byte[] byteArray,
int offset,
int length,
int bytesPerValue,
boolean signedData)
throws ArrayIndexOutOfBoundsException {
if (0 < length && (offset + length) <= byteArray.length) {
assert (length % bytesPerValue == 0);
double[] doubleArray = new double[length / bytesPerValue];
int i = offset;
for (int j = 0; j < doubleArray.length; j++) {
int val = byteArray[i++];
if (!signedData) {
val &= 0xff; // remove the sign extension
}
for (int c = 1; c < bytesPerValue; c++) {
int temp = byteArray[i++] & 0xff;
val = (val << 8) + temp;
}
doubleArray[j] = val;
}
return doubleArray;
} else {
throw new ArrayIndexOutOfBoundsException
("offset: " + offset + ", length: " + length
+ ", array length: " + byteArray.length);
}
}
/**
* Converts a little-endian byte array into an array of doubles. Each consecutive bytes of a float are converted
* into a double, and becomes the next element in the double array. The number of bytes in the double is specified
* as an argument. The size of the returned array is (data.length/bytesPerValue).
*
* @param data a byte array
* @param offset which byte to start from
* @param length how many bytes to convert
* @param bytesPerValue the number of bytes per value
* @param signedData whether the data is signed
* @return a double array, or <code>null</code> if byteArray is of zero length
* @throws java.lang.ArrayIndexOutOfBoundsException if index goes out of bounds
*
*/
public static double[] littleEndianBytesToValues(byte[] data,
int offset,
int length,
int bytesPerValue,
boolean signedData)
throws ArrayIndexOutOfBoundsException {
if (0 < length && (offset + length) <= data.length) {
assert (length % bytesPerValue == 0);
double[] doubleArray = new double[length / bytesPerValue];
int i = offset + bytesPerValue - 1;
for (int j = 0; j < doubleArray.length; j++) {
int val = data[i--];
if (!signedData) {
val &= 0xff; // remove the sign extension
}
for (int c = 1; c < bytesPerValue; c++) {
int temp = data[i--] & 0xff;
val = (val << 8) + temp;
}
// advance 'i' to the last byte of the next value
i += (bytesPerValue * 2);
doubleArray[j] = val;
}
return doubleArray;
} else {
throw new ArrayIndexOutOfBoundsException
("offset: " + offset + ", length: " + length
+ ", array length: " + data.length);
}
}
/**
* Convert the two bytes starting at the given offset to a short.
*
* @param byteArray the byte array
* @param offset where to start
* @return a short
* @throws java.lang.ArrayIndexOutOfBoundsException if index goes out of bounds
*
*/
public static short bytesToShort(byte[] byteArray, int offset)
throws ArrayIndexOutOfBoundsException {
short result = (short)
((byteArray[offset++] << 8) |
(0x000000FF & byteArray[offset]));
return result;
}
/**
* Returns the string representation of the given short array. The string will be in the form:
* <pre>data.length data[0] data[1] ... data[data.length-1]</pre>
*
* @param data the short array to convert
* @return a string representation of the short array
*/
public static String shortArrayToString(short[] data) {
StringBuilder dump = new StringBuilder().append(data.length);
for (short val : data) {
dump.append(' ').append(val);
}
return dump.toString();
}
/**
* Returns the given double array as a string. The string will be in the form:
* <pre>data.length data[0] data[1] ... data[data.length-1]</pre>where
* <code>data[i]</code>.
* <p>
* The doubles can be written as decimal, hexadecimal, or scientific notation. In decimal notation, it is formatted
* by the method <code>Util.formatDouble(data[i], 10, 5)</code>. Use the System property
* <code>"frontend.util.dumpformat"</code> to control the dump format (permitted values are "decimal",
* "hexadecimal", and "scientific".
*
* @param data the double array to dump
* @return a string representation of the double array
*/
public static String doubleArrayToString(double[] data) {
return doubleArrayToString(data, dumpFormat);
}
/**
* Returns the given double array as a string. The dump will be in the form:
* <pre>data.length data[0] data[1] ... data[data.length-1]</pre>where
* <code>data[i]</code> is formatted by the method <code>Util.formatDouble(data[i], 10, 5)</code>.
*
* @param data the double array to dump
* @param format either HEXADECIMAL, SCIENTIFIC or DECIMAL
* @return a string representation of the double array
*/
private static String doubleArrayToString(double[] data, int format) {
StringBuilder dump = new StringBuilder().append(data.length);
for (double val : data) {
if (format == DECIMAL) {
dump.append(' ').append(formatDouble
(val, decimalIntegerDigits,
decimalFractionDigits));
} else if (format == HEXADECIMAL) {
long binary = Double.doubleToRawLongBits(val);
dump.append(" 0x").append(Long.toHexString(binary));
} else if (format == SCIENTIFIC) {
dump.append(' ').append(Utilities.doubleToScientificString
(val, doubleScientificFractionDigits));
}
}
return dump.toString();
}
/**
* Returns the given float array as a string. The string is of the form:
* <pre>data.length data[0] data[1] ... data[data.length-1]</pre>
* <p>
* The floats can be written as decimal, hexadecimal, or scientific notation. In decimal notation, it is formatted
* by the method <code>Util.formatDouble(data[i], 10, 5)</code>. Use the System property
* <code>"frontend.util.dumpformat"</code> to control the dump format (permitted values are "decimal",
* "hexadecimal", and "scientific".
*
* @param data the float array to dump
* @return a string of the given float array
*/
public static String floatArrayToString(float[] data) {
return floatArrayToString(data, dumpFormat);
}
/**
* Returns the given float array as a string. The string is of the form:
* <pre>data.length data[0] data[1] ... data[data.length-1]</pre>
*
* @param data the float array to dump
* @param format either DECIMAL, HEXADECIMAL or SCIENTIFIC
* @return a string of the given float array
*/
private static String floatArrayToString(float[] data, int format) {
StringBuilder dump = new StringBuilder().append(data.length);
for (float val : data) {
if (format == DECIMAL) {
dump.append(' ').append(formatDouble
(val,
decimalIntegerDigits, decimalFractionDigits));
} else if (format == HEXADECIMAL) {
int binary = Float.floatToRawIntBits(val);
dump.append(" 0x").append(Integer.toHexString(binary));
} else if (format == SCIENTIFIC) {
dump.append(' ').append(Utilities.doubleToScientificString
(val, floatScientificFractionDigits));
}
}
return dump.toString();
}
/**
* Returns a formatted string of the given number, with the given numbers of digit space for the integer and
* fraction parts. If the integer part has less than <code>integerDigits</code> digits, spaces will be prepended to
* it. If the fraction part has less than <code>fractionDigits</code>, spaces will be appended to it. Therefore,
* <code>formatDouble(12345.6789, 6, 6)</code> will give
* the string <pre>" 12345.6789 "</pre> (one space before 1, two spaces
* after 9).
*
* @param number the number to format
* @param integerDigits the length of the integer part
* @param fractionDigits the length of the fraction part
* @return a formatted number
*/
public static String formatDouble(double number, int integerDigits,
int fractionDigits) {
StringBuilder formatter = new StringBuilder(2 + fractionDigits).append("0.");
for (int i = 0; i < fractionDigits; i++) {
formatter.append('0');
}
format.applyPattern(formatter.toString());
String formatted = format.format(number);
// pad preceding spaces before the number
int dotIndex = formatted.indexOf('.');
if (dotIndex == -1) {
formatted += ".";
dotIndex = formatted.length() - 1;
}
StringBuilder result = new StringBuilder();
for (int i = dotIndex; i < integerDigits; i++) {
result.append(' ');
}
result.append(formatted);
return result.toString();
}
/**
* Returns the number of samples per window given the sample rate (in Hertz) and window size (in milliseconds).
*
* @param sampleRate the sample rate in Hertz (i.e., frequency per seconds)
* @param windowSizeInMs the window size in milliseconds
* @return the number of samples per window
*/
public static int getSamplesPerWindow(int sampleRate,
float windowSizeInMs) {
return (int) (sampleRate * windowSizeInMs / 1000);
}
/**
* Returns the number of samples in a window shift given the sample rate (in Hertz) and the window shift (in
* milliseconds).
*
* @param sampleRate the sample rate in Hertz (i.e., frequency per seconds)
* @param windowShiftInMs the window shift in milliseconds
* @return the number of samples in a window shift
*/
public static int getSamplesPerShift(int sampleRate,
float windowShiftInMs) {
return (int) (sampleRate * windowShiftInMs / 1000);
}
/**
* Saves the given bytes to the given binary file.
*
* @param data the bytes to save
* @param filename the binary file name
* @throws IOException if an I/O error occurs
*/
public static void bytesToFile(byte[] data, String filename)
throws IOException {
FileOutputStream file = new FileOutputStream(filename);
file.write(data);
file.close();
}
/**
* Returns a native audio format that has the same encoding, endianness and sample size as the given format, and a
* sample rate that is larger than the given sample rate.
*
* @param format format for the data
* @return a suitable native audio format
*/
public static AudioFormat getNativeAudioFormat(AudioFormat format) {
return getNativeAudioFormat(format, null);
}
/**
* Returns a native audio format that has the same encoding, endianness and sample size as the given format, and a
* sample rate that is greater than or equal to the given sample rate.
*
* @param format the desired format
* @param mixer if non-null, use this Mixer; otherwise use AudioSystem
* @return a suitable native audio format
*/
public static AudioFormat getNativeAudioFormat(AudioFormat format,
Mixer mixer) {
Line.Info[] lineInfos;
if (mixer != null) {
lineInfos = mixer.getTargetLineInfo
(new Line.Info(TargetDataLine.class));
} else {
lineInfos = AudioSystem.getTargetLineInfo
(new Line.Info(TargetDataLine.class));
}
AudioFormat nativeFormat = null;
// find a usable target line
for (Line.Info info : lineInfos) {
AudioFormat[] formats = ((TargetDataLine.Info)info).getFormats();
for (AudioFormat thisFormat : formats) {
// for now, just accept downsampling, not checking frame
// size/rate (encoding assumed to be PCM)
if (thisFormat.getEncoding() == format.getEncoding()
&& thisFormat.isBigEndian() == format.isBigEndian()
&& thisFormat.getSampleSizeInBits() ==
format.getSampleSizeInBits()
&& thisFormat.getSampleRate() >= format.getSampleRate()) {
nativeFormat = thisFormat;
break;
}
}
if (nativeFormat != null) {
//no need to look through remaining lineinfos
break;
}
}
return nativeFormat;
}
/**
* Converts DoubleData object to FloatDatas.
* @param data data to convert
* @return converted data
*/
public static DoubleData FloatData2DoubleData(FloatData data) {
int numSamples = data.getValues().length;
double[] doubleData = new double[numSamples];
float[] values = data.getValues();
for (int i = 0; i < values.length; i++) {
doubleData[i] = values[i];
}
return new DoubleData(doubleData, data.getSampleRate(), data.getFirstSampleNumber());
}
/**
* Converts FloatData object to DoubleData.
* @param data data to convert
* @return converted data
*/
public static FloatData DoubleData2FloatData(DoubleData data) {
int numSamples = data.getValues().length;
float[] floatData = new float[numSamples];
double[] values = data.getValues();
for (int i = 0; i < values.length; i++) {
floatData[i] = (float) values[i];
}
return new FloatData(floatData, data.getSampleRate(), data.getFirstSampleNumber());
}
}