/*
 * Copyright 1999-2004 Carnegie Mellon University.
 * Portions Copyright 2002-2004 Sun Microsystems, Inc.
 * Portions Copyright 2002-2004 Mitsubishi Electric Research Laboratories.
 * All Rights Reserved.  Use is subject to license terms.
 *
 * See the file "license.terms" for information on usage and
 * redistribution of this file, and for a DISCLAIMER OF ALL
 * WARRANTIES.
 *
 */

package edu.cmu.sphinx.tools.audio;

import javax.sound.sampled.AudioFormat;
import java.io.IOException;
import java.io.InputStream;
import java.util.ArrayList;

/**
 * Reads a raw audio file (i.e., a file that is just plain raw samples and nothing else) and converts it to signed
 * data.
 */
public class RawReader {

    /**
     * Reads raw bytes from the given audioStream until end of stream and returns samples based on the audioFormat.
     * <p>
     * Each sample occupies {@code audioFormat.getSampleSizeInBits() / 8} bytes. Unsigned input is shifted into the
     * signed range. Every decoded value is narrowed to {@code short} with a plain cast, so samples narrower than
     * 16 bits are not rescaled and only the low 16 bits of wider samples are kept. A trailing group of bytes that
     * does not make up a complete sample is discarded. The stream is not closed by this method.
     *
     * @param audioStream the stream containing the raw bytes
     * @param audioFormat a hint of what to expect from the stream
     * @return samples, one sample per array element
     * @throws IOException              if IO went wrong
     * @throws IllegalArgumentException if the encoding is neither PCM_SIGNED nor PCM_UNSIGNED, or if the sample
     *                                  size is smaller than one byte
     */
    public static short[] readAudioData(InputStream audioStream,
                                        AudioFormat audioFormat)
            throws IOException {

        /* Work out how the incoming bytes have to be interpreted. */
        AudioFormat.Encoding encoding = audioFormat.getEncoding();
        boolean signedData;
        if (AudioFormat.Encoding.PCM_SIGNED.equals(encoding)) {
            signedData = true;
        } else if (AudioFormat.Encoding.PCM_UNSIGNED.equals(encoding)) {
            signedData = false;
        } else {
            /* Library code must not terminate the JVM; report the problem to the caller instead. */
            throw new IllegalArgumentException("Unsupported audio encoding: " + encoding);
        }

        int sampleSizeInBits = audioFormat.getSampleSizeInBits();
        int bytesPerSample = sampleSizeInBits / 8;
        if (bytesPerSample < 1) {
            /* A zero-length sample buffer could never be decoded. */
            throw new IllegalArgumentException("Unsupported sample size in bits: " + sampleSizeInBits);
        }
        boolean bigEndian = audioFormat.isBigEndian();

        /* Read one sample at a time until the stream runs dry, converting each one to a signed value. */
        byte[] buffer = new byte[bytesPerSample];
        ArrayList<Short> samples = new ArrayList<Short>();
        while (fillBuffer(audioStream, buffer)) {
            samples.add((short) decodeSample(buffer, bigEndian, signedData));
        }

        /* Convert the array list to an array of shorts and return. */
        short[] audioData = new short[samples.size()];
        for (int i = 0; i < audioData.length; i++) {
            audioData[i] = samples.get(i);
        }
        return audioData;
    }

    /**
     * Fills the whole buffer from the stream, issuing as many reads as needed.
     *
     * @param in     the stream to read from
     * @param buffer the buffer to fill completely
     * @return true if the buffer was filled, false if end of stream was reached first
     * @throws IOException if IO went wrong
     */
    private static boolean fillBuffer(InputStream in, byte[] buffer) throws IOException {
        int filled = 0;
        while (filled < buffer.length) {
            int count = in.read(buffer, filled, buffer.length - filled);
            if (count < 0) {
                return false;
            }
            filled += count;
        }
        return true;
    }

    /**
     * Assembles the bytes of one sample into a signed integer value.
     *
     * @param buffer     the bytes of exactly one sample
     * @param bigEndian  true if the most significant byte comes first
     * @param signedData true if the sample is stored as signed data
     * @return the sample value, shifted into the signed range when the input is unsigned
     */
    private static int decodeSample(byte[] buffer, boolean bigEndian, boolean signedData) {
        /* Walk from the most significant byte towards the least significant one. */
        int index = bigEndian ? 0 : buffer.length - 1;
        int step = bigEndian ? 1 : -1;

        /* Only the most significant byte carries the sign; all others are plain magnitude bits. */
        int val = signedData ? buffer[index] : (buffer[index] & 0xff);
        for (int remaining = buffer.length - 1; remaining > 0; remaining--) {
            index += step;
            val = (val << 8) | (buffer[index] & 0xff);
        }

        /* We always give signed data. So, if the input is unsigned, subtract half of the full range. */
        if (!signedData) {
            val -= 1 << ((buffer.length * 8) - 1);
        }
        return val;
    }
}