/******************************************************************************
* *
* Copyright (c) 1999-2003 Wimba S.A., All Rights Reserved. *
* *
* COPYRIGHT: *
* This software is the property of Wimba S.A. *
* This software is redistributed under the Xiph.org variant of *
* the BSD license. *
* Redistribution and use in source and binary forms, with or without *
* modification, are permitted provided that the following conditions *
* are met: *
* - Redistributions of source code must retain the above copyright *
* notice, this list of conditions and the following disclaimer. *
* - Redistributions in binary form must reproduce the above copyright *
* notice, this list of conditions and the following disclaimer in the *
* documentation and/or other materials provided with the distribution. *
* - Neither the name of Wimba, the Xiph.org Foundation nor the names of *
* its contributors may be used to endorse or promote products derived *
* from this software without specific prior written permission. *
* *
* WARRANTIES: *
* This software is made available by the authors in the hope *
* that it will be useful, but without any warranty. *
* Wimba S.A. is not liable for any consequence related to the *
* use of the provided software. *
* *
* Class: Pcm2SpeexAudioInputStream.java *
* *
* Author: Marc GIMPEL *
* *
* Date: 12th July 2003 *
* *
******************************************************************************/
/* $Id: Pcm2SpeexAudioInputStream.java,v 1.2 2004/10/21 16:21:58 mgimpel Exp $ */
package org.xiph.speex.spi;
import java.util.Random;
import java.io.IOException;
import java.io.InputStream;
import java.io.StreamCorruptedException;
import javax.sound.sampled.AudioFormat;
import org.xiph.speex.AudioFileWriter;
import org.xiph.speex.OggCrc;
import org.xiph.speex.Encoder;
import org.xiph.speex.SpeexEncoder;
/**
 * Converts a 16 bits/sample PCM audio stream (mono or stereo) to Ogg Speex.
 * <p>
 * PCM bytes read from the underlying stream are accumulated in the pre-buffer
 * (<code>prebuf</code>), encoded with a {@link SpeexEncoder} and written as
 * complete Ogg pages into the output buffer (<code>buf</code>). The first call
 * to {@link #fill()} additionally emits the Speex header page and the comment
 * page.
 *
 * @author Marc Gimpel, Wimba S.A. (mgimpel@horizonwimba.com)
 * @version $Revision: 1.2 $
 */
public class Pcm2SpeexAudioInputStream
  extends FilteredAudioInputStream
{
  /** The default size of the buffer (UWB stereo requires at least 2560b). */
  public static final int DEFAULT_BUFFER_SIZE = 2560;
//  public static final boolean DEFAULT_VBR = true;
  /** The default sample rate if none is given in the constructor. */
  public static final int DEFAULT_SAMPLERATE = 8000;
  /** The default number of channels if none is given in the constructor. */
  public static final int DEFAULT_CHANNELS = 1;
  /** The default quality setting for the Speex encoder. */
  public static final int DEFAULT_QUALITY = 3;
  /** The default number of Speex frames that will be put in each Ogg packet. */
  public static final int DEFAULT_FRAMES_PER_PACKET = 1;
  /** The default number of Ogg packets that will be put in each Ogg page. */
  public static final int DEFAULT_PACKETS_PER_OGG_PAGE = 20; // .4s of audio
  /** Indicates the value is unknown or undetermined. */
  public static final int UNKNOWN = -1;

  // Ogg page layout (offsets are relative to the start of the page)
  /** Size in bytes of the fixed part of an Ogg page header. */
  private static final int OGG_PAGE_HEADER_SIZE = 27;
  /** Offset of the header type flag byte inside an Ogg page. */
  private static final int OGG_HEADER_TYPE_OFFSET = 5;
  /** Offset of the granule position inside an Ogg page. */
  private static final int OGG_GRANULEPOS_OFFSET = 6;
  /** Offset of the page checksum inside an Ogg page. */
  private static final int OGG_CHECKSUM_OFFSET = 22;
  /** Offset of the byte holding the number of packets (segments) of a page. */
  private static final int OGG_SEGMENT_COUNT_OFFSET = 26;
  /** Header type flag value marking the last page of the stream. */
  private static final int OGG_END_OF_STREAM = 4;
  /** Size in bytes of the Speex header packet. */
  private static final int SPEEX_HEADER_SIZE = 80;

  // Speex variables
  /** The Speex Encoder class. */
  private SpeexEncoder encoder;
  /** The encoder mode (0=NB, 1=WB, 2=UWB). */
  private int mode;
  /** The size in bytes of PCM data that will be encoded into 1 Speex frame. */
  private int frameSize;
  /** The number of Speex frames that will be put in each Ogg packet. */
  private int framesPerPacket;
  /** The number of audio channels (1=mono, 2=stereo). */
  private int channels;

  // Ogg variables
  /** The comment String that will appear in the Ogg comment packet. */
  private String comment = null;
  /** A counter for the number of PCM samples that have been encoded. */
  private int granulepos;
  /** A unique serial number that identifies the Ogg stream. */
  private int streamSerialNumber;
  /** The number of Ogg packets that will be put in each Ogg page. */
  private int packetsPerOggPage;
  /** The number of Ogg packets that have been encoded in the current page. */
  private int packetCount;
  /** The number of Ogg pages that have been written to the stream. */
  private int pageCount;
  /** Pointer in the buffer to the point where Ogg data is added. */
  private int oggCount;
  /** Flag to indicate if this is the first time a encode method is called. */
  private boolean first;

  /**
   * Constructor using the default buffer size and deriving the encoder mode
   * and quality from the given format.
   * @param in     the underlying input stream.
   * @param format the target format of this stream's audio data.
   * @param length the length in sample frames of the data in this stream.
   */
  public Pcm2SpeexAudioInputStream(final InputStream in,
                                   final AudioFormat format,
                                   final long length)
  {
    this(UNKNOWN, UNKNOWN, in, format, length, DEFAULT_BUFFER_SIZE);
  }

  /**
   * Constructor using the default buffer size.
   * @param mode    the mode of the encoder (0=NB, 1=WB, 2=UWB).
   * @param quality the quality setting of the encoder (between 0 and 10).
   * @param in      the underlying input stream.
   * @param format  the target format of this stream's audio data.
   * @param length  the length in sample frames of the data in this stream.
   */
  public Pcm2SpeexAudioInputStream(final int mode,
                                   final int quality,
                                   final InputStream in,
                                   final AudioFormat format,
                                   final long length)
  {
    this(mode, quality, in, format, length, DEFAULT_BUFFER_SIZE);
  }

  /**
   * Constructor deriving the encoder mode and quality from the given format.
   * @param in     the underlying input stream.
   * @param format the target format of this stream's audio data.
   * @param length the length in sample frames of the data in this stream.
   * @param size   the buffer size.
   * @exception IllegalArgumentException if {@code size <= 0}.
   */
  public Pcm2SpeexAudioInputStream(final InputStream in,
                                   final AudioFormat format,
                                   final long length,
                                   final int size)
  {
    this(UNKNOWN, UNKNOWN, in, format, length, size);
  }

  /**
   * Constructor.
   * <p>
   * A negative <code>mode</code> is replaced by a mode chosen from the sample
   * rate (below 12kHz: narrowband, below 24kHz: wideband, otherwise
   * ultra-wideband). A negative <code>quality</code> is replaced by the
   * quality of the format's {@link SpeexEncoding} if it has one, otherwise by
   * {@link #DEFAULT_QUALITY}. VBR is enabled only if the format's encoding is
   * a VBR {@link SpeexEncoding}.
   *
   * @param mode    the mode of the encoder (0=NB, 1=WB, 2=UWB).
   * @param quality the quality setting of the encoder (between 0 and 10).
   * @param in      the underlying input stream.
   * @param format  the target format of this stream's audio data.
   * @param length  the length in sample frames of the data in this stream.
   * @param size    the buffer size.
   * @exception IllegalArgumentException if {@code size <= 0}.
   */
  public Pcm2SpeexAudioInputStream(int mode,
                                   int quality,
                                   final InputStream in,
                                   final AudioFormat format,
                                   final long length,
                                   final int size)
  {
    super(in, format, length, size);
    // Ogg initialisation
    granulepos = 0;
    // a random serial number is used until setSerialNumber() is called.
    streamSerialNumber = new Random().nextInt();
    packetsPerOggPage = DEFAULT_PACKETS_PER_OGG_PAGE;
    packetCount = 0;
    pageCount = 0;
    // Speex initialisation
    framesPerPacket = DEFAULT_FRAMES_PER_PACKET;
    int samplerate = (int) format.getSampleRate();
    if (samplerate < 0) {
      samplerate = DEFAULT_SAMPLERATE;
    }
    channels = format.getChannels();
    if (channels < 0) {
      channels = DEFAULT_CHANNELS;
    }
    if (mode < 0) {
      mode = (samplerate < 12000) ? 0 : ((samplerate < 24000) ? 1 : 2);
    }
    this.mode = mode;
    final AudioFormat.Encoding encoding = format.getEncoding();
    final boolean speexEncoding = encoding instanceof SpeexEncoding;
    if (quality < 0) {
      quality = speexEncoding ? ((SpeexEncoding) encoding).getQuality()
                              : DEFAULT_QUALITY;
    }
    encoder = new SpeexEncoder();
    encoder.init(mode, quality, samplerate, channels);
    // configure the encoder directly rather than through the overridable
    // setVbr(), which must not be invoked on a partially constructed object.
    encoder.getEncoder().setVbr(speexEncoding &&
                                ((SpeexEncoding) encoding).isVBR());
    // 2 bytes per sample, for every channel.
    frameSize = 2 * channels * encoder.getFrameSize();
    // Misc initialsation
    comment = "Encoded with " + SpeexEncoder.VERSION;
    first = true;
  }

  /**
   * Sets the Stream Serial Number.
   * Must not be changed mid stream: the call is silently ignored once the
   * header pages have been written.
   * @param serialNumber the serial number identifying the Ogg stream.
   */
  public void setSerialNumber(final int serialNumber)
  {
    if (first) {
      this.streamSerialNumber = serialNumber;
    }
  }

  /**
   * Sets the number of Audio Frames that are to be put in every Speex Packet.
   * An Audio Frame contains 160 samples for narrowband, 320 samples for
   * wideband and 640 samples for ultra-wideband.
   * A value that is not strictly positive selects the default.
   * <p>
   * NOTE(review): the value is written into the Speex header by the first
   * call to {@link #fill()}; changing it afterwards is not reflected there.
   * @param framesPerPacket the number of frames per packet.
   * @see #DEFAULT_FRAMES_PER_PACKET
   */
  public void setFramesPerPacket(int framesPerPacket)
  {
    if (framesPerPacket <= 0) {
      framesPerPacket = DEFAULT_FRAMES_PER_PACKET;
    }
    this.framesPerPacket = framesPerPacket;
  }

  /**
   * Sets the number of Speex Packets that are to be put in every Ogg Page.
   * This value must be less than 256 as the value is encoded in 1 byte in the
   * Ogg Header (just before the array of packet sizes).
   * A value that is not strictly positive selects the default and a value
   * above 255 is clamped to 255.
   * @param packetsPerOggPage the number of packets per Ogg page.
   * @see #DEFAULT_PACKETS_PER_OGG_PAGE
   */
  public void setPacketsPerOggPage(int packetsPerOggPage)
  {
    if (packetsPerOggPage <= 0) {
      packetsPerOggPage = DEFAULT_PACKETS_PER_OGG_PAGE;
    }
    if (packetsPerOggPage > 255) {
      packetsPerOggPage = 255;
    }
    this.packetsPerOggPage = packetsPerOggPage;
  }

  /**
   * Sets the comment for the Ogg Comment Header.
   * A <code>null</code> comment is treated as an empty comment, so that
   * writing the comment header cannot fail later on.
   * @param comment       the comment text.
   * @param appendVersion if true, the encoder version string is appended
   *                      directly after the comment (no separator is added).
   */
  public void setComment(final String comment,
                         final boolean appendVersion)
  {
    String text = (comment == null) ? "" : comment;
    if (appendVersion) {
      text += SpeexEncoder.VERSION;
    }
    this.comment = text;
  }

  /**
   * Sets the Speex encoder Quality.
   * If the encoder is in VBR mode, the VBR quality is set to the same value.
   * @param quality the quality setting of the encoder.
   */
  public void setQuality(final int quality)
  {
    final Encoder enc = encoder.getEncoder();
    enc.setQuality(quality);
    if (enc.getVbr()) {
      enc.setVbrQuality((float)quality);
    }
  }

  /**
   * Sets whether of not the encoder is to use VBR.
   * @param vbr true to enable VBR, false to disable it.
   */
  public void setVbr(final boolean vbr)
  {
    encoder.getEncoder().setVbr(vbr);
  }

  /**
   * Returns the Encoder.
   * @return the Encoder.
   */
  public Encoder getEncoder()
  {
    return encoder.getEncoder();
  }

  /**
   * Fills the buffer with more encoded data.
   * <p>
   * On the first call the Speex header page and the comment page are written.
   * PCM data is then read from the underlying stream into the pre-buffer
   * until either
   * <ul>
   * <li>enough data for at least one complete Ogg page is available, in which
   *     case every complete page is encoded, or</li>
   * <li>the underlying stream has ended, in which case the remaining data is
   *     zero padded to a whole packet and encoded (at most one page per
   *     call), or</li>
   * <li>the underlying stream returns 0 bytes and the pre-buffer cannot be
   *     compacted, in which case nothing is encoded.</li>
   * </ul>
   * Assumes that it is being called by a synchronized method.
   * @exception StreamCorruptedException if the underlying stream ends in the
   *            middle of a 16 bit PCM sample.
   * @exception IOException if an I/O error occurs.
   */
  protected void fill()
    throws IOException
  {
    makeSpace();
    if (first) {
      writeHeaderFrames();
      first = false;
    }
    // size in bytes of the PCM data that makes up one complete Ogg page.
    final int pcmPageBytes = framesPerPacket * frameSize * packetsPerOggPage;
    while (true) {
      if ((prebuf.length - prepos) < pcmPageBytes) { // grow prebuf
        final byte[] grown = new byte[prepos + pcmPageBytes];
        System.arraycopy(prebuf, 0, grown, 0, precount);
        prebuf = grown;
      }
      final int read = in.read(prebuf, precount, prebuf.length - precount);
      if (read < 0) { // inputstream has ended
        encodeRemainingData();
        return;
      }
      if (read > 0) {
        precount += read;
        if ((precount - prepos) >= pcmPageBytes) {
          encodeFullPages(pcmPageBytes);
          compactPreBuffer();
          // we have encoded some data (all that we could),
          // so we can leave now, otherwise we return to a potentially
          // blocking read of the underlying inputstream.
          return;
        }
        // not enough data for a page yet: read some more.
      }
      else { // read == 0
        // read 0 bytes from underlying stream yet it is not finished.
        if (precount < prebuf.length || prepos <= 0) {
          // either there was still room in the pre-buffer, or it is full and
          // cannot be compacted. We could grow the pre-buffer but that risks
          // in turn growing the buffer which could lead sooner or later to
          // an OutOfMemoryException.
          return;
        }
        // no more room in buffer: free some space and try again.
        compactPreBuffer();
      }
    }
  }

  /**
   * Encodes every complete Ogg page for which the pre-buffer holds enough
   * PCM data.
   * @param pcmPageBytes the size in bytes of the PCM data of one Ogg page.
   * @exception IOException if an I/O error occurs.
   */
  private void encodeFullPages(final int pcmPageBytes)
    throws IOException
  {
    while ((precount - prepos) >= pcmPageBytes) { // lets encode all we can
      if (packetCount == 0) {
        writeOggPageHeader(packetsPerOggPage, 0);
      }
      while (packetCount < packetsPerOggPage) {
        encodePacket();
      }
      writeOggPageChecksum();
    }
  }

  /**
   * Encodes the PCM data left in the pre-buffer once the underlying stream
   * has ended. The last packet is padded with zeros. At most one Ogg page is
   * written per call; the page that consumes the last of the data is flagged
   * as end of stream.
   * <p>
   * NOTE(review): if the stream length is an exact multiple of a page, the
   * last page has already been written by the time the end of the stream is
   * detected, and no end of stream flag is emitted.
   * @exception StreamCorruptedException if the remaining data ends in the
   *            middle of a 16 bit PCM sample.
   * @exception IOException if an I/O error occurs.
   */
  private void encodeRemainingData()
    throws IOException
  {
    if ((precount - prepos) % 2 != 0) { // we don't have a complete last PCM sample
      throw new StreamCorruptedException("Incompleted last PCM sample when stream ended");
    }
    final int pcmPacketBytes = framesPerPacket * frameSize;
    while (prepos < precount) { // still data to encode
      // fill end of packet with zeros (no-op if a whole packet is available)
      for (; precount < (prepos + pcmPacketBytes); precount++) {
        prebuf[precount] = 0;
      }
      if (packetCount == 0) {
        writeOggPageHeader(packetsPerOggPage, 0);
      }
      encodePacket();
      if (packetCount >= packetsPerOggPage) {
        if (prepos >= precount) {
          // nothing left to encode: this full page is the last one.
          buf[count + OGG_HEADER_TYPE_OFFSET] = (byte) OGG_END_OF_STREAM;
        }
        writeOggPageChecksum();
        return;
      }
    }
    if (packetCount > 0) {
      // we have less than the normal number of packets in this page.
      final int dataStart = count + OGG_PAGE_HEADER_SIZE + packetsPerOggPage;
      buf[count + OGG_HEADER_TYPE_OFFSET] = (byte) OGG_END_OF_STREAM;
      buf[count + OGG_SEGMENT_COUNT_OFFSET] = (byte)(0xff & packetCount);
      // the header reserved one size byte per expected packet: move the
      // packet data down over the unused size bytes.
      System.arraycopy(buf, dataStart,
                       buf, count + OGG_PAGE_HEADER_SIZE + packetCount,
                       oggCount - dataStart);
      oggCount -= packetsPerOggPage - packetCount;
      writeOggPageChecksum();
    }
  }

  /**
   * Encodes <code>framesPerPacket</code> frames of PCM data from the
   * pre-buffer into one packet, appends it to the current Ogg page and
   * records its size in the page header.
   * <p>
   * NOTE(review): the size is stored in a single byte, so a packet larger
   * than 255 bytes would be recorded with a truncated size.
   * @exception IOException if an I/O error occurs.
   */
  private void encodePacket()
    throws IOException
  {
    for (int i = 0; i < framesPerPacket; i++) {
      encoder.processData(prebuf, prepos, frameSize);
      prepos += frameSize;
    }
    final int size = encoder.getProcessedDataByteSize();
    ensureBufferCapacity(oggCount, size);
    buf[count + OGG_PAGE_HEADER_SIZE + packetCount] = (byte)(0xff & size);
    encoder.getProcessedData(buf, oggCount);
    oggCount += size;
    packetCount++;
  }

  /**
   * Moves the unencoded PCM data to the start of the pre-buffer.
   */
  private void compactPreBuffer()
  {
    System.arraycopy(prebuf, prepos, prebuf, 0, precount - prepos);
    precount -= prepos;
    prepos = 0;
  }

  /**
   * Doubles the size of the output buffer until <code>required</code> bytes
   * can be written at position <code>used</code>. Only the first
   * <code>used</code> bytes are preserved.
   * @param used     the number of bytes of the buffer that are in use.
   * @param required the number of bytes that must fit after them.
   */
  private void ensureBufferCapacity(final int used, final int required)
  {
    while ((buf.length - used) < required) { // grow buffer
      final byte[] grown = new byte[buf.length * 2];
      System.arraycopy(buf, 0, grown, 0, used);
      buf = grown;
    }
  }

  /**
   * Returns an estimate of the number of bytes that can be read from this
   * inputstream without blocking.
   * <p>
   * The result is the value returned by the <code>available</code> method of
   * <code>FilteredAudioInputStream</code>, plus the estimated size of the
   * Ogg pages that could be produced from the PCM data that has not been
   * encoded yet (the data waiting in the pre-buffer and the bytes reported
   * available by the underlying inputstream). Only complete Ogg pages are
   * counted. In VBR mode a minimum packet size is assumed, as the real size
   * is not known in advance.
   *
   * @return the number of bytes that can be read from this inputstream without
   * blocking.
   * @exception IOException if an I/O error occurs.
   * @see #in
   */
  public synchronized int available()
    throws IOException
  {
    final int avail = super.available();
    final int unencoded = precount - prepos + in.available();
    // size of the PCM data of 1 frame (20ms) for a single channel.
    final int pcmFrameBytes;
    switch (mode) {
      case 0: // Narrowband: 160 samples = 320 bytes
        pcmFrameBytes = 320;
        break;
      case 1: // Wideband: 320 samples = 640 bytes
        pcmFrameBytes = 640;
        break;
      case 2: // Ultra wideband: 640 samples = 1280 bytes
        pcmFrameBytes = 1280;
        break;
      default:
        return avail;
    }
    // size of the PCM data necessary to encode 1 Ogg page. The channel count
    // applies whether or not VBR is used: ignoring it would count pages that
    // cannot be encoded yet from stereo input.
    final int pcmPageBytes = packetsPerOggPage * framesPerPacket *
                             pcmFrameBytes * encoder.getChannels();
    final int pages = unencoded / pcmPageBytes;
    final int oggPageBytes;
    if (encoder.getEncoder().getVbr()) {
      // Ogg page = Ogg header + size of each packet + minimum packet size:
      // 1 byte (min 5 bits) for narrowband and 2 bytes (min 13 bits) for
      // ultra wideband. Wideband (min 9 bits) is only counted as 1 byte,
      // which under-estimates the result but never over-estimates it.
      final int minPacketBytes = (mode == 2) ? 2 : 1;
      oggPageBytes = OGG_PAGE_HEADER_SIZE +
                     packetsPerOggPage * (minPacketBytes + 1);
    }
    else {
      // Calculate size of a packet of Speex data.
      int spxpacketsize = encoder.getEncoder().getEncodedFrameSize();
      if (channels > 1) {
        spxpacketsize += 17; // 1+4(14=inband)+4(9=stereo)+8(stereo data)
      }
      spxpacketsize *= framesPerPacket;
      spxpacketsize = (spxpacketsize + 7) >> 3; // convert bits to bytes
      // Ogg page = Ogg header + size of each packet + Ogg packets
      oggPageBytes = OGG_PAGE_HEADER_SIZE +
                     packetsPerOggPage * (spxpacketsize + 1);
    }
    return avail + oggPageBytes * pages;
  }

  //---------------------------------------------------------------------------
  // Ogg Specific Code
  //---------------------------------------------------------------------------

  /**
   * Write an OGG page header at the current end of the buffer, with all the
   * packet sizes set to zero, and position <code>oggCount</code> just after
   * it. The packet sizes, granule position and checksum are filled in later.
   * @param packets - the number of packets in the Ogg Page (must be between 1 and 255)
   * @param headertype - 2=bos: beginning of stream, 4=eos: end of stream
   */
  private void writeOggPageHeader(final int packets,
                                  final int headertype)
  {
    ensureBufferCapacity(count, OGG_PAGE_HEADER_SIZE + packets);
    AudioFileWriter.writeOggPageHeader(buf, count, headertype, granulepos,
                                       streamSerialNumber, pageCount++,
                                       packets, new byte[packets]);
    oggCount = count + OGG_PAGE_HEADER_SIZE + packets;
  }

  /**
   * Calculate and write the OGG page checksum. This now closes the Ogg page:
   * the granule position is updated and written, the checksum of the page is
   * computed and written, and the page is made available for reading.
   * <p>
   * NOTE(review): the granule position is advanced by the number of 16 bit
   * samples of all channels together - confirm this is what is wanted for
   * stereo streams.
   */
  private void writeOggPageChecksum()
  {
    // write the granulpos (frameSize is in bytes, 2 bytes per sample)
    granulepos += framesPerPacket * frameSize * packetCount / 2;
    AudioFileWriter.writeLong(buf, count + OGG_GRANULEPOS_OFFSET, granulepos);
    // write the checksum
    final int chksum = OggCrc.checksum(0, buf, count, oggCount - count);
    AudioFileWriter.writeInt(buf, count + OGG_CHECKSUM_OFFSET, chksum);
    // reset variables for a new page.
    count = oggCount;
    packetCount = 0;
  }

  /**
   * Write the OGG Speex header then the comment header, each in an Ogg page
   * of its own.
   */
  private void writeHeaderFrames()
  {
    // the comment packet (length + 8 bytes) must fit in a single packet size
    // byte of the Ogg page header, hence the limit of 247 characters.
    int length = comment.length();
    if (length > 247) {
      comment = comment.substring(0, 247);
      length = 247;
    }
    // 28 + 80 = size of the Ogg Header page
    // 28 + length + 8 = size of the Comment page
    ensureBufferCapacity(count, length + 144);
    // writes the OGG header page (2 = beginning of stream)
    AudioFileWriter.writeOggPageHeader(buf, count, 2, granulepos,
                                       streamSerialNumber, pageCount++,
                                       1, new byte[] {SPEEX_HEADER_SIZE});
    oggCount = count + OGG_PAGE_HEADER_SIZE + 1;
    /* writes the Speex header */
    AudioFileWriter.writeSpeexHeader(buf, oggCount, encoder.getSampleRate(),
                                     mode, encoder.getChannels(),
                                     encoder.getEncoder().getVbr(),
                                     framesPerPacket);
    oggCount += SPEEX_HEADER_SIZE;
    /* Calculate Checksum */
    int chksum = OggCrc.checksum(0, buf, count, oggCount - count);
    AudioFileWriter.writeInt(buf, count + OGG_CHECKSUM_OFFSET, chksum);
    count = oggCount;
    // writes the OGG comment page header
    AudioFileWriter.writeOggPageHeader(buf, count, 0, granulepos,
                                       streamSerialNumber, pageCount++,
                                       1, new byte[] {(byte)(length+8)});
    oggCount = count + OGG_PAGE_HEADER_SIZE + 1;
    /* writes the OGG comment page */
    AudioFileWriter.writeSpeexComment(buf, oggCount, comment);
    oggCount += length + 8;
    /* Calculate Checksum */
    chksum = OggCrc.checksum(0, buf, count, oggCount - count);
    AudioFileWriter.writeInt(buf, count + OGG_CHECKSUM_OFFSET, chksum);
    count = oggCount;
    // reset variables for a new page.
    packetCount = 0;
  }
}