/****************************************************************************** * * * Copyright (c) 1999-2003 Wimba S.A., All Rights Reserved. * * * * COPYRIGHT: * * This software is the property of Wimba S.A. * * This software is redistributed under the Xiph.org variant of * * the BSD license. * * Redistribution and use in source and binary forms, with or without * * modification, are permitted provided that the following conditions * * are met: * * - Redistributions of source code must retain the above copyright * * notice, this list of conditions and the following disclaimer. * * - Redistributions in binary form must reproduce the above copyright * * notice, this list of conditions and the following disclaimer in the * * documentation and/or other materials provided with the distribution. * * - Neither the name of Wimba, the Xiph.org Foundation nor the names of * * its contributors may be used to endorse or promote products derived * * from this software without specific prior written permission. * * * * WARRANTIES: * * This software is made available by the authors in the hope * * that it will be useful, but without any warranty. * * Wimba S.A. is not liable for any consequence related to the * * use of the provided software. * * * * Class: NbCodec.java * * * * Author: Marc GIMPEL * * Based on code by: Jean-Marc VALIN * * * * Date: 14th July 2003 * * * ******************************************************************************/ /* $Id: NbCodec.java,v 1.2 2004/10/21 16:21:57 mgimpel Exp $ */ package org.xiph.speex; /** * Narrowband Codec. * This class contains all the basic structures needed by the Narrowband * encoder and decoder. * * @author Marc Gimpel, Wimba S.A. (mgimpel@horizonwimba.com) * @version $Revision: 1.2 $ */ public class NbCodec implements Codebook { //--------------------------------------------------------------------------- // Constants //--------------------------------------------------------------------------- /** Very small initial value for some of the buffers. */ public static final float VERY_SMALL = (float) 0e-30; /** The Narrowband Frame Size gives the size in bits of a Narrowband frame for a given narrowband submode. */ public static final int[] NB_FRAME_SIZE = {5, 43, 119, 160, 220, 300, 364, 492, 79, 1, 1, 1, 1, 1, 1, 1}; /** The Narrowband Submodes gives the number of submodes possible for the Narrowband codec. */ public static final int NB_SUBMODES = 16; /** The Narrowband Submodes Bits gives the number bits used to encode the Narrowband Submode*/ public static final int NB_SUBMODE_BITS = 4; /** */ public static final float[] exc_gain_quant_scal1 = {-0.35f, 0.05f}; /** */ public static final float[] exc_gain_quant_scal3 = {-2.794750f, -1.810660f, -1.169850f, -0.848119f, -0.587190f, -0.329818f, -0.063266f, 0.282826f}; //--------------------------------------------------------------------------- // Tools //--------------------------------------------------------------------------- /** */ protected Lsp m_lsp; /** */ protected Filters filters; //--------------------------------------------------------------------------- // Parameters //--------------------------------------------------------------------------- protected SubMode[] submodes; /** Sub-mode data */ protected int submodeID; /** Activated sub-mode */ protected int first; /** Is this the first frame? */ protected int frameSize; /** Size of frames */ protected int subframeSize; /** Size of sub-frames */ protected int nbSubframes; /** Number of sub-frames */ protected int windowSize; /** Analysis (LPC) window length */ protected int lpcSize; /** LPC order */ protected int bufSize; /** Buffer size */ protected int min_pitch; /** Minimum pitch value allowed */ protected int max_pitch; /** Maximum pitch value allowed */ protected float gamma1; /** Perceptual filter: A(z/gamma1) */ protected float gamma2; /** Perceptual filter: A(z/gamma2) */ protected float lag_factor; /** Lag windowing Gaussian width */ protected float lpc_floor; /** Noise floor multiplier for A[0] in LPC analysis*/ protected float preemph; /** Pre-emphasis: P(z) = 1 - a*z^-1*/ protected float pre_mem; /** 1-element memory for pre-emphasis */ //--------------------------------------------------------------------------- // Variables //--------------------------------------------------------------------------- protected float[] frmBuf; /** Input buffer (original signal) */ protected int frmIdx; protected float[] excBuf; /** Excitation buffer */ protected int excIdx; /** Start of excitation frame */ protected float[] innov; /** Innovation for the frame */ protected float[] lpc; /** LPCs for current frame */ protected float[] qlsp; /** Quantized LSPs for current frame */ protected float[] old_qlsp; /** Quantized LSPs for previous frame */ protected float[] interp_qlsp; /** Interpolated quantized LSPs */ protected float[] interp_qlpc; /** Interpolated quantized LPCs */ protected float[] mem_sp; /** Filter memory for synthesis signal */ protected float[] pi_gain; /** Gain of LPC filter at theta=pi (fe/2) */ protected float[] awk1, awk2, awk3; // Vocoder data protected float voc_m1; protected float voc_m2; protected float voc_mean; protected int voc_offset; protected int dtx_enabled; /** 1 for enabling DTX, 0 otherwise */ /** * Constructor. */ public NbCodec() { m_lsp = new Lsp(); filters = new Filters(); } /** * Narrowband initialisation. */ public void nbinit() { // Initialize SubModes submodes = buildNbSubModes(); submodeID = 5; // Initialize narrwoband parameters and variables init(160, 40, 10, 640); } /** * Initialisation. * @param frameSize * @param subframeSize * @param lpcSize * @param bufSize */ protected void init(final int frameSize, final int subframeSize, final int lpcSize, final int bufSize) { first = 1; // Codec parameters, should eventually have several "modes" this.frameSize = frameSize; this.windowSize = frameSize*3/2; this.subframeSize = subframeSize; this.nbSubframes = frameSize/subframeSize; this.lpcSize = lpcSize; this.bufSize = bufSize; min_pitch = 17; max_pitch = 144; preemph = 0.0f; pre_mem = 0.0f; gamma1 = 0.9f; gamma2 = 0.6f; lag_factor = .01f; lpc_floor = 1.0001f; frmBuf = new float[bufSize]; frmIdx = bufSize - windowSize; excBuf = new float[bufSize]; excIdx = bufSize - windowSize; innov = new float[frameSize]; lpc = new float[lpcSize+1]; qlsp = new float[lpcSize]; old_qlsp = new float[lpcSize]; interp_qlsp = new float[lpcSize]; interp_qlpc = new float[lpcSize+1]; mem_sp = new float[5*lpcSize]; // TODO - check why 5 (why not 2 or 1) pi_gain = new float[nbSubframes]; awk1 = new float[lpcSize+1]; awk2 = new float[lpcSize+1]; awk3 = new float[lpcSize+1]; voc_m1 = voc_m2 = voc_mean = 0; voc_offset = 0; dtx_enabled = 0; // disabled by default } /** * Build narrowband submodes */ private static SubMode[] buildNbSubModes() { /* Initialize Long Term Predictions */ Ltp3Tap ltpNb = new Ltp3Tap(gain_cdbk_nb, 7, 7); Ltp3Tap ltpVlbr = new Ltp3Tap(gain_cdbk_lbr, 5, 0); Ltp3Tap ltpLbr = new Ltp3Tap(gain_cdbk_lbr, 5, 7); Ltp3Tap ltpMed = new Ltp3Tap(gain_cdbk_lbr, 5, 7); LtpForcedPitch ltpFP = new LtpForcedPitch(); /* Initialize Codebook Searches */ NoiseSearch noiseSearch = new NoiseSearch(); SplitShapeSearch ssNbVlbrSearch = new SplitShapeSearch(40, 10, 4, exc_10_16_table, 4, 0); SplitShapeSearch ssNbLbrSearch = new SplitShapeSearch(40, 10, 4, exc_10_32_table, 5, 0); SplitShapeSearch ssNbSearch = new SplitShapeSearch(40, 5, 8, exc_5_64_table, 6, 0); SplitShapeSearch ssNbMedSearch = new SplitShapeSearch(40, 8, 5, exc_8_128_table, 7, 0); SplitShapeSearch ssSbSearch = new SplitShapeSearch(40, 5, 8, exc_5_256_table, 8, 0); SplitShapeSearch ssNbUlbrSearch = new SplitShapeSearch(40, 20, 2, exc_20_32_table, 5, 0); /* Initialize Line Spectral Pair Quantizers */ NbLspQuant nbLspQuant = new NbLspQuant(); LbrLspQuant lbrLspQuant = new LbrLspQuant(); /* Initialize narrow-band modes */ SubMode[] nbSubModes = new SubMode[NB_SUBMODES]; /* 2150 bps "vocoder-like" mode for comfort noise */ nbSubModes[1] = new SubMode(0, 1, 0, 0, lbrLspQuant, ltpFP, noiseSearch, .7f, .7f, -1, 43); /* 5.95 kbps very low bit-rate mode */ nbSubModes[2] = new SubMode(0, 0, 0, 0, lbrLspQuant, ltpVlbr, ssNbVlbrSearch, 0.7f, 0.5f, .55f, 119); /* 8 kbps low bit-rate mode */ nbSubModes[3] = new SubMode(-1, 0, 1, 0, lbrLspQuant, ltpLbr, ssNbLbrSearch, 0.7f, 0.55f, .45f, 160); /* 11 kbps medium bit-rate mode */ nbSubModes[4] = new SubMode(-1, 0, 1, 0, lbrLspQuant, ltpMed, ssNbMedSearch, 0.7f, 0.63f, .35f, 220); /* 15 kbps high bit-rate mode */ nbSubModes[5] = new SubMode(-1, 0, 3, 0, nbLspQuant, ltpNb, ssNbSearch, 0.7f, 0.65f, .25f, 300); /* 18.2 high bit-rate mode */ nbSubModes[6] = new SubMode(-1, 0, 3, 0, nbLspQuant, ltpNb, ssSbSearch, 0.68f, 0.65f, .1f, 364); /* 24.6 kbps high bit-rate mode */ nbSubModes[7] = new SubMode(-1, 0, 3, 1, nbLspQuant, ltpNb, ssNbSearch, 0.65f, 0.65f, -1, 492); /* 3.95 kbps very low bit-rate mode */ nbSubModes[8] = new SubMode(0, 1, 0, 0, lbrLspQuant, ltpFP, ssNbUlbrSearch, .7f, .5f, .65f, 79); /* Return the Narrowband SubModes*/ return nbSubModes; } /** * Returns the size of a frame (ex: 160 samples for a narrowband frame, * 320 for wideband and 640 for ultra-wideband). * @return the size of a frame (number of audio samples in a frame). */ public int getFrameSize() { return frameSize; } /** * Returns whether or not we are using Discontinuous Transmission encoding. * @return whether or not we are using Discontinuous Transmission encoding. */ public boolean getDtx() { return dtx_enabled != 0; } /** * Returns the Pitch Gain array. * @return the Pitch Gain array. */ public float[] getPiGain() { return pi_gain; } /** * Returns the excitation array. * @return the excitation array. */ public float[] getExc() { float[] excTmp = new float[frameSize]; System.arraycopy(excBuf, excIdx, excTmp, 0, frameSize); return excTmp; } /** * Returns the innovation array. * @return the innovation array. */ public float[] getInnov() { return innov; } }