SpeechDetector.java example

Explorer
Openfire-master
- dbutil
  - src
    - main
      - java
        org
        jivesoftware
        database
        AbstractConnection.java
        CallableStatementWrapper.java
        PreparedStatementWrapper.java
        ProfiledConnection.java
        ProfiledConnectionEntry.java
        StatementWrapper.java
- src
/*
 * Copyright 2007 Sun Microsystems, Inc.
 *
 * This file is part of jVoiceBridge.
 *
 * jVoiceBridge is free software: you can redistribute it and/or modify 
 * it under the terms of the GNU General Public License version 2 as 
 * published by the Free Software Foundation and distributed hereunder 
 * to you.
 *
 * jVoiceBridge is distributed in the hope that it will be useful, 
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
 *
 * Sun designates this particular file as subject to the "Classpath"
 * exception as provided by Sun in the License file that accompanied this 
 * code. 
 */

package com.sun.voip;

import java.io.IOException;

/**
 * Simple energy-based speech detector.
 *
 * Audio is fed in through the processData methods.  Samples are averaged in
 * groups of eight, the averages are squared and accumulated, and once
 * cnThresh groups have been collected the mean power of that window is
 * compared against an adaptive threshold (powthresh) to decide whether the
 * speaker has started or stopped speaking.
 *
 * The tuning parameters (cnThresh, powerThresholdLimit, onThresh, offThresh)
 * are static and therefore shared by every instance.  No synchronization is
 * performed by this class.
 */
public class SpeechDetector {
    /** Current speaking state as last decided by speakingChanged(). */
    private boolean isSpeaking = false;

    static final int POW_THRESH = 50000; // initial power threshold

    /*
     * Number of 8-sample averages that must accumulate before a
     * speaking decision is made.  Overwritten by the constructor.
     */
    static int cnThresh = 50;

    /*
     * While below the threshold, powthresh is clamped to at most
     * (window power * powerThresholdLimit + 2).
     */
    static double powerThresholdLimit = 1.05;

    /* Consecutive loud windows that must be exceeded to switch to speaking. */
    static int onThresh = 1;

    /* Consecutive quiet windows that must be exceeded to switch to silent. */
    static int offThresh = 4;

    double powthresh = POW_THRESH;  // adaptive power threshold
    int oncount = 0;                // consecutive windows above powthresh
    int offcount = 0;               // consecutive windows at or below powthresh
    double sum = 0;                 // sum of squared averages in current window
    double cnt = 0;                 // number of averages in current window

    int speechDetectorCalls;        // number of processData() calls
    long speechDetectorTime;        // total time spent in processData()

    String id;
    MediaInfo mediaInfo;

    /**
     * Creates a detector for one audio stream.
     *
     * NOTE: this also recomputes the static cnThresh from the given media
     * info, which affects every existing SpeechDetector instance.
     *
     * @param id identifier used as a prefix in printStatistics(); may be null
     * @param mediaInfo supplies the sample rate and channel count
     */
    public SpeechDetector(String id, MediaInfo mediaInfo) {
        this.id = id;
        this.mediaInfo = mediaInfo;

        /*
         * Scale the window size with the sample rate (50 averages at 8 kHz).
         * Multiply before dividing so that rates which are not a multiple
         * of 8000 (e.g. 11025, 44100) are not truncated, and rates below
         * 8000 do not collapse to zero.
         */
        cnThresh = mediaInfo.getSampleRate() * 50 / 8000;

        if (mediaInfo.getChannels() == 2) {
            cnThresh *= 2;
        }
    }

    /**
     * Sets the number of averages needed before a speaking decision is made.
     *
     * @param cnThresh new window size, shared by all instances
     */
    public static void setCnThresh(int cnThresh) {
        SpeechDetector.cnThresh = cnThresh;

        if (Logger.logLevel >= Logger.LOG_MOREINFO) {
            Logger.println("cnThresh set to " + cnThresh);
        }
    }

    /** @return the number of averages needed before a decision is made */
    public static int getCnThresh() {
        return cnThresh;
    }

    /**
     * Sets the factor used to clamp the adaptive threshold during quiet
     * windows.
     *
     * @param powerThresholdLimit new factor, shared by all instances
     */
    public static void setPowerThresholdLimit(double powerThresholdLimit) {
        SpeechDetector.powerThresholdLimit = powerThresholdLimit;

        if (Logger.logLevel >= Logger.LOG_MOREINFO) {
            Logger.println("powerThresholdLimit set to " + powerThresholdLimit);
        }
    }

    /** @return the factor used to clamp the adaptive threshold */
    public static double getPowerThresholdLimit() {
        return powerThresholdLimit;
    }

    /**
     * Sets how many consecutive loud windows must be exceeded before the
     * state switches to speaking.
     *
     * @param onThresh new value, shared by all instances
     */
    public static void setOnThresh(int onThresh) {
        SpeechDetector.onThresh = onThresh;

        if (Logger.logLevel >= Logger.LOG_MOREINFO) {
            Logger.println("onThresh set to " + onThresh);
        }
    }

    /** @return the consecutive-loud-window threshold */
    public static int getOnThresh() {
        return onThresh;
    }

    /**
     * Sets how many consecutive quiet windows must be exceeded before the
     * state switches to not speaking.
     *
     * @param offThresh new value, shared by all instances
     */
    public static void setOffThresh(int offThresh) {
        SpeechDetector.offThresh = offThresh;

        if (Logger.logLevel >= Logger.LOG_MOREINFO) {
            Logger.println("offThresh set to " + offThresh);
        }
    }

    /** @return the consecutive-quiet-window threshold */
    public static int getOffThresh() {
        return offThresh;
    }

    /**
     * Returns the detector to its initial, not-speaking state.
     *
     * The accumulated window, the adaptive threshold and the on/off
     * hysteresis counters are all cleared so that the detector behaves
     * like a newly constructed one.
     *
     * @return true if the detector considered the speaker to be speaking
     *         immediately before the reset
     */
    public boolean reset() {
        boolean wasSpeaking = isSpeaking;

        sum = 0;
        cnt = 0;
        powthresh = POW_THRESH;

        /*
         * Clear the hysteresis counters too; otherwise a stale oncount
         * could flip the state to speaking on the very next loud window.
         */
        oncount = 0;
        offcount = 0;

        isSpeaking = false;

        return wasSpeaking;
    }

    /**
     * Feeds 16-bit linear audio held in a byte array to the detector.
     *
     * Data is consumed in groups of 16 bytes (8 samples); trailing bytes
     * that do not fill a whole group are ignored.  Only the even-indexed
     * byte of each pair is used, which is treated as the most significant
     * byte of the sample.
     *
     * @param linearData 16-bit linear data in a byte array
     * @return true if speaking started or stopped
     */
    public boolean processData(byte[] linearData) {
        speechDetectorCalls++;
        long start = CurrentTime.getTime();

        for (int i = 0; i + 16 <= linearData.length; i += 16) {
            int msbSum = 0;

            /* sum the MSB of the next 8 samples */
            for (int j = 0; j < 16; j += 2) {
                msbSum += linearData[i + j];
            }

            accumulate(msbSum);
        }

        speechDetectorTime += (CurrentTime.getTime() - start);
        return speakingChanged();
    }

    /**
     * Feeds 16-bit linear audio held in an int array to the detector.
     *
     * Data is consumed in groups of 8 samples; trailing samples that do
     * not fill a whole group are ignored.  Only bits 8-15 of each element
     * are used, interpreted as a signed byte.
     *
     * @param linearData 16-bit linear data in an int array
     * @return true if speaking started or stopped
     */
    public boolean processData(int[] linearData) {
        speechDetectorCalls++;
        long start = CurrentTime.getTime();

        for (int i = 0; i + 8 <= linearData.length; i += 8) {
            int msbSum = 0;

            /* sum the MSB of the next 8 samples */
            for (int j = 0; j < 8; j++) {
                msbSum += (byte) (linearData[i + j] >> 8);
            }

            accumulate(msbSum);
        }

        speechDetectorTime += (CurrentTime.getTime() - start);
        return speakingChanged();
    }

    /*
     * Adds one group of eight samples to the current window.
     * msbSum is the sum of the eight samples' most significant bytes.
     */
    private void accumulate(int msbSum) {
        double avg = msbSum / 8.;

        /*
         * Divide by the number of channels.  For stereo we're
         * likely to be getting similar sounds in each channel.
         */
        avg /= mediaInfo.getChannels();

        /*
         * By squaring the average, the larger values weigh more
         * than the smaller ones.  Squaring also makes everything positive.
         */
        sum += (avg * avg);
        cnt++;
    }

    /*
     * Evaluates the current window if it is complete.
     * Returns true if the speaker has started or stopped speaking.
     */
    private boolean speakingChanged() {
        /*
         * Not enough data yet.  The cnt == 0 test avoids computing 0/0
         * (NaN) when cnThresh has been set to zero or less.
         */
        if (cnt < cnThresh || cnt == 0) {
            return false;
        }

        /* value is the mean power of this window */
        double value = sum / cnt;
        boolean changed = false;

        if (value > powthresh) {
            oncount++;
            offcount = 0;

            if (oncount > onThresh && !isSpeaking) {
                isSpeaking = true;
                changed = true;
            }

            // drag powthresh up towards the observed power
            powthresh = ((powthresh + 2) * 63 + value) / 64;
        } else {
            offcount++;
            oncount = 0;

            if (offcount > offThresh && isSpeaking) {
                isSpeaking = false;
                changed = true;
            }

            // make sure powthresh is <= value * powerThresholdLimit + 2
            double ceiling = value * powerThresholdLimit + 2;

            if (powthresh > ceiling) {
                powthresh = ceiling;
            }
        }

        /* start a new window */
        sum = 0;
        cnt = 0;

        return changed;
    }

    /**
     * Reports the most recent speaking decision.
     *
     * @return true if the last completed window sequence was classified as
     *         speech; false if there has not been enough data yet or the
     *         detector decided there is no speech
     */
    public boolean isSpeaking() {
        return isSpeaking;
    }

    /**
     * Writes the call count and the average time per processData() call
     * to the log file.
     */
    public void printStatistics() {
        String prefix = (id != null) ? "Call " + id + ":  " : "";

        Logger.writeFile(prefix + "Speech detector calls:  "
            + speechDetectorCalls);

        if (speechDetectorCalls != 0) {
            /*
             * NOTE(review): dividing by getTimeUnitsPerSecond() looks like
             * it yields seconds per call although the label says ms --
             * confirm against CurrentTime before changing.
             */
            Logger.writeFile(prefix + "SpeechDetector average ms per call:  "
                + ((float) ((float) speechDetectorTime / speechDetectorCalls)
                    / CurrentTime.getTimeUnitsPerSecond()));
        }
    }

    /**
     * Test driver: runs the detector over an audio file and logs each
     * speaking transition.
     *
     * @param args a single element, the path of the .au file to analyze
     */
    public static void main(String[] args) {
        if (args.length != 1) {
            Logger.println("Usage:  java SpeechDetector <.au file>");
            System.exit(1);
        }

        TreatmentManager treatmentManager = null;

        try {
            treatmentManager = new TreatmentManager(args[0], 0);
        } catch (IOException e) {
            System.out.println("Can't get treatment " + e.getMessage());
            System.exit(1);
        }

        MediaInfo mediaInfo = new MediaInfo((byte) 0, RtpPacket.PCM_ENCODING,
            treatmentManager.getSampleRate(), treatmentManager.getChannels(),
            false);

        Logger.println("MediaInfo " + mediaInfo);

        SpeechDetector speechDetector = new SpeechDetector("Test", mediaInfo);

        byte[] linearData;

        while ((linearData = treatmentManager.getLinearDataBytes(
                RtpPacket.PACKET_PERIOD)) != null) {

            if (!speechDetector.processData(linearData)) {
                continue;
            }

            if (speechDetector.isSpeaking()) {
                Logger.println("Started speaking...");
            } else {
                Logger.println("Stopped speaking...");
            }
        }
    }

}