/*
* Copyright 2010 LIUM, based on Carnegie Mellon University previous work.
* Portions Copyright 2002 Sun Microsystems, Inc.
* Portions Copyright 2002 Mitsubishi Electric Research Laboratories.
* Portions Copyright 2010 LIUM, University of Le Mans, France
-> Yannick Esteve, Anthony Rousseau
* All Rights Reserved. Use is subject to license terms.
*
* See the file "license.terms" for information on usage and
* redistribution of this file, and for a DISCLAIMER OF ALL
* WARRANTIES.
*
*/
package edu.cmu.sphinx.linguist.language.ngram.large;
import java.io.*;
import java.net.URL;
import java.util.*;
import java.util.logging.Level;
import java.util.logging.Logger;
import edu.cmu.sphinx.linguist.WordSequence;
import edu.cmu.sphinx.linguist.dictionary.Dictionary;
import edu.cmu.sphinx.linguist.dictionary.Word;
import edu.cmu.sphinx.linguist.language.ngram.LanguageModel;
import edu.cmu.sphinx.linguist.util.LRUCache;
import edu.cmu.sphinx.util.LogMath;
import edu.cmu.sphinx.util.TimerPool;
import edu.cmu.sphinx.util.props.*;
/**
* Language model that uses a binary NGram language model file ("DMP file")
* generated by the SphinxBase sphinx_lm_convert.
*/
public class LargeNGramModel implements LanguageModel {
    /**
     * The property for the name of the file that logs all the queried N-grams.
     * If this property is set to null, it means that the queried N-grams are
     * not logged.
     */
    @S4String(mandatory = false)
    public static final String PROP_QUERY_LOG_FILE = "queryLogFile";
    /** The property that defines that maximum number of ngrams to be cached */
    @S4Integer(defaultValue = 100000)
    public static final String PROP_NGRAM_CACHE_SIZE = "ngramCacheSize";
    /**
     * The property that controls whether the ngram caches are cleared after
     * every utterance
     */
    @S4Boolean(defaultValue = false)
    public static final String PROP_CLEAR_CACHES_AFTER_UTTERANCE = "clearCachesAfterUtterance";
    /** The property that defines the language weight for the search */
    @S4Double(defaultValue = 1.0f)
    public final static String PROP_LANGUAGE_WEIGHT = "languageWeight";
    /**
     * The property that controls whether or not the language model will apply
     * the language weight and word insertion probability
     */
    @S4Boolean(defaultValue = false)
    public final static String PROP_APPLY_LANGUAGE_WEIGHT_AND_WIP = "applyLanguageWeightAndWip";
    /** Word insertion probability property */
    @S4Double(defaultValue = 1.0f)
    public final static String PROP_WORD_INSERTION_PROBABILITY = "wordInsertionProbability";
    /** If true, use full bigram information to determine smear */
    @S4Boolean(defaultValue = false)
    public final static String PROP_FULL_SMEAR = "fullSmear";
    /**
     * The number of bytes per N-gram in the LM file generated by the
     * CMU-Cambridge Statistical Language Modeling Toolkit.
     */
    public static final int BYTES_PER_NGRAM = 4;
    /** Bytes per entry for N-grams of the model's maximum order (no backoff/follower fields). */
    public static final int BYTES_PER_NMAXGRAM = 2;
    // Magic number identifying a serialized smear-info file ("things go better...")
    private final static int SMEAR_MAGIC = 0xC0CAC01A; // things go better
    // ------------------------------
    // Configuration data
    // ------------------------------
    URL location;
    protected Logger logger;
    protected LogMath logMath;
    protected int maxDepth;
    protected int ngramCacheSize;
    protected boolean clearCacheAfterUtterance;
    protected boolean fullSmear;
    protected Dictionary dictionary;
    protected String format;
    protected boolean applyLanguageWeightAndWip;
    protected float languageWeight;
    protected float unigramWeight;
    protected double wip;
    // -------------------------------
    // Statistics
    // -------------------------------
    private int ngramMisses;
    private int ngramHits;
    private int smearTermCount;
    protected String ngramLogFile;
    // -------------------------------
    // subcomponents
    // --------------------------------
    private BinaryLoader loader;
    private PrintWriter logFile;
    // -------------------------------
    // Working data
    // --------------------------------
    // Maps a dictionary Word to its unigram entry (built during allocate()).
    private Map<Word, UnigramProbability> unigramIDMap;
    // Per-order caches of loaded follower buffers, keyed by word sequence.
    private Map<WordSequence, NGramBuffer>[] loadedNGramBuffers;
    // LRU cache of full-depth n-gram probabilities.
    private LRUCache<WordSequence, Float> ngramProbCache;
    // Smear terms keyed by a packed (word1, word2) bigram ID.
    private Map<Long, Float> bigramSmearMap;
    private NGramBuffer[] loadedBigramBuffers;
    private UnigramProbability[] unigrams;
    private int[][] ngramSegmentTable;
    private float[][] ngramProbTable;
    private float[][] ngramBackoffTable;
    private float[] unigramSmearTerm;
    /**
     * Creates a fully-configured large n-gram language model.
     *
     * @param format binary LM format name passed through to the loader
     * @param location URL of the binary LM file
     * @param ngramLogFile file to log queried n-grams to, or null to disable
     * @param maxNGramCacheSize capacity of the n-gram probability LRU cache
     * @param clearCacheAfterUtterance whether caches are cleared after each utterance
     * @param maxDepth maximum n-gram order to use (clamped to the model's order in allocate())
     * @param dictionary dictionary used to map LM words to Word objects (may be null)
     * @param applyLanguageWeightAndWip whether to fold language weight and WIP into probabilities
     * @param languageWeight language weight applied by the loader
     * @param wip word insertion probability
     * @param unigramWeight unigram weight applied by the loader
     * @param fullSmear if true, compute bigram-level smear information
     */
    public LargeNGramModel(String format, URL location, String ngramLogFile,
            int maxNGramCacheSize, boolean clearCacheAfterUtterance,
            int maxDepth, Dictionary dictionary,
            boolean applyLanguageWeightAndWip, float languageWeight,
            double wip, float unigramWeight, boolean fullSmear) {
        logger = Logger.getLogger(getClass().getName());
        this.format = format;
        this.location = location;
        this.ngramLogFile = ngramLogFile;
        this.ngramCacheSize = maxNGramCacheSize;
        this.clearCacheAfterUtterance = clearCacheAfterUtterance;
        this.maxDepth = maxDepth;
        logMath = LogMath.getLogMath();
        this.dictionary = dictionary;
        this.applyLanguageWeightAndWip = applyLanguageWeightAndWip;
        this.languageWeight = languageWeight;
        this.wip = wip;
        this.unigramWeight = unigramWeight;
        this.fullSmear = fullSmear;
    }
    /** No-argument constructor for the configuration system; fields are set via newProperties(). */
    public LargeNGramModel() {
    }
    /**
     * Configures this model from a property sheet. Called by the configuration
     * framework instead of the full constructor.
     *
     * @see edu.cmu.sphinx.util.props.Configurable#newProperties(edu.cmu.sphinx.
     * util.props.PropertySheet)
     */
    public void newProperties(PropertySheet ps) throws PropertyException {
        logger = ps.getLogger();
        location = ConfigurationManagerUtils.getResource(PROP_LOCATION, ps);
        ngramLogFile = ps.getString(PROP_QUERY_LOG_FILE);
        ngramCacheSize = ps.getInt(PROP_NGRAM_CACHE_SIZE);
        clearCacheAfterUtterance = ps
                .getBoolean(PROP_CLEAR_CACHES_AFTER_UTTERANCE);
        maxDepth = ps.getInt(LanguageModel.PROP_MAX_DEPTH);
        dictionary = (Dictionary) ps.getComponent(PROP_DICTIONARY);
        applyLanguageWeightAndWip = ps
                .getBoolean(PROP_APPLY_LANGUAGE_WEIGHT_AND_WIP);
        languageWeight = ps.getFloat(PROP_LANGUAGE_WEIGHT);
        wip = ps.getDouble(PROP_WORD_INSERTION_PROBABILITY);
        unigramWeight = ps.getFloat(PROP_UNIGRAM_WEIGHT);
        fullSmear = ps.getBoolean(PROP_FULL_SMEAR);
    }
    /**
     * Loads the binary LM and builds all in-memory lookup structures:
     * the unigram table, per-order probability/backoff/segment tables,
     * the unigram-to-Word map, and the n-gram caches. Must be called
     * before any probability query.
     *
     * @see edu.cmu.sphinx.linguist.language.ngram.LanguageModel#allocate()
     */
    @SuppressWarnings("unchecked")
    public void allocate() throws IOException {
        TimerPool.getTimer(this, "Load LM").start();
        logger.info("Loading n-gram language model from: " + location);
        // create the query log file if one was configured
        if (ngramLogFile != null)
            logFile = new PrintWriter(new FileOutputStream(ngramLogFile));
        // Local files are memory/file mapped; anything else is read as a stream.
        if (location.getProtocol() == null
                || location.getProtocol().equals("file")) {
            try {
                loader = new BinaryLoader(new File(location.toURI()), format,
                        applyLanguageWeightAndWip, languageWeight, wip,
                        unigramWeight);
            } catch (Exception e) {
                // URI conversion can fail for odd file URLs; fall back to the raw path
                loader = new BinaryLoader(new File(location.getPath()), format,
                        applyLanguageWeightAndWip, languageWeight, wip,
                        unigramWeight);
            }
        } else {
            loader = new BinaryStreamLoader(location, format,
                    applyLanguageWeightAndWip, languageWeight, wip,
                    unigramWeight);
        }
        unigramIDMap = new HashMap<Word, UnigramProbability>();
        unigrams = loader.getUnigrams();
        loadedNGramBuffers = new Map[loader.getMaxDepth()];
        ngramProbTable = new float[loader.getMaxDepth()][];
        ngramBackoffTable = new float[loader.getMaxDepth()][];
        ngramSegmentTable = new int[loader.getMaxDepth()][];
        // Order-i tables live at index i-1. Probabilities exist for order >= 2
        // (unigram probs are in 'unigrams'); backoff/segment tables start at order 3.
        for (int i = 1; i <= loader.getMaxDepth(); i++) {
            loadedNGramBuffers[i - 1] = new HashMap<WordSequence, NGramBuffer>();
            if (i >= 2)
                ngramProbTable[i - 1] = loader.getNGramProbabilities(i);
            if (i > 2) {
                ngramBackoffTable[i - 1] = loader.getNGramBackoffWeights(i);
                ngramSegmentTable[i - 1] = loader.getNGramSegments(i);
            }
        }
        ngramProbCache = new LRUCache<WordSequence, Float>(ngramCacheSize);
        if (dictionary != null)
            buildUnigramIDMap(dictionary);
        else
            buildUnigramIDMap();
        loadedBigramBuffers = new NGramBuffer[unigrams.length];
        // Clamp the requested depth to what the model actually provides.
        if (maxDepth <= 0 || maxDepth > loader.getMaxDepth())
            maxDepth = loader.getMaxDepth();
        for (int i = 1; i <= loader.getMaxDepth(); i++)
            logger.info(Integer.toString(i) + "-grams: "
                    + loader.getNumberNGrams(i));
        if (fullSmear) {
            System.out.println("Full Smear");
            try {
                // Prefer precomputed smear info; fall back to computing it fresh.
                System.out.println("... Reading ...");
                readSmearInfo("smear.dat");
                System.out.println("... Done ");
            } catch (IOException e) {
                System.out.println("... " + e);
                System.out.println("... Calculating");
                buildSmearInfo();
                System.out.println("... Writing");
                // writeSmearInfo("smear.dat");
                System.out.println("... Done");
            }
        }
        TimerPool.getTimer(this, "Load LM").stop();
    }
    /**
     * Releases the resources held by the binary loader.
     * NOTE(review): assumes allocate() was called first; loader is null otherwise.
     *
     * @see edu.cmu.sphinx.linguist.language.ngram.LanguageModel#deallocate()
     */
    public void deallocate() throws IOException {
        loader.deallocate();
    }
/**
* Builds the map from unigram to unigramID. Also finds the startWordID and
* endWordID.
*
* @param dictionary
* */
private void buildUnigramIDMap(Dictionary dictionary) {
int missingWords = 0;
String[] words = loader.getWords();
for (int i = 0; i < words.length; i++) {
Word word = dictionary.getWord(words[i]);
if (word == null) {
logger.warning("The dictionary is missing a phonetic transcription for the word '"
+ words[i] + "'");
missingWords++;
}
unigramIDMap.put(word, unigrams[i]);
if (logger.isLoggable(Level.FINE))
logger.fine("Word: " + word);
}
if (missingWords > 0)
logger.warning("Dictionary is missing " + missingWords
+ " words that are contained in the language model.");
}
private void buildUnigramIDMap() {
String[] words = loader.getWords();
for (int i = 0; i < words.length; i++) {
Word word = new Word(words[i], null, false);
unigramIDMap.put(word, unigrams[i]);
}
}
    /**
     * Called at the end of each utterance: clears the n-gram caches and, if
     * query logging is enabled, writes an end-of-utterance marker.
     */
    @Override
    public void onUtteranceEnd() {
        clearCache();
        if (logFile != null) {
            logFile.println("<END_UTT>");
            logFile.flush();
        }
    }
/** Clears the various N-gram caches. */
private void clearCache() {
for (int i = 0; i < loadedBigramBuffers.length; i++) {
NGramBuffer buffer = loadedBigramBuffers[i];
if (buffer != null) {
if (!buffer.getUsed())
loadedBigramBuffers[i] = null; // free the BigramBuffer
else
buffer.setUsed(false);
}
}
loadedBigramBuffers = new NGramBuffer[unigrams.length];
for (int i = 2; i <= loader.getMaxDepth(); i++) {
loadedNGramBuffers[i - 1] = new HashMap<WordSequence, NGramBuffer>();
}
logger.info("LM Cache Size: " + ngramProbCache.size() + " Hits: "
+ ngramHits + " Misses: " + ngramMisses);
if (clearCacheAfterUtterance) {
ngramProbCache = new LRUCache<WordSequence, Float>(ngramCacheSize);
}
}
    /**
     * Gets the ngram probability of the word sequence represented by the word
     * list. Full-depth queries are served from (and stored into) the LRU
     * cache; all queries are optionally logged to the query log file.
     *
     * @param wordSequence
     *            the word sequence
     * @return the probability of the word sequence. Probability is in logMath
     *         log base
     * @throws Error if the sequence is longer than the model's max depth
     */
    public float getProbability(WordSequence wordSequence) {
        int numberWords = wordSequence.size();
        Float probability;
        if (numberWords > maxDepth) {
            throw new Error("Unsupported NGram: " + wordSequence.size());
        }
        // Only full-depth n-grams are cached; shorter ones arise during backoff.
        if (numberWords == maxDepth) {
            probability = ngramProbCache.get(wordSequence);
            if (probability != null) {
                ngramHits++;
                return probability;
            }
            ngramMisses++;
        }
        probability = getNGramProbability(wordSequence);
        if (numberWords == maxDepth)
            ngramProbCache.put(wordSequence, probability);
        if (logFile != null && probability != null)
            logFile.println(wordSequence.toString().replace("][", " ") + " : "
                    + Float.toString(probability));
        return probability;
    }
    /**
     * Computes the (possibly backed-off) probability of the given n-gram.
     * If the exact n-gram is present its stored probability is returned;
     * otherwise the standard backoff recursion applies: backoff weight of the
     * (n-1)-gram history plus the probability of the shortened sequence.
     */
    private Float getNGramProbability(WordSequence wordSequence) {
        int numberWords = wordSequence.size();
        Word firstWord = wordSequence.getWord(0);
        // No n-grams of this order, or unknown history head: back off immediately.
        if (loader.getNumberNGrams(numberWords) == 0 || !hasUnigram(firstWord))
            return getNGramProbability(wordSequence.getNewest());
        if (numberWords < 2) {
            return getUnigramProbability(wordSequence);
        }
        NGramProbability nGProbability = findNGram(wordSequence);
        if (nGProbability != null) {
            // Exact hit: look up the shared probability value by ID.
            return ngramProbTable[numberWords - 1][nGProbability
                    .getProbabilityID()];
        }
        if (numberWords == 2) {
            // Bigram backoff: unigram backoff weight + unigram probability.
            UnigramProbability unigramProb = getUnigram(firstWord);
            UnigramProbability unigramProb1 = getUnigram(wordSequence
                    .getWord(1));
            return unigramProb.getLogBackoff()
                    + unigramProb1.getLogProbability();
        }
        // Higher-order backoff: add the history's backoff weight if the
        // (n-1)-gram history exists, otherwise just recurse on the shorter gram.
        NGramProbability nMinus1Gram = findNGram(wordSequence.getOldest());
        if (nMinus1Gram != null) {
            return ngramBackoffTable[numberWords - 1][nMinus1Gram
                    .getBackoffID()] + getProbability(wordSequence.getNewest());
        }
        return getProbability(wordSequence.getNewest());
    }
    /**
     * Finds or loads the NGram probability of the given NGram.
     *
     * Looks up (or loads and caches) the buffer of followers of the n-gram's
     * history (its oldest n-1 words), then searches that buffer for the last
     * word.
     *
     * @param wordSequence
     *            the NGram to load
     * @return a NGramProbability of the given NGram, or null if not in the LM
     */
    private NGramProbability findNGram(WordSequence wordSequence) {
        int numberWords = wordSequence.size();
        NGramProbability nGram = null;
        WordSequence oldest = wordSequence.getOldest();
        NGramBuffer nGramBuffer = loadedNGramBuffers[numberWords - 1]
                .get(oldest);
        if (nGramBuffer == null) {
            nGramBuffer = getNGramBuffer(oldest);
            if (nGramBuffer != null)
                loadedNGramBuffers[numberWords - 1].put(oldest, nGramBuffer);
        }
        if (nGramBuffer != null) {
            // Binary/linear search for the final word among the followers.
            int nthWordID = getWordID(wordSequence.getWord(numberWords - 1));
            nGram = nGramBuffer.findNGram(nthWordID);
        }
        return nGram;
    }
/**
* Tells if the model is 16 or 32 bits.
*
* @return true if 32 bits, false otherwise
*/
private boolean is32bits() {
if (loader.getBytesPerField() == 4)
return true;
return false;
}
    /**
     * Loads into a buffer all the NGram followers of the given N-1Gram.
     *
     * Computes the file offset and byte size of the follower region for the
     * given history and reads it via the loader. Max-order entries use the
     * smaller BYTES_PER_NMAXGRAM record size (no backoff/follower fields).
     *
     * @param ws
     *            the N-1Gram to find followers
     *
     * @return a NGramBuffer of all the NGram followers of the given sequence,
     *         or null if the history has no followers in the LM
     */
    private NGramBuffer loadNGramBuffer(WordSequence ws) {
        int firstWordID = getWordID(ws.getWord(0));
        int firstCurrentNGramEntry = 0;
        int numberNGrams = 0;
        int size = 0;
        long position = 0;
        int orderBuffer = ws.size() + 1; // order of the followers being loaded
        NGramBuffer currentBuffer = null;
        NGramBuffer nMinus1Buffer = null;
        firstCurrentNGramEntry = unigrams[firstWordID].getFirstBigramEntry();
        // +1 so the sentinel entry after the last follower is included
        // (needed to delimit ranges); 1 therefore means "no followers".
        numberNGrams = getNumberBigramFollowers(firstWordID) + 1;
        if (numberNGrams == 1) // 1 means that there is no bigram starting with
            // firstWordID
            return null;
        if (orderBuffer == 2) {
            size = numberNGrams
                    * ((loader.getMaxDepth() == orderBuffer) ? BYTES_PER_NMAXGRAM
                            : BYTES_PER_NGRAM) * loader.getBytesPerField();
            position = (loader.getNGramOffset(orderBuffer) + (firstCurrentNGramEntry
                    * ((loader.getMaxDepth() == orderBuffer) ? BYTES_PER_NMAXGRAM
                            : BYTES_PER_NGRAM) * loader.getBytesPerField()));
        } else { // only for ws.size() >= 2
            // Locate the history within its own (n-1)-order buffer, then use
            // the segment table to find where its followers start and end.
            int lastWordId = getWordID(ws.getWord(ws.size() - 1));
            nMinus1Buffer = getNGramBuffer(ws.getOldest());
            int index = nMinus1Buffer.findNGramIndex(lastWordId);
            if (index == -1)
                return null;
            int firstNMinus1GramEntry = nMinus1Buffer.getFirstNGramEntry();
            firstCurrentNGramEntry = getFirstNGramEntry(
                    nMinus1Buffer.getNGramProbability(index),
                    firstNMinus1GramEntry, orderBuffer);
            int firstNextNGramEntry = getFirstNGramEntry(
                    nMinus1Buffer.getNGramProbability(index + 1),
                    firstNMinus1GramEntry, orderBuffer);
            numberNGrams = firstNextNGramEntry - firstCurrentNGramEntry;
            if (numberNGrams == 0)
                return null;
            // Non-max orders carry a sentinel entry at the end of the range.
            if (loader.getMaxDepth() != orderBuffer)
                numberNGrams++;
            size = numberNGrams
                    * ((loader.getMaxDepth() == orderBuffer) ? BYTES_PER_NMAXGRAM
                            : BYTES_PER_NGRAM) * loader.getBytesPerField();
            position = loader.getNGramOffset(orderBuffer)
                    + (long) firstCurrentNGramEntry
                    * (long) ((loader.getMaxDepth() == orderBuffer) ? BYTES_PER_NMAXGRAM
                            : BYTES_PER_NGRAM) * loader.getBytesPerField();
        }
        try {
            byte[] buffer = loader.loadBuffer(position, size);
            if (loader.getMaxDepth() == orderBuffer) {
                currentBuffer = new NMaxGramBuffer(buffer, numberNGrams,
                        loader.getBigEndian(), is32bits(), orderBuffer,
                        firstCurrentNGramEntry);
            } else {
                currentBuffer = new NGramBuffer(buffer, numberNGrams,
                        loader.getBigEndian(), is32bits(), orderBuffer,
                        firstCurrentNGramEntry);
            }
        } catch (IOException ioe) {
            ioe.printStackTrace();
            throw new Error("Error loading " + orderBuffer + "-Grams.");
        }
        return currentBuffer;
    }
    /**
     * Returns the NGrams of the given word sequence
     *
     * Checks the per-order cache first (for orders > 1), loading the buffer
     * from disk and caching it on a miss.
     *
     * @param wordSequence
     *            the word sequence from which to get the buffer
     * @return the NGramBuffer of the word sequence, or null if it has no
     *         followers in the LM
     */
    private NGramBuffer getNGramBuffer(WordSequence wordSequence) {
        NGramBuffer nGramBuffer = null;
        int order = wordSequence.size();
        // NOTE(review): original comment suggests containsKey would be better here.
        if (order > 1)
            nGramBuffer = loadedNGramBuffers[order - 1].get(wordSequence);
        if (nGramBuffer == null) {
            nGramBuffer = loadNGramBuffer(wordSequence);
            // NOTE(review): sequences with no followers are re-loaded on every
            // query; caching an 'empty' marker buffer would avoid that.
            if (nGramBuffer != null)
                loadedNGramBuffers[order - 1].put(wordSequence, nGramBuffer);
        }
        return nGramBuffer;
    }
    /**
     * Returns the index of the first NGram entry of the given N-1Gram
     *
     * Absolute follower positions are reconstructed from the segment table
     * (indexed by the N-1Gram's global position shifted by the log segment
     * size) plus the entry's relative first-follower offset.
     *
     * @param nMinus1Gram
     *            the N-1Gram which first NGram entry we're looking for
     * @param firstNMinus1GramEntry
     *            the index of the first N-1Gram entry of the N-1Gram in
     *            question
     * @param n
     *            the order of the NGram
     * @return the index of the first NGram entry of the given N-1Gram
     */
    private int getFirstNGramEntry(NGramProbability nMinus1Gram,
            int firstNMinus1GramEntry, int n) {
        int firstNGramEntry = ngramSegmentTable[n - 1][(firstNMinus1GramEntry + nMinus1Gram
                .getWhichFollower()) >> loader.getLogNGramSegmentSize()]
                + nMinus1Gram.getFirstNPlus1GramEntry();
        return firstNGramEntry;
    }
/**
* Returns the unigram probability of the given unigram.
*
* @param wordSequence
* the unigram word sequence
* @return the unigram probability
*/
private float getUnigramProbability(WordSequence wordSequence) {
Word unigram = wordSequence.getWord(0);
UnigramProbability unigramProb = getUnigram(unigram);
if (unigramProb == null)
throw new Error("Unigram not in LM: " + unigram);
return unigramProb.getLogProbability();
}
    /**
     * Returns its UnigramProbability if this language model has the given
     * unigram.
     *
     * @param unigram
     *            the unigram to find
     * @return the UnigramProbability, or null if this language model does not
     *         have the unigram
     */
    private UnigramProbability getUnigram(Word unigram) {
        return unigramIDMap.get(unigram);
    }
/**
* Returns true if this language model has the given unigram.
*
* @param unigram
* the unigram to find
* @return true if this LM has this unigram, false otherwise
*/
private boolean hasUnigram(Word unigram) {
return (unigramIDMap.get(unigram) != null);
}
/**
* Returns the ID of the given word.
*
* @param word
* the word to find the ID
* @return the ID of the word
*/
public final int getWordID(Word word) {
UnigramProbability probability = getUnigram(word);
if (probability == null)
throw new IllegalArgumentException("No word ID: " + word);
else
return probability.getWordID();
}
/**
* Returns true if the language model contains the given word
*
* @param w word
* @return if word is in the language model
*/
public boolean hasWord(Word w) {
return (unigramIDMap.get(new Word(w.toString(), null, false)) != null);
}
/**
* Gets the smear term for the given wordSequence
*
* @param wordSequence
* the word sequence
* @return the smear term associated with this word sequence
*/
public float getSmearOld(WordSequence wordSequence) {
float smearTerm = 0.0f;
if (fullSmear) {
int length = wordSequence.size();
if (length > 0) {
int wordID = getWordID(wordSequence.getWord(length - 1));
smearTerm = unigramSmearTerm[wordID];
}
}
if (fullSmear && logger.isLoggable(Level.FINE))
logger.fine("SmearTerm: " + smearTerm);
return smearTerm;
}
    // Diagnostic counters: total smear queries and bigram-level smear hits.
    int smearCount;
    int smearBigramHit;
    /**
     * Gets the smear term for the given word sequence. For histories of two
     * or more words the bigram-level smear of the last two words is used when
     * available, falling back to the unigram smear of the last word.
     * Periodically prints hit statistics to stdout.
     *
     * @param wordSequence the word sequence
     * @return the smear term, or 0 if full smear is disabled
     */
    public float getSmear(WordSequence wordSequence) {
        float smearTerm = 0.0f;
        if (fullSmear) {
            smearCount++;
            int length = wordSequence.size();
            if (length == 1) {
                int wordID = getWordID(wordSequence.getWord(0));
                smearTerm = unigramSmearTerm[wordID];
            } else if (length >= 2) {
                int size = wordSequence.size();
                int wordID1 = getWordID(wordSequence.getWord(size - 2));
                int wordID2 = getWordID(wordSequence.getWord(size - 1));
                Float st = getSmearTerm(wordID1, wordID2);
                if (st == null)
                    smearTerm = unigramSmearTerm[wordID2];
                else {
                    smearTerm = st;
                    smearBigramHit++;
                }
            }
            if (smearCount % 100000 == 0)
                System.out.println("Smear hit: " + smearBigramHit + " tot: "
                        + smearCount);
        }
        if (fullSmear && logger.isLoggable(Level.FINE))
            logger.fine("SmearTerm: " + smearTerm);
        return smearTerm;
    }
/**
* Returns the number of bigram followers of a word.
*
* @param wordID
* the ID of the word
* @return the number of bigram followers
*/
private int getNumberBigramFollowers(int wordID) {
if (wordID == unigrams.length - 1)
return 0;
else
return unigrams[wordID + 1].getFirstBigramEntry()
- unigrams[wordID].getFirstBigramEntry();
}
    /**
     * Returns the maximum depth of the language model
     *
     * @return the maximum depth of the language model
     */
    public int getMaxDepth() {
        return maxDepth;
    }
/**
* Returns the set of words in the language model. The set is unmodifiable.
*
* @return the unmodifiable set of words
*/
public Set<String> getVocabulary() {
Set<String> vocabulary = new HashSet<String>(Arrays.asList(loader
.getWords()));
return Collections.unmodifiableSet(vocabulary);
}
    /**
     * Returns the number of times when a NGram is queried, but there is no such
     * NGram in the LM (in which case it uses the backoff probabilities).
     *
     * @return the number of NGram misses
     */
    public int getNGramMisses() {
        return ngramMisses;
    }
    /**
     * Returns the number of NGram hits (full-depth queries served from the
     * probability cache).
     *
     * @return the number of NGram hits
     */
    public int getNGramHits() {
        return ngramHits;
    }
/**
* Returns the bigrams of the given word
*
* @param firstWordID
* the ID of the word
*
* @return the bigrams of the word
*/
private NGramBuffer getBigramBuffer(int firstWordID) {
Word[] wd = new Word[1];
wd[0] = dictionary.getWord(loader.getWords()[firstWordID]);
WordSequence ws = new WordSequence(wd);
return loadNGramBuffer(ws);
}
/**
* Loads into a buffer all the trigram followers of the given bigram.
*
* @param firstWordID
* the ID of the first word
* @param secondWordID
* the ID of the second word
*
* @return a TrigramBuffer of all the trigram followers of the given two
* words
*/
private NGramBuffer loadTrigramBuffer(int firstWordID, int secondWordID) {
Word[] wd = new Word[2];
wd[0] = dictionary.getWord(loader.getWords()[firstWordID]);
wd[1] = dictionary.getWord(loader.getWords()[secondWordID]);
WordSequence ws = new WordSequence(wd);
return loadNGramBuffer(ws);
}
    /**
     * Computes the smear information: a per-unigram smear term and, where
     * trigram evidence exists, a per-bigram smear term stored in
     * bigramSmearMap. The terms appear to be weighted-average log-probability
     * ratios combining explicit n-gram probabilities with their backoff
     * estimates — NOTE(review): the exact derivation is undocumented; verify
     * against the original smear papers before modifying the math.
     */
    private void buildSmearInfo() throws IOException {
        // Corpus-wide accumulators over all unigrams: sum of p*log(p) and p*log(p)^2.
        double S0 = 0;
        double R0 = 0;
        bigramSmearMap = new HashMap<Long, Float>();
        double[] ugNumerator = new double[unigrams.length];
        double[] ugDenominator = new double[unigrams.length];
        double[] ugAvgLogProb = new double[unigrams.length];
        unigramSmearTerm = new float[unigrams.length];
        for (UnigramProbability unigram : unigrams) {
            float logp = unigram.getLogProbability();
            double p = logMath.logToLinear(logp);
            S0 += p * logp;
            R0 += p * logp * logp;
        }
        System.out.println("R0 S0 " + R0 + ' ' + S0);
        // Pass 1: unigram-level smear terms from each word's bigram followers.
        for (int i = 0; i < loadedBigramBuffers.length; i++) {
            NGramBuffer bigram = getBigramBuffer(i);
            if (bigram == null) {
                // No bigram followers: neutral smear term.
                unigramSmearTerm[i] = LogMath.LOG_ONE;
                continue;
            }
            ugNumerator[i] = 0.0;
            ugDenominator[i] = 0.0;
            ugAvgLogProb[i] = 0.0;
            float logugbackoff = unigrams[i].getLogBackoff();
            double ugbackoff = logMath.logToLinear(logugbackoff);
            for (int j = 0; j < bigram.getNumberNGrams(); j++) {
                int wordID = bigram.getWordID(j);
                NGramProbability bgProb = bigram.getNGramProbability(j);
                float logugprob = unigrams[wordID].getLogProbability();
                float logbgprob = ngramProbTable[1][bgProb.getProbabilityID()];
                double ugprob = logMath.logToLinear(logugprob);
                double bgprob = logMath.logToLinear(logbgprob);
                // Backed-off estimate of this bigram's probability.
                double backoffbgprob = ugbackoff * ugprob;
                double logbackoffbgprob = logMath.linearToLog(backoffbgprob);
                ugNumerator[i] += (bgprob * logbgprob - backoffbgprob
                        * logbackoffbgprob)
                        * logugprob;
                ugDenominator[i] += (bgprob - backoffbgprob) * logugprob;
            }
            // Fold in the contribution of all words reached only via backoff.
            ugNumerator[i] += ugbackoff * (logugbackoff * S0 + R0);
            ugAvgLogProb[i] = ugDenominator[i] + ugbackoff * S0;
            ugDenominator[i] += ugbackoff * R0;
            unigramSmearTerm[i] = (float) (ugNumerator[i] / ugDenominator[i]);
        }
        // Pass 2: bigram-level smear terms from each bigram's trigram followers.
        for (int i = 0; i < loadedBigramBuffers.length; i++) {
            System.out.println("Processed " + i + " of "
                    + loadedBigramBuffers.length);
            NGramBuffer bigram = getBigramBuffer(i);
            if (bigram == null)
                continue;
            for (int j = 0; j < bigram.getNumberNGrams(); j++) {
                float smearTerm;
                NGramProbability bgProb = bigram.getNGramProbability(j);
                float logbgbackoff = ngramBackoffTable[2][bgProb.getBackoffID()];
                double bgbackoff = logMath.logToLinear(logbgbackoff);
                int k = bigram.getWordID(j);
                NGramBuffer trigram = loadTrigramBuffer(i, k);
                if (trigram == null)
                    // No trigram evidence: fall back to the unigram-level term.
                    smearTerm = unigramSmearTerm[k];
                else {
                    double bg_numerator = 0;
                    double bg_denominator = 0;
                    for (int l = 0; l < trigram.getNumberNGrams(); l++) {
                        int m = trigram.getWordID(l);
                        float logtgprob = ngramProbTable[2][trigram
                                .getProbabilityID(l)];
                        double tgprob = logMath.logToLinear(logtgprob);
                        float logbgprob = getBigramProb(k, m);
                        double bgprob = logMath.logToLinear(logbgprob);
                        float logugprob = unigrams[m].getLogProbability();
                        double backofftgprob = bgbackoff * bgprob;
                        double logbackofftgprob = logMath
                                .linearToLog(backofftgprob);
                        bg_numerator += (tgprob * logtgprob - backofftgprob
                                * logbackofftgprob)
                                * logugprob;
                        bg_denominator += (tgprob - backofftgprob) * logugprob
                                * logugprob;
                    }
                    bg_numerator += bgbackoff
                            * (logbgbackoff * ugAvgLogProb[k] - ugNumerator[k]);
                    bg_denominator += bgbackoff * ugDenominator[k];
                    smearTerm = (float) (bg_numerator / bg_denominator);
                    smearTermCount++;
                }
                putSmearTerm(i, k, smearTerm);
            }
        }
        System.out.println("Smear count is " + smearTermCount);
    }
    /**
     * Debugging helper: prints the intermediate quantities of the smear
     * computation for one bigram follower to stdout. Currently unused
     * (retained for diagnosing buildSmearInfo).
     */
    @SuppressWarnings("unused")
    private void dumpProbs(double[] ugNumerator, double[] ugDenominator, int i,
            int j, float logugprob, float logbgprob, double ugprob,
            double bgprob, double backoffbgprob, double logbackoffbgprob) {
        System.out
                .println("ubo " + ugprob + ' ' + bgprob + ' ' + backoffbgprob);
        System.out.println("logubo " + logugprob + ' ' + logbgprob + ' '
                + logbackoffbgprob);
        System.out.println("n/d " + j + ' ' + ugNumerator[i] + ' '
                + ugDenominator[i]);
        System.out.print(ugprob + " " + bgprob + ' ' + backoffbgprob);
        System.out.print(" " + logugprob + ' ' + logbgprob + ' '
                + logbackoffbgprob);
        System.out.println(" " + ugNumerator[i] + ' ' + ugDenominator[i]);
    }
/**
* Writes the smear info to the given file
*
* @param filename
* the file to write the smear info to
* @throws IOException
* if an error occurs on write
*/
@SuppressWarnings("unused")
private void writeSmearInfo(String filename) throws IOException {
DataOutputStream out = new DataOutputStream(new FileOutputStream(
filename));
out.writeInt(SMEAR_MAGIC);
System.out.println("writing " + unigrams.length);
out.writeInt(unigrams.length);
for (int i = 0; i < unigrams.length; i++)
out.writeFloat(unigramSmearTerm[i]);
for (int i = 0; i < unigrams.length; i++) {
System.out.println("Writing " + i + " of " + unigrams.length);
NGramBuffer bigram = getBigramBuffer(i);
if (bigram == null) {
out.writeInt(0);
continue;
}
out.writeInt(bigram.getNumberNGrams());
for (int j = 0; j < bigram.getNumberNGrams(); j++) {
int k = bigram.getWordID(j);
Float smearTerm = getSmearTerm(i, k);
out.writeInt(k);
out.writeFloat(smearTerm.floatValue());
}
}
out.close();
}
/**
* Reads the smear info from the given file
*
* @param filename
* where to read the smear info from
* @throws IOException
* if an inconsistent file is found or on any general I/O error
*/
private void readSmearInfo(String filename) throws IOException {
DataInputStream in = new DataInputStream(new FileInputStream(filename));
if (in.readInt() != SMEAR_MAGIC) {
in.close();
throw new IOException("Bad smear format for " + filename);
}
if (in.readInt() != unigrams.length) {
in.close();
throw new IOException("Bad unigram length in " + filename);
}
bigramSmearMap = new HashMap<Long, Float>();
unigramSmearTerm = new float[unigrams.length];
System.out.println("Reading " + unigrams.length);
for (int i = 0; i < unigrams.length; i++)
unigramSmearTerm[i] = in.readFloat();
for (int i = 0; i < unigrams.length; i++) {
System.out.println("Processed " + i + " of "
+ loadedBigramBuffers.length);
int numBigrams = in.readInt();
NGramBuffer bigram = getBigramBuffer(i);
if (bigram.getNumberNGrams() != numBigrams) {
in.close();
throw new IOException("Bad ngrams for unigram " + i + " Found "
+ numBigrams + " expected " + bigram.getNumberNGrams());
}
for (int j = 0; j < numBigrams; j++) {
int k = bigram.getWordID(j);
putSmearTerm(i, k, in.readFloat());
}
}
in.close();
}
    /**
     * Puts the smear term for the two words
     *
     * The key packs word1 into the high 32 bits and word2 into the low 32
     * bits; word IDs are non-negative array indices so no sign-extension
     * masking is needed. Must stay in sync with getSmearTerm.
     *
     * @param word1
     *            the first word
     * @param word2
     *            the second word
     * @param smearTerm
     *            the smear term
     */
    private void putSmearTerm(int word1, int word2, float smearTerm) {
        long bigramID = (((long) word1) << 32) | word2;
        bigramSmearMap.put(bigramID, smearTerm);
    }
    /**
     * Retrieves the smear term for the two words
     *
     * Uses the same packed 64-bit (word1, word2) key as putSmearTerm.
     *
     * @param word1
     *            the first word
     * @param word2
     *            the second word
     * @return the smear term, or null if none was stored for this bigram
     */
    private Float getSmearTerm(int word1, int word2) {
        long bigramID = (((long) word1) << 32) | word2;
        return bigramSmearMap.get(bigramID);
    }
    /**
     * Retrieves the bigram probability for the two given words.
     *
     * NOTE(review): assumes the bigram (word1, word2) exists in the LM — a
     * null buffer or a failed findNGram would throw a NullPointerException.
     * Callers (buildSmearInfo) only query bigrams implied by loaded trigrams,
     * which presumably always exist; verify before reusing elsewhere.
     *
     * @param word1
     *            the first word of the bigram
     * @param word2
     *            the second word of the bigram
     * @return the log probability
     */
    private float getBigramProb(int word1, int word2) {
        NGramBuffer bigram = getBigramBuffer(word1);
        NGramProbability bigramProbability = bigram.findNGram(word2);
        return ngramProbTable[1][bigramProbability.getProbabilityID()];
    }
}