SGDText.java example

Explorer

MGPWeka-master
- weka-dev
  - resources
    - MacArffOpenFilesHandler.java
  - src
    - main
      - java
        weka
        Run.java
        associations
        AbstractAssociator.java
        Apriori.java
        AprioriItemSet.java
        AssociationRule.java
        AssociationRules.java
        AssociationRulesProducer.java
        Associator.java
        AssociatorEvaluation.java
        BinaryItem.java
        CARuleMiner.java
        CheckAssociator.java
        DefaultAssociationRule.java
        FPGrowth.java
        FilteredAssociationRules.java
        FilteredAssociator.java
        Item.java
        ItemSet.java
        LabeledItemSet.java
        NominalItem.java
        NumericItem.java
        SingleAssociatorEnhancer.java
        attributeSelection
        ASEvaluation.java
        ASSearch.java
        AttributeEvaluator.java
        AttributeSelection.java
        AttributeSetEvaluator.java
        AttributeTransformer.java
        BestFirst.java
        CfsSubsetEval.java
        CheckAttributeSelection.java
        CorrelationAttributeEval.java
        ErrorBasedMeritEvaluator.java
        GainRatioAttributeEval.java
        GreedyStepwise.java
        HoldOutSubsetEvaluator.java
        InfoGainAttributeEval.java
        OneRAttributeEval.java
        PrincipalComponents.java
        RankedOutputSearch.java
        Ranker.java
        ReliefFAttributeEval.java
        StartSetHandler.java
        SubsetEvaluator.java
        SymmetricalUncertAttributeEval.java
        UnsupervisedAttributeEvaluator.java
        UnsupervisedSubsetEvaluator.java
        WrapperSubsetEval.java
        classifiers
        AbstractClassifier.java
        AggregateableEvaluation.java
        BVDecompose.java
        BVDecomposeSegCVSub.java
        CheckClassifier.java
        CheckSource.java
        Classifier.java
        ConditionalDensityEstimator.java
        CostMatrix.java
        Evaluation.java
        IntervalEstimator.java
        IteratedSingleClassifierEnhancer.java
        IterativeClassifier.java
        MultipleClassifiersCombiner.java
        ParallelIteratedSingleClassifierEnhancer.java
        ParallelMultipleClassifiersCombiner.java
        RandomizableClassifier.java
        RandomizableIteratedSingleClassifierEnhancer.java
        RandomizableMultipleClassifiersCombiner.java
        RandomizableParallelIteratedSingleClassifierEnhancer.java
        RandomizableParallelMultipleClassifiersCombiner.java
        RandomizableSingleClassifierEnhancer.java
        SingleClassifierEnhancer.java
        Sourcable.java
        UpdateableClassifier.java
        bayes
        BayesNet.java
        NaiveBayes.java
        NaiveBayesMultinomial.java
        NaiveBayesMultinomialText.java
        NaiveBayesMultinomialUpdateable.java
        NaiveBayesUpdateable.java
        net
        ADNode.java
        BIFReader.java
        BayesNetGenerator.java
        EditableBayesNet.java
        GUI.java
        MarginCalculator.java
        ParentSet.java
        VaryNode.java
        estimate
        BMAEstimator.java
        BayesNetEstimator.java
        DiscreteEstimatorBayes.java
        DiscreteEstimatorFullBayes.java
        MultiNomialBMAEstimator.java
        SimpleEstimator.java
        search
        SearchAlgorithm.java
        ci
        CISearchAlgorithm.java
        ICSSearchAlgorithm.java
        fixed
        FromFile.java
        NaiveBayes.java
        global
        GeneticSearch.java
        GlobalScoreSearchAlgorithm.java
        HillClimber.java
        K2.java
        RepeatedHillClimber.java
        SimulatedAnnealing.java
        TAN.java
        TabuSearch.java
        local
        GeneticSearch.java
        HillClimber.java
        K2.java
        LAGDHillClimber.java
        LocalScoreSearchAlgorithm.java
        RepeatedHillClimber.java
        Scoreable.java
        SimulatedAnnealing.java
        TAN.java
        TabuSearch.java
        evaluation
        ConfusionMatrix.java
        CostCurve.java
        EvaluationUtils.java
        MarginCurve.java
        NominalPrediction.java
        NumericPrediction.java
        Prediction.java
        ThresholdCurve.java
        TwoClassStats.java
        output
        prediction
        AbstractOutput.java
        CSV.java
        HTML.java
        Null.java
        PlainText.java
        XML.java
        functions
        GaussianProcesses.java
        LinearRegression.java
        Logistic.java
        MultilayerPerceptron.java
        SGD.java
        SGDText.java
        SMO.java
        SMOreg.java
        SimpleLinearRegression.java
        SimpleLogistic.java
        VotedPerceptron.java
        neural
        LinearUnit.java
        NeuralConnection.java
        NeuralMethod.java
        NeuralNode.java
        SigmoidUnit.java
        supportVector
        CachedKernel.java
        CheckKernel.java
        Kernel.java
        KernelEvaluation.java
        NormalizedPolyKernel.java
        PolyKernel.java
        PrecomputedKernelMatrixKernel.java
        Puk.java
        RBFKernel.java
        RegOptimizer.java
        RegSMO.java
        RegSMOImproved.java
        SMOset.java
        StringKernel.java
        lazy
        IBk.java
        KStar.java
        LWL.java
        kstar
        KStarCache.java
        KStarConstants.java
        KStarNominalAttribute.java
        KStarNumericAttribute.java
        KStarWrapper.java
        meta
        AdaBoostM1.java
        AdditiveRegression.java
        AttributeSelectedClassifier.java
        Bagging.java
        CVParameterSelection.java
        ClassificationViaRegression.java
        CostSensitiveClassifier.java
        FilteredClassifier.java
        LogitBoost.java
        MultiClassClassifier.java
        MultiClassClassifierUpdateable.java
        MultiScheme.java
        RandomCommittee.java
        RandomSubSpace.java
        RegressionByDiscretization.java
        Stacking.java
        Vote.java
        misc
        InputMappedClassifier.java
        InputMappedClassifierBeanInfo.java
        SerializedClassifier.java
        pmml
        consumer
        GeneralRegression.java
        NeuralNetwork.java
        PMMLClassifier.java
        Regression.java
        RuleSetModel.java
        SupportVectorMachineModel.java
        TreeModel.java
        rules
        DecisionTable.java
        DecisionTableHashKey.java
        JRip.java
        M5Rules.java
        OneR.java
        PART.java
        Rule.java
        RuleStats.java
        ZeroR.java
        part
        C45PruneableDecList.java
        ClassifierDecList.java
        MakeDecList.java
        PruneableDecList.java
        trees
        DecisionStump.java
        J48.java
        LMT.java
        M5P.java
        REPTree.java
        RandomForest.java
        RandomTree.java
        j48
        BinC45ModelSelection.java
        BinC45Split.java
        C45ModelSelection.java
        C45PruneableClassifierTree.java
        C45Split.java
        ClassifierSplitModel.java
        ClassifierTree.java
        Distribution.java
        EntropyBasedSplitCrit.java
        EntropySplitCrit.java
        GainRatioSplitCrit.java
        InfoGainSplitCrit.java
        ModelSelection.java
        NBTreeClassifierTree.java
        NBTreeModelSelection.java
        NBTreeNoSplit.java
        NBTreeSplit.java
        NoSplit.java
        PruneableClassifierTree.java
        SplitCriterion.java
        Stats.java
        lmt
        LMTNode.java
        LogisticBase.java
        ResidualModelSelection.java
        ResidualSplit.java
        m5
        CorrelationSplitInfo.java
        Impurity.java
        M5Base.java
        PreConstructedLinearModel.java
        Rule.java
        RuleNode.java
        SplitEvaluate.java
        Values.java
        YongSplitInfo.java
        xml
        XMLClassifier.java
        clusterers
        AbstractClusterer.java
        AbstractDensityBasedClusterer (TAE's conflicted copy 2012-11-25).java
        AbstractDensityBasedClusterer.java
        CheckClusterer.java
        ClusterEvaluation.java
        Clusterer.java
        Cobweb.java
        DensityBasedClusterer.java
        EM.java
        FarthestFirst.java
        FilteredClusterer.java
        HierarchicalClusterer.java
        MakeDensityBasedClusterer.java
        NumberOfClustersRequestable.java
        RandomizableClusterer.java
        RandomizableDensityBasedClusterer.java
        RandomizableSingleClustererEnhancer.java
        SimpleKMeans.java
        SingleClustererEnhancer.java
        UpdateableClusterer.java
        core
        AbstractInstance.java
        AdditionalMeasureProducer.java
        AlgVector.java
        AllJavadoc.java
        Attribute.java
        AttributeExpression.java
        AttributeLocator.java
        AttributeStats.java
        BatchPredictor.java
        BinarySparseInstance.java
        Capabilities.java
        CapabilitiesHandler.java
        ChebyshevDistance.java
        Check.java
        CheckGOE.java
        CheckOptionHandler.java
        CheckScheme.java
        ClassCache.java
        ClassDiscovery.java
        ClassloaderUtil.java
        CommandlineRunnable.java
        ConjugateGradientOptimization.java
        ContingencyTables.java
        Copyable.java
        Copyright.java
        CustomDisplayStringProvider.java
        Debug.java
        DenseInstance.java
        DistanceFunction.java
        Drawable.java
        Environment.java
        EnvironmentHandler.java
        EuclideanDistance.java
        FastVector.java
        FindWithCapabilities.java
        GlobalInfoJavadoc.java
        Instance.java
        InstanceComparator.java
        Instances.java
        Javadoc.java
        ListOptions.java
        ManhattanDistance.java
        Matchable.java
        MathematicalExpression.java
        Matrix.java
        Memory.java
        MinkowskiDistance.java
        MultiInstanceCapabilitiesHandler.java
        NoSupportForMissingValuesException.java
        NormalizableDistance.java
        Optimization.java
        Option.java
        OptionHandler.java
        OptionHandlerJavadoc.java
        PropertyPath.java
        ProtectedProperties.java
        Queue.java
        RandomVariates.java
        Randomizable.java
        Range.java
        RelationalLocator.java
        RepositoryIndexGenerator.java
        RevisionHandler.java
        RevisionUtils.java
        SelectedTag.java
        SerializationHelper.java
        SerializedObject.java
        SingleIndex.java
        SparseInstance.java
        SpecialFunctions.java
        Statistics.java
        Stopwords.java
        StringLocator.java
        Summarizable.java
        SystemInfo.java
        Tag.java
        TechnicalInformation.java
        TechnicalInformationHandler.java
        TechnicalInformationHandlerJavadoc.java
        Tee.java
        TestInstances.java
        Trie.java
        UnassignedClassException.java
        UnassignedDatasetException.java
        Undoable.java
        UnsupportedAttributeTypeException.java
        UnsupportedClassTypeException.java
        Utils.java
        Version.java
        WeightedInstancesHandler.java
        WekaEnumeration.java
        WekaException.java
        WekaPackageManager.java
        converters
        AbstractFileLoader.java
        AbstractFileSaver.java
        AbstractLoader.java
        AbstractSaver.java
        ArffLoader.java
        ArffSaver.java
        BatchConverter.java
        C45Loader.java
        C45Saver.java
        CSVLoader.java
        CSVSaver.java
        ConverterUtils.java
        DatabaseConnection.java
        DatabaseConverter.java
        DatabaseLoader.java
        DatabaseSaver.java
        FileSourcedConverter.java
        IncrementalConverter.java
        JSONLoader.java
        JSONSaver.java
        LibSVMLoader.java
        LibSVMSaver.java
        Loader.java
        MatlabLoader.java
        MatlabSaver.java
        SVMLightLoader.java
        SVMLightSaver.java
        Saver.java
        SerializedInstancesLoader.java
        SerializedInstancesSaver.java
        TextDirectoryLoader.java
        URLSourcedLoader.java
        XRFFLoader.java
        XRFFSaver.java
        json
        JSONInstances.java
        JSONNode.java
        Parser.java
        Scanner.java
        sym.java
        logging
        ConsoleLogger.java
        FileLogger.java
        Logger.java
        OutputLogger.java
        mathematicalexpression
        Parser.java
        Scanner.java
        sym.java
        matrix
        CholeskyDecomposition.java
        DoubleVector.java
        EigenvalueDecomposition.java
        ExponentialFormat.java
        FlexibleDecimalFormat.java
        FloatingPointFormat.java
        IntVector.java
        JCublasMatrixMultiplication.java
        LUDecomposition.java
        LinearRegression.java
        Maths.java
        Matrix.java
        MulticoreMatrixMult.java
        QRDecomposition.java
        SingularValueDecomposition.java
        neighboursearch
        BallTree.java
        CoverTree.java
        KDTree.java
        LinearNNSearch.java
        NearestNeighbourSearch.java
        PerformanceStats.java
        TreePerformanceStats.java
        balltrees
        BallNode.java
        BallSplitter.java
        BallTreeConstructor.java
        BottomUpConstructor.java
        MedianDistanceFromArbitraryPoint.java
        MedianOfWidestDimension.java
        MiddleOutConstructor.java
        PointsClosestToFurthestChildren.java
        TopDownConstructor.java
        covertrees
        Stack.java
        kdtrees
        KDTreeNode.java
        KDTreeNodeSplitter.java
        KMeansInpiredMethod.java
        MedianOfWidestDimension.java
        MidPointOfWidestDimension.java
        SlidingMidPointOfWidestSide.java
        pmml
        Apply.java
        Array.java
        BuiltInArithmetic.java
        BuiltInMath.java
        BuiltInString.java
        Constant.java
        DefineFunction.java
        DerivedFieldMetaInfo.java
        Discretize.java
        Expression.java
        FieldMetaInfo.java
        FieldRef.java
        Function.java
        MappingInfo.java
        MiningFieldMetaInfo.java
        MiningSchema.java
        NormContinuous.java
        NormDiscrete.java
        PMMLFactory.java
        PMMLModel.java
        PMMLUtils.java
        SparseArray.java
        TargetMetaInfo.java
        TransformationDictionary.java
        VectorDictionary.java
        VectorInstance.java
        scripting
        Groovy.java
        Jython.java
        JythonObject.java
        JythonSerializableObject.java
        stemmers
        IteratedLovinsStemmer.java
        LovinsStemmer.java
        NullStemmer.java
        SnowballStemmer.java
        Stemmer.java
        Stemming.java
        tokenizers
        AlphabeticTokenizer.java
        CharacterDelimitedTokenizer.java
        NGramTokenizer.java
        Tokenizer.java
        WordTokenizer.java
        xml
        KOML.java
        MethodHandler.java
        PropertyHandler.java
        SerialUIDChanger.java
        XMLBasicSerialization.java
        XMLDocument.java
        XMLInstances.java
        XMLOptions.java
        XMLSerialization.java
        XMLSerializationMethodHandler.java
        XStream.java
        datagenerators
        ClassificationGenerator.java
        ClusterDefinition.java
        ClusterGenerator.java
        DataGenerator.java
        RegressionGenerator.java
        Test.java
        classifiers
        classification
        Agrawal.java
        BayesNet.java
        LED24.java
        RDG1.java
        RandomRBF.java
        regression
        Expression.java
        MexicanHat.java
        clusterers
        BIRCHCluster.java
        SubspaceCluster.java
        SubspaceClusterDefinition.java
        estimators
        CheckEstimator.java
        ConditionalEstimator.java
        DDConditionalEstimator.java
        DKConditionalEstimator.java
        DNConditionalEstimator.java
        DiscreteEstimator.java
        Estimator.java
        EstimatorUtils.java
        IncrementalEstimator.java
        KDConditionalEstimator.java
        KKConditionalEstimator.java
        KernelEstimator.java
        MahalanobisEstimator.java
        NDConditionalEstimator.java
        NNConditionalEstimator.java
        NormalEstimator.java
        PoissonEstimator.java
        UnivariateDensityEstimator.java
        UnivariateEqualFrequencyHistogramEstimator.java
        UnivariateIntervalEstimator.java
        UnivariateKernelEstimator.java
        UnivariateNormalEstimator.java
        UnivariateQuantileEstimator.java
        experiment
        AveragingResultProducer.java
        CSVResultListener.java
        ClassifierSplitEvaluator.java
        Compute.java
        CostSensitiveClassifierSplitEvaluator.java
        CrossValidationResultProducer.java
        CrossValidationSplitResultProducer.java
        DatabaseResultListener.java
        DatabaseResultProducer.java
        DatabaseUtils.java
        DensityBasedClustererSplitEvaluator.java
        Experiment.java
        ExplicitTestsetResultProducer.java
        InstanceQuery.java
        InstanceQueryAdapter.java
        InstancesResultListener.java
        LearningRateResultProducer.java
        OutputZipper.java
        PairedCorrectedTTester.java
        PairedStats.java
        PairedStatsCorrected.java
        PairedTTester.java
        PropertyNode.java
        RandomSplitResultProducer.java
        RegressionSplitEvaluator.java
        RemoteEngine.java
        RemoteExperiment.java
        RemoteExperimentEvent.java
        RemoteExperimentListener.java
        RemoteExperimentSubTask.java
        ResultListener.java
        ResultMatrix.java
        ResultMatrixCSV.java
        ResultMatrixGnuPlot.java
        ResultMatrixHTML.java
        ResultMatrixLatex.java
        ResultMatrixPlainText.java
        ResultMatrixSignificance.java
        ResultProducer.java
        SplitEvaluator.java
        Stats.java
        Task.java
        TaskStatusInfo.java
        Tester.java
        xml
        XMLExperiment.java
        filters
        AllFilter.java
        CheckSource.java
        Filter.java
        MultiFilter.java
        SimpleBatchFilter.java
        SimpleFilter.java
        SimpleStreamFilter.java
        Sourcable.java
        StreamableFilter.java
        SupervisedFilter.java
        UnsupervisedFilter.java
        supervised
        attribute
        AddClassification.java
        AttributeSelection.java
        ClassOrder.java
        Discretize.java
        NominalToBinary.java
        instance
        Resample.java
        SpreadSubsample.java
        StratifiedRemoveFolds.java
        unsupervised
        attribute
        AbstractTimeSeries.java
        Add.java
        AddCluster.java
        AddExpression.java
        AddID.java
        AddNoise.java
        AddUserFields.java
        AddUserFieldsBeanInfo.java
        AddValues.java
        Center.java
        ChangeDateFormat.java
        ClassAssigner.java
        ClusterMembership.java
        Copy.java
        Discretize.java
        FirstOrder.java
        InterquartileRange.java
        KernelFilter.java
        MakeIndicator.java
        MathExpression.java
        MergeManyValues.java
        MergeTwoValues.java
        NominalToBinary.java
        NominalToString.java
        Normalize.java
        NumericCleaner.java
        NumericToBinary.java
        NumericToNominal.java
        NumericTransform.java
        Obfuscate.java
        PKIDiscretize.java
        PartitionedMultiFilter.java
        PotentialClassIgnorer.java
        PrincipalComponents.java
        RandomProjection.java
        RandomSubset.java
        Remove.java
        RemoveByName.java
        RemoveType.java
        RemoveUseless.java
        RenameAttribute.java
        Reorder.java
        ReplaceMissingValues.java
        SortLabels.java
        Standardize.java
        StringToNominal.java
        StringToWordVector.java
        SwapValues.java
        TimeSeriesDelta.java
        TimeSeriesTranslate.java
        instance
        NonSparseToSparse.java
        Randomize.java
        RemoveFolds.java
        RemoveFrequentValues.java
        RemoveMisclassified.java
        RemovePercentage.java
        RemoveRange.java
        RemoveWithValues.java
        Resample.java
        ReservoirSample.java
        SparseToNonSparse.java
        SubsetByExpression.java
        subsetbyexpression
        Parser.java
        Scanner.java
        sym.java
        gui
        AttributeListPanel.java
        AttributeSelectionPanel.java
        AttributeSummaryPanel.java
        AttributeVisualizationPanel.java
        BrowserHelper.java
        CheckBoxList.java
        ComponentHelper.java
        ConverterFileChooser.java
        CostMatrixEditor.java
        CustomPanelSupplier.java
        DatabaseConnectionDialog.java
        DocumentPrinting.java
        ETable.java
        ExtensionFileFilter.java
        FileEditor.java
        GUIChooser.java
        GenericArrayEditor.java
        GenericObjectEditor.java
        GenericObjectEditorHistory.java
        GenericPropertiesCreator.java
        HierarchyPropertyParser.java
        InstancesSummaryPanel.java
        JListHelper.java
        JTableHelper.java
        ListSelectorDialog.java
        Loader.java
        LogPanel.java
        LogWindow.java
        Logger.java
        LookAndFeel.java
        Main.java
        MainMenuExtension.java
        MemoryUsagePanel.java
        PackageManager.java
        PropertyDialog.java
        PropertyPanel.java
        PropertySelectorDialog.java
        PropertySheetPanel.java
        PropertyText.java
        PropertyValueSelector.java
        ReaderToTextPane.java
        ResultHistoryPanel.java
        SaveBuffer.java
        SelectedTagEditor.java
        SetInstancesPanel.java
        SimpleCLI.java
        SimpleCLIPanel.java
        SimpleDateFormatEditor.java
        SortedTableModel.java
        SplashWindow.java
        SysErrLog.java
        TaskLogger.java
        ViewerDialog.java
        WekaTaskMonitor.java
        arffviewer
        ArffPanel.java
        ArffSortedTableModel.java
        ArffTable.java
        ArffTableCellRenderer.java
        ArffTableModel.java
        ArffViewer.java
        ArffViewerMainPanel.java
        beans
        AbstractDataSink.java
        AbstractDataSinkBeanInfo.java
        AbstractDataSource.java
        AbstractDataSourceBeanInfo.java
        AbstractEvaluator.java
        AbstractOffscreenChartRenderer.java
        AbstractTestSetProducer.java
        AbstractTestSetProducerBeanInfo.java
        AbstractTrainAndTestSetProducer.java
        AbstractTrainAndTestSetProducerBeanInfo.java
        AbstractTrainingSetProducer.java
        AbstractTrainingSetProducerBeanInfo.java
        Appender.java
        AppenderBeanInfo.java
        Associator.java
        AssociatorBeanInfo.java
        AssociatorCustomizer.java
        AttributeSummarizer.java
        AttributeSummarizerBeanInfo.java
        AttributeSummarizerCustomizer.java
        BatchAssociationRulesEvent.java
        BatchAssociationRulesListener.java
        BatchClassifierEvent.java
        BatchClassifierListener.java
        BatchClustererEvent.java
        BatchClustererListener.java
        BeanCommon.java
        BeanConnection.java
        BeanCustomizer.java
        BeanInstance.java
        BeanVisual.java
        ChartEvent.java
        ChartListener.java
        ClassAssigner.java
        ClassAssignerBeanInfo.java
        ClassAssignerCustomizer.java
        ClassValuePicker.java
        ClassValuePickerBeanInfo.java
        ClassValuePickerCustomizer.java
        Classifier.java
        ClassifierBeanInfo.java
        ClassifierCustomizer.java
        ClassifierPerformanceEvaluator.java
        ClassifierPerformanceEvaluatorBeanInfo.java
        ClassifierPerformanceEvaluatorCustomizer.java
        Clusterer.java
        ClustererBeanInfo.java
        ClustererCustomizer.java
        ClustererPerformanceEvaluator.java
        ClustererPerformanceEvaluatorBeanInfo.java
        ConfigurationEvent.java
        ConfigurationListener.java
        ConfigurationProducer.java
        ConnectionNotificationConsumer.java
        CostBenefitAnalysis.java
        CostBenefitAnalysisBeanInfo.java
        CrossValidationFoldMaker.java
        CrossValidationFoldMakerBeanInfo.java
        CrossValidationFoldMakerCustomizer.java
        CustomizerCloseRequester.java
        CustomizerClosingListener.java
        DataFormatListener.java
        DataSetEvent.java
        DataSink.java
        DataSource.java
        DataSourceListener.java
        DataVisualizer.java
        DataVisualizerBeanInfo.java
        DataVisualizerCustomizer.java
        EnvironmentField.java
        EventConstraints.java
        FileEnvironmentField.java
        Filter.java
        FilterBeanInfo.java
        FilterCustomizer.java
        FlowRunner.java
        GOECustomizer.java
        GraphEvent.java
        GraphListener.java
        GraphViewer.java
        GraphViewerBeanInfo.java
        HeadlessEventCollector.java
        ImageEvent.java
        ImageListener.java
        ImageSaver.java
        ImageSaverBeanInfo.java
        ImageSaverCustomizer.java
        IncrementalClassifierEvaluator.java
        IncrementalClassifierEvaluatorBeanInfo.java
        IncrementalClassifierEvaluatorCustomizer.java
        IncrementalClassifierEvent.java
        IncrementalClassifierListener.java
        InstanceEvent.java
        InstanceListener.java
        InstanceStreamToBatchMaker.java
        InstanceStreamToBatchMakerBeanInfo.java
        KFStep.java
        KnowledgeFlow.java
        KnowledgeFlowApp.java
        Loader.java
        LoaderBeanInfo.java
        LoaderCustomizer.java
        LogPanel.java
        LogWriter.java
        MetaBean.java
        ModelPerformanceChart.java
        ModelPerformanceChartBeanInfo.java
        ModelPerformanceChartCustomizer.java
        Note.java
        NoteBeanInfo.java
        NoteCustomizer.java
        OffscreenChartRenderer.java
        PluginManager.java
        PredictionAppender.java
        PredictionAppenderBeanInfo.java
        PredictionAppenderCustomizer.java
        SQLViewerPerspective.java
        Saver.java
        SaverBeanInfo.java
        SaverCustomizer.java
        ScatterPlotMatrix.java
        ScatterPlotMatrixBeanInfo.java
        SerializedModelSaver.java
        SerializedModelSaverBeanInfo.java
        SerializedModelSaverCustomizer.java
        ShadowBorder.java
        Sorter.java
        SorterBeanInfo.java
        SorterCustomizer.java
        StartUpListener.java
        Startable.java
        StripChart.java
        StripChartBeanInfo.java
        StripChartCustomizer.java
        StructureProducer.java
        SubstringLabeler.java
        SubstringLabelerBeanInfo.java
        SubstringLabelerCustomizer.java
        SubstringReplacer.java
        SubstringReplacerBeanInfo.java
        SubstringReplacerCustomizer.java
        TestSetEvent.java
        TestSetListener.java
        TestSetMaker.java
        TestSetMakerBeanInfo.java
        TestSetProducer.java
        TextEvent.java
        TextListener.java
        TextViewer.java
        TextViewerBeanInfo.java
        ThresholdDataEvent.java
        ThresholdDataListener.java
        TrainTestSplitMaker.java
        TrainTestSplitMakerBeanInfo.java
        TrainTestSplitMakerCustomizer.java
        TrainingSetEvent.java
        TrainingSetListener.java
        TrainingSetMaker.java
        TrainingSetMakerBeanInfo.java
        TrainingSetProducer.java
        UserRequestAcceptor.java
        Visible.java
        VisualizableErrorEvent.java
        VisualizableErrorListener.java
        WekaOffscreenChartRenderer.java
        WekaWrapper.java
        xml
        XMLBeans.java
        boundaryvisualizer
        BoundaryPanel.java
        BoundaryPanelDistributed.java
        BoundaryVisualizer.java
        DataGenerator.java
        KDDataGenerator.java
        RemoteBoundaryVisualizerSubTask.java
        RemoteResult.java
        experiment
        AlgorithmListPanel.java
        DatasetListPanel.java
        DistributeExperimentPanel.java
        Experimenter.java
        ExperimenterDefaults.java
        GeneratorPropertyIteratorPanel.java
        HostListPanel.java
        OutputFormatDialog.java
        ResultsPanel.java
        RunNumberPanel.java
        RunPanel.java
        SetupModePanel.java
        SetupPanel.java
        SimpleSetupPanel.java
        explorer
        AbstractPlotInstances.java
        AssociationsPanel.java
        AttributeSelectionPanel.java
        ClassifierErrorsPlotInstances.java
        ClassifierPanel.java
        ClassifierPanelLaunchHandlerPlugin.java
        ClustererAssignmentsPlotInstances.java
        ClustererPanel.java
        ClustererPanelLaunchHandlerPlugin.java
        DataGeneratorPanel.java
        Explorer.java
        ExplorerDefaults.java
        PreprocessPanel.java
        VisualizePanel.java
        filters
        AddUserFieldsCustomizer.java
        graphvisualizer
        BIFFormatException.java
        BIFParser.java
        DotParser.java
        GraphConstants.java
        GraphEdge.java
        GraphNode.java
        GraphVisualizer.java
        HierarchicalBCEngine.java
        LayoutCompleteEvent.java
        LayoutCompleteEventListener.java
        LayoutEngine.java
        hierarchyvisualizer
        HierarchyVisualizer.java
        scripting
        FileScriptingPanel.java
        GroovyPanel.java
        GroovyScript.java
        JythonPanel.java
        JythonScript.java
        Script.java
        ScriptUtils.java
        ScriptingPanel.java
        SyntaxDocument.java
        event
        ScriptExecutionEvent.java
        ScriptExecutionListener.java
        TitleUpdatedEvent.java
        TitleUpdatedListener.java
        sql
        ConnectionPanel.java
        DbUtils.java
        InfoPanel.java
        InfoPanelCellRenderer.java
        QueryPanel.java
        ResultPanel.java
        ResultSetHelper.java
        ResultSetTable.java
        ResultSetTableCellRenderer.java
        ResultSetTableModel.java
        SqlViewer.java
        SqlViewerDialog.java
        event
        ConnectionEvent.java
        ConnectionListener.java
        HistoryChangedEvent.java
        HistoryChangedListener.java
        QueryExecuteEvent.java
        QueryExecuteListener.java
        ResultChangedEvent.java
        ResultChangedListener.java
        streams
        InstanceCounter.java
        InstanceEvent.java
        InstanceJoiner.java
        InstanceListener.java
        InstanceLoader.java
        InstanceProducer.java
        InstanceSavePanel.java
        InstanceTable.java
        InstanceViewer.java
        SerialInstanceListener.java
        treevisualizer
        Colors.java
        Edge.java
        NamedColor.java
        Node.java
        NodePlace.java
        PlaceNode1.java
        PlaceNode2.java
        TreeBuild.java
        TreeDisplayEvent.java
        TreeDisplayListener.java
        TreeVisualizer.java
        visualize
        AttributePanel.java
        AttributePanelEvent.java
        AttributePanelListener.java
        BMPWriter.java
        ClassPanel.java
        InstanceInfo.java
        InstanceInfoFrame.java
        JComponentWriter.java
        JPEGWriter.java
        LegendPanel.java
        MatrixPanel.java
        PNGWriter.java
        Plot2D.java
        Plot2DCompanion.java
        PlotData2D.java
        PostscriptGraphics.java
        PostscriptWriter.java
        PrintableComponent.java
        PrintableHandler.java
        PrintablePanel.java
        ThresholdVisualizePanel.java
        VisualizePanel.java
        VisualizePanelEvent.java
        VisualizePanelListener.java
        VisualizeUtils.java
        plugins
        AssociationRuleVisualizePlugin.java
        ErrorVisualizePlugin.java
        GraphVisualizePlugin.java
        TreeVisualizePlugin.java
        VisualizePlugin.java
    - test
      - java
        weka
        AllTests.java
        associations
        AbstractAssociatorTest.java
        AllTests.java
        AprioriTest.java
        FPGrowthTest.java
        FilteredAssociatorTest.java
        attributeSelection
        AbstractAttributeSelectionTest.java
        AbstractEvaluatorTest.java
        AbstractSearchTest.java
        AllTests.java
        BestFirstTest.java
        CfsSubsetEvalTest.java
        CorrelationAttributeEvalTest.java
        GainRatioAttributeEvalTest.java
        GreedyStepwiseTest.java
        InfoGainAttributeEvalTest.java
        OneRAttributeEvalTest.java
        PrincipalComponentsTest.java
        RankerTest.java
        ReliefFAttributeEvalTest.java
        SymmetricalUncertAttributeEvalTest.java
        WrapperSubsetEvalTest.java
        classifiers
        AbstractClassifierTest.java
        AllTests.java
        bayes
        BayesNetTest.java
        NaiveBayesMultinomialTest.java
        NaiveBayesMultinomialTextTest.java
        NaiveBayesMultinomialUpdateableTest.java
        NaiveBayesTest.java
        NaiveBayesUpdateableTest.java
        functions
        GaussianProcessesTest.java
        LinearRegressionTest.java
        LogisticTest.java
        MultilayerPerceptronTest.java
        SGDTest.java
        SGDTextTest.java
        SMOTest.java
        SMOregTest.java
        SimpleLinearRegressionTest.java
        SimpleLogisticTest.java
        VotedPerceptronTest.java
        supportVector
        AbstractKernelTest.java
        AllTests.java
        NormalizedPolyKernelTest.java
        PolyKernelTest.java
        PrecomputedKernelMatrixKernelTest.java
        PukTest.java
        RBFKernelTest.java
        StringKernelTest.java
        lazy
        IBkTest.java
        KStarTest.java
        LWLTest.java
        meta
        AdaBoostM1Test.java
        AdditiveRegressionTest.java
        AttributeSelectedClassifierTest.java
        BaggingTest.java
        CVParameterSelectionTest.java
        ClassificationViaRegressionTest.java
        CostSensitiveClassifierTest.java
        FilteredClassifierTest.java
        LogitBoostTest.java
        MultiClassClassifierTest.java
        MultiClassClassifierUpdateableTest.java
        MultiSchemeTest.java
        RandomCommitteeTest.java
        RandomSubSpaceTest.java
        RegressionByDiscretizationTest.java
        StackingTest.java
        ThresholdSelectorDummyClassifier.java
        VoteTest.java
        misc
        InputMappedClassifierTest.java
        SerializedClassifierTest.java
        pmml
        consumer
        AbstractPMMLClassifierTest.java
        AllTests.java
        GeneralRegressionTest.java
        NeuralNetworkTest.java
        RegressionTest.java
        RuleSetModelTest.java
        SupportVectorMachineModelTest.java
        TreeModelTest.java
        rules
        DecisionTableTest.java
        JRipTest.java
        M5RulesTest.java
        OneRTest.java
        PARTTest.java
        ZeroRTest.java
        trees
        DecisionStumpTest.java
        J48Test.java
        LMTTest.java
        M5PTest.java
        REPTreeTest.java
        RandomForestTest.java
        RandomTreeTest.java
        clusterers
        AbstractClustererTest.java
        AllTests.java
        CobwebTest.java
        EMTest.java
        FarthestFirstTest.java
        FilteredClustererTest.java
        HierarchicalClustererTest.java
        MakeDensityBasedClustererTest.java
        SimpleKMeansTest.java
        core
        AlgVectorTest.java
        AllTests.java
        InstancesTest.java
        MathematicalExpressionTest.java
        OptionHandlersTest.java
        RangeTest.java
        SingleIndexTest.java
        TrieTest.java
        UtilsTest.java
        converters
        AbstractConverterTest.java
        AbstractFileConverterTest.java
        AllTests.java
        ArffTest.java
        C45Test.java
        CSVTest.java
        JSONTest.java
        LibSVMTest.java
        MatlabTest.java
        SVMLightTest.java
        SerializedInstancesTest.java
        XRFFTest.java
        neighboursearch
        AbstractNearestNeighbourSearchTest.java
        AllTests.java
        BallTreeTest.java
        CoverTreeTest.java
        KDTreeTest.java
        LinearNNSearchTest.java
        tokenizers
        AbstractTokenizerTest.java
        AllTests.java
        AlphabeticTokenizerTest.java
        NGramTokenizerTest.java
        WordTokenizerTest.java
        datagenerators
        AbstractClusterDefinitionTest.java
        AbstractDataGeneratorTest.java
        AllTests.java
        classifiers
        classification
        AgrawalTest.java
        BayesNetTest.java
        LED24Test.java
        RDG1Test.java
        RandomRBFTest.java
        regression
        ExpressionTest.java
        MexicanHatTest.java
        clusterers
        BIRCHClusterTest.java
        SubspaceClusterDefinitionTest.java
        SubspaceClusterTest.java
        filters
        AbstractFilterTest.java
        AbstractTimeSeriesFilterTest.java
        AllFilterTest.java
        AllTests.java
        MultiFilterTest.java
        supervised
        attribute
        AddClassificationTest.java
        AttributeSelectionTest.java
        ClassOrderTest.java
        DiscretizeTest.java
        NominalToBinaryTest.java
        instance
        ResampleTest.java
        SpreadSubsampleTest.java
        StratifiedRemoveFoldsTest.java
        unsupervised
        attribute
        AddClusterTest.java
        AddExpressionTest.java
        AddIDTest.java
        AddNoiseTest.java
        AddTest.java
        AddValuesTest.java
        CenterTest.java
        ChangeDateFormatTest.java
        ClassAssignerTest.java
        ClusterMembershipTest.java
        CopyTest.java
        DiscretizeTest.java
        FirstOrderTest.java
        InterquartileRangeTest.java
        KernelFilterTest.java
        MakeIndicatorTest.java
        MathExpressionTest.java
        MergeManyValuesTest.java
        MergeTwoValuesTest.java
        NominalToBinaryTest.java
        NominalToStringTest.java
        NormalizeTest.java
        NumericCleanerTest.java
        NumericToBinaryTest.java
        NumericToNominalTest.java
        NumericTransformTest.java
        ObfuscateTest.java
        PKIDiscretizeTest.java
        PartitionedMultiFilterTest.java
        PrincipalComponentsTest.java
        RandomProjectionTest.java
        RandomSubsetTest.java
        RemoveByNameTest.java
        RemoveTest.java
        RemoveTypeTest.java
        RemoveUselessTest.java
        RenameAttributeTest.java
        ReorderTest.java
        ReplaceMissingValuesTest.java
        SortLabelsTest.java
        StandardizeTest.java
        StringToNominalTest.java
        StringToWordVectorTest.java
        SwapValuesTest.java
        TimeSeriesDeltaTest.java
        TimeSeriesTranslateTest.java
        instance
        NonSparseToSparseTest.java
        RandomizeTest.java
        RemoveFoldsTest.java
        RemoveFrequentValuesTest.java
        RemoveMisclassifiedTest.java
        RemovePercentageTest.java
        RemoveRangeTest.java
        RemoveWithValuesTest.java
        ResampleTest.java
        ReservoirSampleTest.java
        SparseToNonSparseTest.java
        SubsetByExpressionTest.java
        test
        Regression.java
        WekaTestSuite.java

/*
 *   This program is free software: you can redistribute it and/or modify
 *   it under the terms of the GNU General Public License as published by
 *   the Free Software Foundation, either version 3 of the License, or
 *   (at your option) any later version.
 *
 *   This program is distributed in the hope that it will be useful,
 *   but WITHOUT ANY WARRANTY; without even the implied warranty of
 *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *   GNU General Public License for more details.
 *
 *   You should have received a copy of the GNU General Public License
 *   along with this program.  If not, see <http://www.gnu.org/licenses/>.
 */

/*
 *    SGDText.java
 *    Copyright (C) 2012 University of Waikato, Hamilton, New Zealand
 *
 */

package weka.classifiers.functions;

import java.io.File;
import java.io.Serializable;
import java.util.ArrayList;
import java.util.Enumeration;
import java.util.Iterator;
import java.util.LinkedHashMap;
import java.util.Map;
import java.util.Random;
import java.util.Vector;

import weka.classifiers.RandomizableClassifier;
import weka.classifiers.UpdateableClassifier;
import weka.core.Attribute;
import weka.core.Capabilities;
import weka.core.Capabilities.Capability;
import weka.core.DenseInstance;
import weka.core.FastVector;
import weka.core.Instance;
import weka.core.Instances;
import weka.core.Option;
import weka.core.OptionHandler;
import weka.core.RevisionUtils;
import weka.core.SelectedTag;
import weka.core.Stopwords;
import weka.core.Tag;
import weka.core.Utils;
import weka.core.WeightedInstancesHandler;
import weka.core.stemmers.NullStemmer;
import weka.core.stemmers.Stemmer;
import weka.core.tokenizers.Tokenizer;
import weka.core.tokenizers.WordTokenizer;

/**
 <!-- globalinfo-start -->
 * Implements stochastic gradient descent for learning a linear binary class SVM or binary class logistic regression on text data. Operates directly (and only) on String attributes. Other types of input attributes are accepted but ignored during training and classification.
 * <p/>
 <!-- globalinfo-end -->
 *
 <!-- options-start -->
 * Valid options are: <p/>
 * 
 * <pre> -F
 *  Set the loss function to minimize. 0 = hinge loss (SVM), 1 = log loss (logistic regression)
 *  (default = 0)</pre>
 * 
 * <pre> -outputProbs
 *  Output probabilities for SVMs (fits a logsitic
 *  model to the output of the SVM)</pre>
 * 
 * <pre> -L
 *  The learning rate (default = 0.01).</pre>
 * 
 * <pre> -R <double>
 *  The lambda regularization constant (default = 0.0001)</pre>
 * 
 * <pre> -E <integer>
 *  The number of epochs to perform (batch learning only, default = 500)</pre>
 * 
 * <pre> -W
 *  Use word frequencies instead of binary bag of words.</pre>
 * 
 * <pre> -P <# instances>
 *  How often to prune the dictionary of low frequency words (default = 0, i.e. don't prune)</pre>
 * 
 * <pre> -M <double>
 *  Minimum word frequency. Words with less than this frequence are ignored.
 *  If periodic pruning is turned on then this is also used to determine which
 *  words to remove from the dictionary (default = 3).</pre>
 * 
 * <pre> -normalize
 *  Normalize document length (use in conjunction with -norm and -lnorm</pre>
 * 
 * <pre> -norm <num>
 *  Specify the norm that each instance must have (default 1.0)</pre>
 * 
 * <pre> -lnorm <num>
 *  Specify L-norm to use (default 2.0)</pre>
 * 
 * <pre> -lowercase
 *  Convert all tokens to lowercase before adding to the dictionary.</pre>
 * 
 * <pre> -stoplist
 *  Ignore words that are in the stoplist.</pre>
 * 
 * <pre> -stopwords <file>
 *  A file containing stopwords to override the default ones.
 *  Using this option automatically sets the flag ('-stoplist') to use the
 *  stoplist if the file exists.
 *  Format: one stopword per line, lines starting with '#'
 *  are interpreted as comments and ignored.</pre>
 * 
 * <pre> -tokenizer <spec>
 *  The tokenizing algorihtm (classname plus parameters) to use.
 *  (default: weka.core.tokenizers.WordTokenizer)</pre>
 * 
 * <pre> -stemmer <spec>
 *  The stemmering algorihtm (classname plus parameters) to use.</pre>
 * 
 <!-- options-end -->
 *
 * @author Mark Hall (mhall{[at]}pentaho{[dot]}com)
 * @author Eibe Frank (eibe{[at]}cs{[dot]}waikato{[dot]}ac{[dot]}nz)
 *
 */
public class SGDText extends RandomizableClassifier 
  implements UpdateableClassifier, WeightedInstancesHandler {
  
  /** For serialization */
  private static final long serialVersionUID = 7200171484002029584L;

  private static class Count implements Serializable {
    
    /**
     * For serialization
     */
    private static final long serialVersionUID = 2104201532017340967L;
    
    public double m_count;
    
    public double m_weight;
    
    public Count(double c) {
      m_count = c;
    }    
  }
  
  /**
   * The number of training instances at which to periodically prune the dictionary
   * of min frequency words. Empty or null string indicates don't prune
   */
  protected int m_periodicP = 0;
  
  /** Only consider dictionary words (features) that occur at least this many times */
  protected double m_minWordP = 3;
  
  /** Use word frequencies rather than bag-of-words if true */
  protected boolean m_wordFrequencies = false;
  
  /** Whether to normalized document length or not */
  protected boolean m_normalize = false;
  
  /** The length that each document vector should have in the end */
  protected double m_norm = 1.0;
  
  /** The L-norm to use */
  protected double m_lnorm = 2.0;
  
  /** The dictionary (and term weights) */
  protected LinkedHashMap<String, Count> m_dictionary;
  
  /** Default (rainbow) stopwords */
  protected transient Stopwords m_stopwords;
  
  /** 
   * a file containing stopwords for using others than the default Rainbow 
   * ones.
   */
  protected File m_stopwordsFile = new File(System.getProperty("user.dir"));
  
  /** The tokenizer to use */
  protected Tokenizer m_tokenizer = new WordTokenizer();
  
  /** Whether or not to convert all tokens to lowercase */
  protected boolean m_lowercaseTokens;
  
  /** The stemming algorithm. */
  protected Stemmer m_stemmer = new NullStemmer();
  
  /** Whether or not to use a stop list */
  protected boolean m_useStopList; 
  
  /** The regularization parameter */
  protected double m_lambda = 0.0001;
  
  /** The learning rate */
  protected double m_learningRate = 0.01;
  
  /** Holds the current iteration number */
  protected double m_t;
  
  /** Holds the bias term */
  protected double m_bias;
  
  /** The number of training instances */
  protected double m_numInstances;
  
  /** The header of the training data */
  protected Instances m_data;
  
  /**
   *  The number of epochs to perform (batch learning). Total iterations is
   *  m_epochs * num instances 
   */
  protected int m_epochs = 500;
  
  /** 
   * Holds the current document vector (LinkedHashMap is more efficient
   * when iterating over EntrySet than HashMap) 
   */
  protected transient LinkedHashMap<String, Count> m_inputVector;
  
  /** the hinge loss function. */
  public static final int HINGE = 0;
  
  /** the log loss function. */
  public static final int LOGLOSS = 1;
  
  /** The current loss function to minimize */
  protected int m_loss = HINGE;
  
  /** Loss functions to choose from */
  public static final Tag [] TAGS_SELECTION = {
    new Tag(HINGE, "Hinge loss (SVM)"),
    new Tag(LOGLOSS, "Log loss (logistic regression)")    
  };
  
  /** Used for producing probabilities for SVM via SGD logistic regression */
  protected SGD m_svmProbs;
  
  /** 
   * True if a logistic regression is to be fit to the output of the SVM for
   * producing probability estimates
   */
  protected boolean m_fitLogistic = false;
  protected Instances m_fitLogisticStructure;
  
  protected double dloss(double z) {
    if (m_loss == HINGE) {
      return (z < 1) ? 1 : 0;
    } else {
      // log loss
      if (z < 0) {
        return 1.0 / (Math.exp(z) + 1.0);  
      } else {
        double t = Math.exp(-z);
        return t / (t + 1);
      }
    }    
  }
    
  /**
   * Returns default capabilities of the classifier.
   *
   * @return      the capabilities of this classifier
   */
  public Capabilities getCapabilities() {
    Capabilities result = super.getCapabilities();
    result.disableAll();
    
    //attributes    
    result.enable(Capability.STRING_ATTRIBUTES);
    result.enable(Capability.NOMINAL_ATTRIBUTES);
    result.enable(Capability.DATE_ATTRIBUTES);
    result.enable(Capability.NUMERIC_ATTRIBUTES);
    result.enable(Capability.MISSING_VALUES);
    
    result.enable(Capability.BINARY_CLASS);
    result.enable(Capability.MISSING_CLASS_VALUES);
    
    // instances
    result.setMinimumNumberInstances(0);
    
    return result;
  }
  
  /**
   * the stemming algorithm to use, null means no stemming at all (i.e., the
   * NullStemmer is used).
   *
   * @param value     the configured stemming algorithm, or null
   * @see             NullStemmer
   */
  public void setStemmer(Stemmer value) {
    if (value != null)
      m_stemmer = value;
    else
      m_stemmer = new NullStemmer();
  }

  /**
   * Returns the current stemming algorithm, null if none is used.
   *
   * @return          the current stemming algorithm, null if none set
   */
  public Stemmer getStemmer() {
    return m_stemmer;
  }

  /**
   * Returns the tip text for this property.
   *
   * @return tip text for this property suitable for
   * displaying in the explorer/experimenter gui
   */
  public String stemmerTipText() {
    return "The stemming algorithm to use on the words.";
  }
  
  /**
   * the tokenizer algorithm to use.
   *
   * @param value     the configured tokenizing algorithm
   */
  public void setTokenizer(Tokenizer value) {
    m_tokenizer = value;
  }

  /**
   * Returns the current tokenizer algorithm.
   *
   * @return          the current tokenizer algorithm
   */
  public Tokenizer getTokenizer() {
    return m_tokenizer;
  }

  /**
   * Returns the tip text for this property.
   *
   * @return tip text for this property suitable for
   * displaying in the explorer/experimenter gui
   */
  public String tokenizerTipText() {
    return "The tokenizing algorithm to use on the strings.";
  }
  
  /**
   * Returns the tip text for this property
   * 
   * @return tip text for this property suitable for
   * displaying in the explorer/experimenter gui
   */
  public String useWordFrequenciesTipText() {
    return "Use word frequencies rather than binary "
      + "bag of words representation";
  }
  
  /**
   * Set whether to use word frequencies rather than binary
   * bag of words representation.
   * 
   * @param u true if word frequencies are to be used.
   */
  public void setUseWordFrequencies(boolean u) {
    m_wordFrequencies = u;
  }
  
  /**
   * Get whether to use word frequencies rather than binary
   * bag of words representation.
   * 
   * @param u true if word frequencies are to be used.
   */
  public boolean getUseWordFrequencies() {
    return m_wordFrequencies;
  }
  
  /**
   * Returns the tip text for this property
   * 
   * @return tip text for this property suitable for
   * displaying in the explorer/experimenter gui
   */
  public String lowercaseTokensTipText() {
    return "Whether to convert all tokens to lowercase";
  }
  
  /**
   * Set whether to convert all tokens to lowercase
   * 
   * @param l true if all tokens are to be converted to
   * lowercase
   */
  public void setLowercaseTokens(boolean l) {
    m_lowercaseTokens = l;
  }
  
  /**
   * Get whether to convert all tokens to lowercase
   * 
   * @return true true if all tokens are to be converted to
   * lowercase
   */
  public boolean getLowercaseTokens() {
    return m_lowercaseTokens;
  }
  
  /**
   * Returns the tip text for this property
   * 
   * @return tip text for this property suitable for
   * displaying in the explorer/experimenter gui
   */
  public String useStopListTipText() {
    return "If true, ignores all words that are on the stoplist.";
  }
  
  /**
   * Set whether to ignore all words that are on the stoplist. 
   * 
   * @param u true to ignore all words on the stoplist.
   */
  public void setUseStopList(boolean u) {
    m_useStopList = u;
  }
  
  /**
   * Get whether to ignore all words that are on the stoplist. 
   * 
   * @return true to ignore all words on the stoplist.
   */
  public boolean getUseStopList() {
    return m_useStopList;
  }
  
  /**
   * sets the file containing the stopwords, null or a directory unset the
   * stopwords. If the file exists, it automatically turns on the flag to
   * use the stoplist.
   *
   * @param value     the file containing the stopwords
   */
  public void setStopwords(File value) {
    if (value == null)
      value = new File(System.getProperty("user.dir"));

    m_stopwordsFile = value;
    if (value.exists() && value.isFile())
      setUseStopList(true);
  }

  /**
   * returns the file used for obtaining the stopwords, if the file represents
   * a directory then the default ones are used.
   *
   * @return          the file containing the stopwords
   */
  public File getStopwords() {
    return m_stopwordsFile;
  }

  /**
   * Returns the tip text for this property.
   *
   * @return tip text for this property suitable for
   * displaying in the explorer/experimenter gui
   */
  public String stopwordsTipText() {
    return "The file containing the stopwords (if this is a directory then the default ones are used).";
  }
  
  /**
   * Returns the tip text for this property
   * 
   * @return tip text for this property suitable for
   * displaying in the explorer/experimenter gui
   */
  public String periodicPruningTipText() {
    return "How often (number of instances) to prune " +
                "the dictionary of low frequency terms. " +
                "0 means don't prune. Setting a positive " +
                "integer n means prune after every n instances";
  }
  
  /**
   * Set how often to prune the dictionary
   * 
   * @param p how often to prune
   */
  public void setPeriodicPruning(int p) {
    m_periodicP = p;
  }
  
  /**
   * Get how often to prune the dictionary
   * 
   * @return how often to prune the dictionary
   */
  public int getPeriodicPruning() {
    return m_periodicP;
  }
  
  /**
   * Returns the tip text for this property
   * 
   * @return tip text for this property suitable for
   * displaying in the explorer/experimenter gui
   */
  public String minWordFrequencyTipText() {
    return "Ignore any words that don't occur at least "
      + "min frequency times in the training data. If periodic "
      + "pruning is turned on, then the dictionary is pruned " 
      + "according to this value";
      	
  }
  
  /**
   * Set the minimum word frequency. Words that don't occur
   * at least min freq times are ignored when updating weights. 
   * If periodic pruning is turned on, then min frequency is used
   * when removing words from the dictionary.
   * 
   * @param minFreq the minimum word frequency to use
   */
  public void setMinWordFrequency(double minFreq) {
    m_minWordP = minFreq;
  }
  
  /**
   * Get the minimum word frequency. Words that don't occur
   * at least min freq times are ignored when updating weights. 
   * If periodic pruning is turned on, then min frequency is used
   * when removing words from the dictionary.
   * 
   * @param return the minimum word frequency to use
   */
  public double getMinWordFrequency() {
    return m_minWordP;
  }  
  
  /**
   * Returns the tip text for this property
   *
   * @return tip text for this property suitable for
   * displaying in the explorer/experimenter gui
   */
  public String normalizeDocLengthTipText() {
    return "If true then document length is normalized according " +
    		"to the settings for norm and lnorm";
  }
  
  /**
   * Set whether to normalize the length of each document
   * 
   * @param norm true if document lengths is to be normalized
   */
  public void setNormalizeDocLength(boolean norm) {
    m_normalize = norm;
  }
  
  /**
   * Get whether to normalize the length of each document
   * 
   * @return true if document lengths is to be normalized
   */
  public boolean getNormalizeDocLength() {
    return m_normalize;
  }
  
  /**
   * Returns the tip text for this property
   *
   * @return tip text for this property suitable for
   * displaying in the explorer/experimenter gui
   */
  public String normTipText() { 
    return "The norm of the instances after normalization.";
  }
  
  /**
   * Get the instance's Norm.
   *
   * @return the Norm
   */
  public double getNorm() {
    return m_norm;
  }
  
  /**
   * Set the norm of the instances
   *
   * @param newNorm the norm to wich the instances must be set
   */
  public void setNorm(double newNorm) {
    m_norm = newNorm;
  }
  
  /**
   * Returns the tip text for this property
   *
   * @return tip text for this property suitable for
   * displaying in the explorer/experimenter gui
   */
  public String LNormTipText() { 
    return "The LNorm to use for document length normalization.";
  }
  
  /**
   * Get the L Norm used.
   *
   * @return the L-norm used
   */
  public double getLNorm() {
    return m_lnorm;
  }
  
  /**
   * Set the L-norm to used
   *
   * @param newLNorm the L-norm
   */
  public void setLNorm(double newLNorm) {
    m_lnorm = newLNorm;
  }
  
  /**
   * Returns the tip text for this property
   * 
   * @return tip text for this property suitable for
   * displaying in the explorer/experimenter gui
   */
  public String lambdaTipText() {
    return "The regularization constant. (default = 0.0001)";
  }
  
  /**
   * Set the value of lambda to use
   * 
   * @param lambda the value of lambda to use
   */
  public void setLambda(double lambda) {
    m_lambda = lambda;
  }
  
  /**
   * Get the current value of lambda
   * 
   * @return the current value of lambda
   */
  public double getLambda() {
    return m_lambda;
  }
  
  /**
   * Set the learning rate.
   * 
   * @param lr the learning rate to use.
   */
  public void setLearningRate(double lr) {
    m_learningRate = lr;
  }
  
  /**
   * Get the learning rate.
   * 
   * @return the learning rate
   */
  public double getLearningRate() {
    return m_learningRate;
  }
  
  /**
   * Returns the tip text for this property
   * 
   * @return tip text for this property suitable for
   * displaying in the explorer/experimenter gui
   */
  public String learningRateTipText() {
    return "The learning rate.";
  }
  
  /**
   * Returns the tip text for this property
   * 
   * @return tip text for this property suitable for
   * displaying in the explorer/experimenter gui
   */
  public String epochsTipText() {
    return "The number of epochs to perform (batch learning). " +
                "The total number of iterations is epochs * num" +
                " instances.";
  }
  
  /**
   * Set the number of epochs to use
   * 
   * @param e the number of epochs to use
   */
  public void setEpochs(int e) {
    m_epochs = e;
  }
  
  /**
   * Get current number of epochs
   * 
   * @return the current number of epochs
   */
  public int getEpochs() {
    return m_epochs;
  }
  
  /**
   * Set the loss function to use.
   * 
   * @param function the loss function to use.
   */
  public void setLossFunction(SelectedTag function) {
    if (function.getTags() == TAGS_SELECTION) {
      m_loss = function.getSelectedTag().getID();
    }
  }
  
  /**
   * Get the current loss function.
   * 
   * @return the current loss function.
   */
  public SelectedTag getLossFunction() {
    return new SelectedTag(m_loss, TAGS_SELECTION);
  }
  
  /**
   * Returns the tip text for this property
   * 
   * @return tip text for this property suitable for
   * displaying in the explorer/experimenter gui
   */
  public String lossFunctionTipText() {
    return "The loss function to use. Hinge loss (SVM), " +
                "log loss (logistic regression) or " +
                "squared loss (regression).";
  }
  
  /**
   * Set whether to fit a logistic regression (itself trained
   * using SGD) to the outputs of the SVM (if an SVM is being 
   * learned).
   * 
   * @param o true if a logistic regression is to be fit to the 
   * output of the SVM to produce probability estimates.
   */
  public void setOutputProbsForSVM(boolean o) {
    m_fitLogistic = o;
  }
  
  /**
   * Get whether to fit a logistic regression (itself trained
   * using SGD) to the outputs of the SVM (if an SVM is being 
   * learned).
   * 
   * @return true if a logistic regression is to be fit to the 
   * output of the SVM to produce probability estimates.
   */
  public boolean getOutputProbsForSVM() {
    return m_fitLogistic;
  }
  
  /**
   * Returns the tip text for this property
   * 
   * @return tip text for this property suitable for
   * displaying in the explorer/experimenter gui
   */
  public String outputProbsForSVMTipText() {
    return "Fit a logistic regression to the output of SVM for " +
    		"producing probability estimates";
  }
  
  /**
   * Returns an enumeration describing the available options.
   *
   * @return an enumeration of all the available options.
   */
  public Enumeration<Option> listOptions() {

    Vector<Option> newVector = new Vector<Option>();
    newVector.add(new Option("\tSet the loss function to minimize. 0 = " +
        "hinge loss (SVM), 1 = log loss (logistic regression)\n\t" +
        "(default = 0)", "F", 1, "-F"));
    newVector.add(new Option("\tOutput probabilities for SVMs (fits a logsitic\n\t" +
    		"model to the output of the SVM)", "output-probs", 
        0, "-outputProbs"));
    newVector.add(new Option("\tThe learning rate (default = 0.01).", "L", 1, "-L"));
    newVector.add(new Option("\tThe lambda regularization constant " +
                "(default = 0.0001)",
                "R", 1, "-R <double>"));
    newVector.add(new Option("\tThe number of epochs to perform (" +
                "batch learning only, default = 500)", "E", 1,
                "-E <integer>"));
    newVector.add(new Option("\tUse word frequencies instead of " +
    		"binary bag of words.", "W", 0, 
    		"-W"));
    newVector.add(new Option("\tHow often to prune the dictionary " +
    		"of low frequency words (default = 0, i.e. don't prune)", 
    		"P", 1, "-P <# instances>"));
    newVector.add(new Option("\tMinimum word frequency. Words with less " +
    		"than this frequence are ignored.\n\tIf periodic pruning " +
    		"is turned on then this is also used to determine which\n\t" +
    		"words to remove from the dictionary (default = 3).",
    		"M", 1, "-M <double>"));
    newVector.addElement(new Option(
        "\tNormalize document length (use in conjunction with -norm and " +
        "-lnorm)", "normalize", 0, "-normalize"));
    newVector.addElement(new Option(
        "\tSpecify the norm that each instance must have (default 1.0)",
        "norm", 1, "-norm <num>"));
    newVector.addElement(new Option(
        "\tSpecify L-norm to use (default 2.0)",
        "lnorm", 1, "-lnorm <num>"));
    newVector.addElement(new Option("\tConvert all tokens to lowercase " +
    		"before adding to the dictionary.",
        "lowercase", 0, "-lowercase"));
    newVector.addElement(new Option(
        "\tIgnore words that are in the stoplist.",
        "stoplist", 0, "-stoplist"));
    newVector.addElement(new Option(
        "\tA file containing stopwords to override the default ones.\n"
        + "\tUsing this option automatically sets the flag ('-stoplist') to use the\n"
        + "\tstoplist if the file exists.\n"
        + "\tFormat: one stopword per line, lines starting with '#'\n"
        + "\tare interpreted as comments and ignored.",
        "stopwords", 1, "-stopwords <file>"));
    newVector.addElement(new Option(
        "\tThe tokenizing algorihtm (classname plus parameters) to use.\n"
        + "\t(default: " + WordTokenizer.class.getName() + ")",
        "tokenizer", 1, "-tokenizer <spec>"));
    newVector.addElement(new Option(
        "\tThe stemmering algorihtm (classname plus parameters) to use.",
        "stemmer", 1, "-stemmer <spec>"));
    
    return newVector.elements();
  }
  
  /**
   * Parses a given list of options. <p/>
   * 
   <!-- options-start -->
   * Valid options are: <p/>
   * 
   * <pre> -F
   *  Set the loss function to minimize. 0 = hinge loss (SVM), 1 = log loss (logistic regression)
   *  (default = 0)</pre>
   * 
   * <pre> -outputProbs
   *  Output probabilities for SVMs (fits a logsitic
   *  model to the output of the SVM)</pre>
   * 
   * <pre> -L
   *  The learning rate (default = 0.01).</pre>
   * 
   * <pre> -R <double>
   *  The lambda regularization constant (default = 0.0001)</pre>
   * 
   * <pre> -E <integer>
   *  The number of epochs to perform (batch learning only, default = 500)</pre>
   * 
   * <pre> -W
   *  Use word frequencies instead of binary bag of words.</pre>
   * 
   * <pre> -P <# instances>
   *  How often to prune the dictionary of low frequency words (default = 0, i.e. don't prune)</pre>
   * 
   * <pre> -M <double>
   *  Minimum word frequency. Words with less than this frequence are ignored.
   *  If periodic pruning is turned on then this is also used to determine which
   *  words to remove from the dictionary (default = 3).</pre>
   * 
   * <pre> -normalize
   *  Normalize document length (use in conjunction with -norm and -lnorm</pre>
   * 
   * <pre> -norm <num>
   *  Specify the norm that each instance must have (default 1.0)</pre>
   * 
   * <pre> -lnorm <num>
   *  Specify L-norm to use (default 2.0)</pre>
   * 
   * <pre> -lowercase
   *  Convert all tokens to lowercase before adding to the dictionary.</pre>
   * 
   * <pre> -stoplist
   *  Ignore words that are in the stoplist.</pre>
   * 
   * <pre> -stopwords <file>
   *  A file containing stopwords to override the default ones.
   *  Using this option automatically sets the flag ('-stoplist') to use the
   *  stoplist if the file exists.
   *  Format: one stopword per line, lines starting with '#'
   *  are interpreted as comments and ignored.</pre>
   * 
   * <pre> -tokenizer <spec>
   *  The tokenizing algorihtm (classname plus parameters) to use.
   *  (default: weka.core.tokenizers.WordTokenizer)</pre>
   * 
   * <pre> -stemmer <spec>
   *  The stemmering algorihtm (classname plus parameters) to use.</pre>
   * 
   <!-- options-end -->
   *
   * @param options the list of options as an array of strings
   * @throws Exception if an option is not supported
   */
  public void setOptions(String[] options) throws Exception {
    reset();
    
    super.setOptions(options);
    
    String lossString = Utils.getOption('F', options);
    if (lossString.length() != 0) {
      setLossFunction(new SelectedTag(Integer.parseInt(lossString), 
          TAGS_SELECTION));
    }
    
    setOutputProbsForSVM(Utils.getFlag("output-probs", options));
    
    String lambdaString = Utils.getOption('R', options);
    if (lambdaString.length() > 0) {
      setLambda(Double.parseDouble(lambdaString));
    }
    
    String learningRateString = Utils.getOption('L', options);
    if (learningRateString.length() > 0) {
      setLearningRate(Double.parseDouble(learningRateString));
    }
    
    String epochsString = Utils.getOption("E", options);
    if (epochsString.length() > 0) {
      setEpochs(Integer.parseInt(epochsString));
    }        
    
    setUseWordFrequencies(Utils.getFlag("W", options));
    
    String pruneFreqS = Utils.getOption("P", options);
    if (pruneFreqS.length() > 0) {
      setPeriodicPruning(Integer.parseInt(pruneFreqS));
    }
    String minFreq = Utils.getOption("M", options);
    if (minFreq.length() > 0) {
      setMinWordFrequency(Double.parseDouble(minFreq));
    }
    
    setNormalizeDocLength(Utils.getFlag("normalize", options));
    
    String normFreqS = Utils.getOption("norm", options);
    if (normFreqS.length() > 0) {
      setNorm(Double.parseDouble(normFreqS));
    }
    String lnormFreqS = Utils.getOption("lnorm", options);
    if (lnormFreqS.length() > 0) {
      setLNorm(Double.parseDouble(lnormFreqS));
    }
    
    setLowercaseTokens(Utils.getFlag("lowercase", options));
    setUseStopList(Utils.getFlag("stoplist", options));
    
    String stopwordsS = Utils.getOption("stopwords", options);
    if (stopwordsS.length() > 0) {
      setStopwords(new File(stopwordsS));
    } else {
      setStopwords(null);
    }
    
    String tokenizerString = Utils.getOption("tokenizer", options);
    if (tokenizerString.length() == 0) {
      setTokenizer(new WordTokenizer());
    } else {
      String[] tokenizerSpec = Utils.splitOptions(tokenizerString);
      if (tokenizerSpec.length == 0)
        throw new Exception("Invalid tokenizer specification string");
      String tokenizerName = tokenizerSpec[0];
      tokenizerSpec[0] = "";
      Tokenizer tokenizer = (Tokenizer) Class.forName(tokenizerName).newInstance();
      if (tokenizer instanceof OptionHandler)
        ((OptionHandler) tokenizer).setOptions(tokenizerSpec);
      setTokenizer(tokenizer);
    }
    
    String stemmerString = Utils.getOption("stemmer", options);
    if (stemmerString.length() == 0) {
      setStemmer(null);
    } else {
      String[] stemmerSpec = Utils.splitOptions(stemmerString);
      if (stemmerSpec.length == 0)
        throw new Exception("Invalid stemmer specification string");
      String stemmerName = stemmerSpec[0];
      stemmerSpec[0] = "";
      Stemmer stemmer = (Stemmer) Class.forName(stemmerName).newInstance();
      if (stemmer instanceof OptionHandler)
        ((OptionHandler) stemmer).setOptions(stemmerSpec);
      setStemmer(stemmer);
    }
  }
  
  /**
   * Gets the current settings of the classifier.
   *
   * @return an array of strings suitable for passing to setOptions
   */
  public String[] getOptions() {
    ArrayList<String> options = new ArrayList<String>();
    
    options.add("-F"); options.add("" + getLossFunction().getSelectedTag().getID());
    if (getOutputProbsForSVM()) {
      options.add("-output-probs");
    }
    options.add("-L"); options.add("" + getLearningRate());
    options.add("-R"); options.add("" + getLambda());
    options.add("-E"); options.add("" + getEpochs());  
    if (getUseWordFrequencies()) {
      options.add("-W");
    }
    options.add("-P"); options.add("" + getPeriodicPruning());
    options.add("-M"); options.add("" + getMinWordFrequency());
    
    if (getNormalizeDocLength()) {
      options.add("-normalize");
    }
    options.add("-norm"); options.add("" + getNorm());
    options.add("-lnorm"); options.add("" + getLNorm());
    if (getLowercaseTokens()) {
      options.add("-lowercase");
    }
    if (getUseStopList()) {
      options.add("-stoplist");
    }
    if (!getStopwords().isDirectory()) {
      options.add("-stopwords"); options.add(getStopwords().getAbsolutePath());
    }
    
    options.add("-tokenizer");
    String spec = getTokenizer().getClass().getName();
    if (getTokenizer() instanceof OptionHandler)
      spec += " " + Utils.joinOptions(
          ((OptionHandler) getTokenizer()).getOptions());
    options.add(spec.trim());
    
    if (getStemmer() != null) {
      options.add("-stemmer");
      spec = getStemmer().getClass().getName();
      if (getStemmer() instanceof OptionHandler) {
        spec += " " + Utils.joinOptions(((OptionHandler) getStemmer()).getOptions());
      }
      
      options.add(spec.trim());
    }
    
    return options.toArray(new String[1]);
  }
  
  /**
   * Returns a string describing classifier
   * @return a description suitable for
   * displaying in the explorer/experimenter gui
   */
  public String globalInfo() {
    return "Implements stochastic gradient descent for learning" +
                " a linear binary class SVM or binary class" +
                " logistic regression on text data. Operates directly (and only) " +
                "on String attributes. Other types of input attributes are accepted " +
                "but ignored during training and classification.";
  }
  
  /**
   * Reset the classifier.
   */
  public void reset() {
    m_t = 1;
    m_dictionary = null;
  }

  /**
   * Method for building the classifier.
   * 
   * @param data the set of training instances.
   * @throws Exception if the classifier can't be built successfully.
   */
  public void buildClassifier(Instances data) throws Exception {
    reset();    
    
/*    boolean hasString = false;
    for (int i = 0; i < data.numAttributes(); i++) {
      if (data.attribute(i).isString() && data.classIndex() != i) {
        hasString = true;
        break;
      }
    }
    
    if (!hasString) {
      throw new Exception("Incoming data does not have any string attributes!");
    } */
    
    // can classifier handle the data?
    getCapabilities().testWithFail(data);
    
    m_dictionary = new LinkedHashMap<String, Count>(10000);
    
    m_numInstances = data.numInstances();
    m_data = new Instances(data, 0);
    data = new Instances(data);
    
    if (m_fitLogistic && m_loss == HINGE) {
      initializeSVMProbs(data);      
    }
    
    if (data.numInstances() > 0) {
      data.randomize(new Random(getSeed()));
      train(data);
    }    
  }
  
  protected void initializeSVMProbs(Instances data) throws Exception {
    m_svmProbs = new SGD();
    m_svmProbs.setLossFunction(new SelectedTag(SGD.LOGLOSS, 
        TAGS_SELECTION));
    m_svmProbs.setLearningRate(m_learningRate);
    m_svmProbs.setLambda(m_lambda);
    m_svmProbs.setEpochs(m_epochs);
    FastVector atts = new FastVector(2);
    atts.addElement(new Attribute("pred"));
    FastVector attVals = new FastVector(2);
    attVals.addElement(data.classAttribute().value(0));
    attVals.addElement(data.classAttribute().value(1));
    atts.addElement(new Attribute("class", attVals));
    m_fitLogisticStructure = new Instances("data", atts, 0);
    m_fitLogisticStructure.setClassIndex(1);
    
    m_svmProbs.buildClassifier(m_fitLogisticStructure);
  }
  
  protected void train(Instances data) throws Exception {
    for (int e = 0; e < m_epochs; e++) {
      for (int i = 0; i < data.numInstances(); i++) {
        if (e == 0) {
          updateClassifier(data.instance(i), true);
        } else {
          updateClassifier(data.instance(i), false);
        }
      }
    }
  }

  /**
   * Updates the classifier with the given instance.
   *
   * @param instance the new training instance to include in the model 
   * @exception Exception if the instance could not be incorporated in
   * the model.
   */
  public void updateClassifier(Instance instance) throws Exception {
    updateClassifier(instance, true);
  }
  
  protected void updateClassifier(Instance instance, boolean updateDictionary)
    throws Exception {
    
    if (!instance.classIsMissing()) {
      
      // tokenize
      tokenizeInstance(instance, updateDictionary);
      
      // make a meta instance for the logistic model before we update
      // the SVM
      if (m_loss == HINGE && m_fitLogistic) {
        double pred = svmOutput();
        double[] vals = new double[2];
        vals[0] = pred;
        vals[1] = instance.classValue();
        DenseInstance metaI = new DenseInstance(instance.weight(), vals);
        metaI.setDataset(m_fitLogisticStructure);
        m_svmProbs.updateClassifier(metaI);
      }
      
      // --- 
      double wx = dotProd(m_inputVector);
      double y = (instance.classValue() == 0) ? -1 : 1;
      double z = y * (wx + m_bias);
      
      // Compute multiplier for weight decay
      double multiplier = 1.0;
      if (m_numInstances == 0) {
        multiplier = 1.0 - (m_learningRate * m_lambda) / m_t;
      } else {
        multiplier = 1.0 - (m_learningRate * m_lambda) / m_numInstances;
      }      
      for (Count c : m_dictionary.values()) {
        c.m_weight *= multiplier;
      }
      
      // Only need to do the following if the loss is non-zero
      if (m_loss != HINGE || (z < 1)) {
        // Compute Factor for updates
        double factor = m_learningRate * y * dloss(z);
        
        // Update coefficients for attributes
        for (Map.Entry<String, Count> feature : m_inputVector.entrySet()) {
          String word = feature.getKey();
          double value = (m_wordFrequencies) ? feature.getValue().m_count : 1;
          
          Count c = m_dictionary.get(word);
          if (c != null) {
            c.m_weight += factor * value;
          }
        }
        
        // update the bias
        m_bias += factor;
      }
      
      m_t++;      
    }
  }
  
  protected void tokenizeInstance(Instance instance, boolean updateDictionary) {
    if (m_inputVector == null) {
      m_inputVector = new LinkedHashMap<String, Count>();
    } else {
      m_inputVector.clear();
    }
    
    if (m_useStopList && m_stopwords == null) {
      m_stopwords = new Stopwords();
      try {
        if (getStopwords().exists() && !getStopwords().isDirectory()) {
          m_stopwords.read(getStopwords());
        }
      } catch (Exception ex) {
        ex.printStackTrace();
      }
    }
    
    for (int i = 0; i < instance.numAttributes(); i++) {
      if (instance.attribute(i).isString() && !instance.isMissing(i)) {
        m_tokenizer.tokenize(instance.stringValue(i));
        
        while (m_tokenizer.hasMoreElements()) {
          String word = ((String)m_tokenizer.nextElement()).intern();
          if (m_lowercaseTokens) {
            word = word.toLowerCase().intern();              
          }
          
          word = m_stemmer.stem(word);
          
          if (m_useStopList) {
            if (m_stopwords.is(word)) {
              continue;
            }
          }
          
          Count docCount = m_inputVector.get(word);
          if (docCount == null) {
            m_inputVector.put(word, new Count(instance.weight()));
          } else {
            docCount.m_count += instance.weight();
          }
          
          if (updateDictionary) {
            Count count = m_dictionary.get(word);
            if (count == null) {
              m_dictionary.put(word, new Count(instance.weight()));
            } else {
              count.m_count += instance.weight();
            }
          }
        }
      }
    }
    
    if (updateDictionary) {
      pruneDictionary();
    }
  }
  
  protected void pruneDictionary() {
    if (m_periodicP <= 0 || m_t % m_periodicP > 0) {
      return;
    }
    
    Iterator<Map.Entry<String, Count>> entries = m_dictionary.entrySet().iterator();
    while (entries.hasNext()) { 
      Map.Entry<String, Count> entry = entries.next();
      if (entry.getValue().m_count < m_minWordP) {
        entries.remove();
      }
    }
  }
  
  protected double svmOutput() {
    double wx = dotProd(m_inputVector);
    double z = (wx + m_bias);
    
    return z;
  }
  
  public double[] distributionForInstance(Instance inst) throws Exception {
    double[] result = new double[2];
    
    tokenizeInstance(inst, false);
    double wx = dotProd(m_inputVector);
    double z = (wx + m_bias);
    
    if (m_loss == HINGE && m_fitLogistic) {
      double pred = z;
      double[] vals = new double[2];
      vals[0] = pred;
      vals[1] = Utils.missingValue();
      DenseInstance metaI = new DenseInstance(inst.weight(), vals);
      metaI.setDataset(m_fitLogisticStructure);
      return m_svmProbs.distributionForInstance(metaI);
    }
    
    if (z <= 0) {
      if (m_loss == LOGLOSS) {
        result[0] = 1.0 / (1.0 + Math.exp(z));
        result[1] = 1.0 - result[0];
      } else {
        result[0] = 1;
      }
    } else {
      if (m_loss == LOGLOSS) {
        result[1] = 1.0 / (1.0 + Math.exp(-z));
        result[0] = 1.0 - result[1];
      } else {
        result[1] = 1;
      }
    }
    
    return result;
  }
  
  protected double dotProd(Map<String, Count> document) {    
    double result = 0;
    
    // document normalization
    double iNorm = 0;
    double fv = 0;
    if (m_normalize) {
      for (Count c : document.values()) {
        // word counts or bag-of-words?
        fv = (m_wordFrequencies) ? c.m_count : 1.0;
        iNorm += Math.pow(Math.abs(fv), m_lnorm);
      }
      iNorm = Math.pow(iNorm, 1.0 / m_lnorm);
    }
    
    for (Map.Entry<String, Count> feature : document.entrySet()) {
      String word = feature.getKey();
      double freq = (m_wordFrequencies) ? feature.getValue().m_count : 1.0;
      //double freq = (feature.getValue().m_count / iNorm * m_norm);
      if (m_normalize) {
        freq /= iNorm * m_norm;
      }
      
      Count weight = m_dictionary.get(word);
      
      if (weight != null && weight.m_count >= m_minWordP) {
        result += freq * weight.m_weight;
      }      
    }
    
    return result;
  }
  
  public String toString() {
    if (m_dictionary == null) {
      return "SGDText: No model built yet.\n";
    }
        
    StringBuffer buff = new StringBuffer();
    buff.append("SGDText:\n\n");
    buff.append("Loss function: ");
    if (m_loss == HINGE) {
      buff.append("Hinge loss (SVM)\n\n");
    } else {
      buff.append("Log loss (logistic regression)\n\n");
    }
    
    buff.append("Dictionary size: " + m_dictionary.size() + "\n\n");
    
    buff.append(m_data.classAttribute().name() + " = \n\n");
    int printed = 0;
    
    Iterator<Map.Entry<String, Count>> entries = m_dictionary.entrySet().iterator();
    while (entries.hasNext()) { 
      Map.Entry<String, Count> entry = entries.next();
      
      if (printed > 0) {
        buff.append(" + ");
      } else {
        buff.append("   ");
      }
      
      buff.append(Utils.doubleToString(entry.getValue().m_weight, 12, 4)
          + " " + entry.getKey() + "\n");
      printed++;
    }
      
    if (m_bias > 0) {
      buff.append(" + " + Utils.doubleToString(m_bias, 12, 4));
    } else {
      buff.append(" - " + Utils.doubleToString(-m_bias, 12, 4));
    }
    
    return buff.toString();
  }
  
  /**
   * Returns the revision string.
   * 
   * @return            the revision
   */
  public String getRevision() {
    return RevisionUtils.extract("$Revision: 8034 $");
  }
  
  /**
   * Main method for testing this class.
   */
  public static void main(String[] args) {
    runClassifier(new SGDText(), args);
  }
}