CRFModelHandler.java example

Explorer

szeke-master
- src
  - main
    - java
      - edu
        isi
        karma
        cleaning
        ANode.java
        ConfigParameters.java
        ConstrainedAlignment.java
        DataCollection.java
        ExampleSelection.java
        GrammarTreeNode.java
        InterpreterType.java
        Interpretor.java
        Loop.java
        Maintest.java
        MultipleStringAlign.java
        MyLogger.java
        Partition.java
        PartitionClassifier.java
        PartitionClassifierType.java
        Position.java
        ProgSynthesis.java
        Program.java
        ProgramRule.java
        QuestionableRecord
        Feature1.java
        Feature2.java
        Feature3.java
        Feature4.java
        FeatureVector.java
        OutlierDetector.java
        RecFeature.java
        RecordDistiller.java
        Ruler.java
        Section.java
        Segment.java
        TNode.java
        Template.java
        Test.java
        TestJAVA.java
        TextVector.java
        Tokenizer.java
        Traces.java
        UtilTools.java
        features
        CntFeature.java
        Data2Features.java
        DataRanking.java
        Feature.java
        FeatureSet.java
        LogisticModelParameters.java
        Main.java
        MovFeature.java
        RecordClassifier2.java
        RecordCntFeatures.java
        RecordFeatureSet.java
        RecordPerFeatures.java
        RecordTextFeature.java
        RegularityFeatureSet.java
        Test.java
        VarianceFeatureSet.java
        controller
        command
        CloseWorkspaceCommand.java
        CloseWorkspaceCommandFactory.java
        Command.java
        CommandException.java
        CommandFactory.java
        CommandWithPreview.java
        FetchGraphsFromTripleStoreCommand.java
        FetchGraphsFromTripleStoreCommandFactory.java
        FetchPreferencesCommand.java
        FetchPreferencesCommandFactory.java
        GetUniqueGraphUrlCommand.java
        GetUniqueGraphUrlCommandFactory.java
        JSONInputCommandFactory.java
        ResetKarmaCommand.java
        ResetKarmaCommandFactory.java
        TestSPARQLEndPointCommand.java
        TestSPARQLEndPointCommandFactory.java
        UndoRedoCommand.java
        UndoRedoCommandFactory.java
        WorksheetCommand.java
        alignment
        AddUserLinkToAlignmentCommand.java
        AddUserLinkToAlignmentCommandFactory.java
        ApplyModelFromTripleStoreCommand.java
        ApplyModelFromTripleStoreCommandFactory.java
        ChangeInternalNodeLinksCommand.java
        ChangeInternalNodeLinksCommandFactory.java
        CreateNewModelCommand.java
        CreateNewModelCommandFactory.java
        FetchExistingModelsForWorksheetCommand.java
        FetchExistingModelsForWorksheetCommandFactory.java
        FetchR2RMLModelsCommand.java
        FetchR2RMLModelsCommandFactory.java
        GenerateR2RMLModelCommand.java
        GenerateR2RMLModelCommandFactory.java
        GetAlternativeLinksCommand.java
        GetAlternativeLinksCommandFactory.java
        GetCurrentLinksOfInternalNodeCommand.java
        GetCurrentLinksOfInternalNodeCommandFactory.java
        GetDataPropertiesForClassCommand.java
        GetDataPropertiesForClassCommandFactory.java
        GetDataPropertyHierarchyCommand.java
        GetDataPropertyHierarchyCommandFactory.java
        GetDomainsForDataPropertyCommand.java
        GetDomainsForDataPropertyCommandFactory.java
        GetInternalNodesListOfAlignmentCommand.java
        GetInternalNodesListOfAlignmentCommandFactory.java
        GetLinksOfAlignmentCommand.java
        GetLinksOfAlignmentCommandFactory.java
        GetOntologyClassHierarchyCommand.java
        GetOntologyClassHierarchyCommandFactory.java
        GetPropertiesAndClassesList.java
        GetPropertiesAndClassesListCommandFactory.java
        InvokeDataMiningServiceCommand.java
        InvokeDataMiningServiceCommandFactory.java
        SetMetaPropertyCommand.java
        SetMetaPropertyCommandFactory.java
        SetSemanticTypeCommand.java
        SetSemanticTypeCommandFactory.java
        ShowAutoModelCommand.java
        ShowAutoModelCommandFactory.java
        ShowModelCommand.java
        ShowModelCommandFactory.java
        UnassignSemanticTypeCommand.java
        UnassignSemanticTypeCommandFactory.java
        cleaning
        FetchTransformingDataCommand.java
        FetchTransformingDataFactory.java
        GenerateCleaningRulesCommand.java
        GenerateCleaningRulesCommandFactory.java
        InvokeCleaningServiceCommand.java
        InvokeCleaningServiceCommandFactory.java
        SubmitCleaningCommand.java
        SubmitCleaningCommandFactory.java
        importdata
        ImportCSVFileCommand.java
        ImportCSVFileCommandFactory.java
        ImportDatabaseTableCommand.java
        ImportDatabaseTableCommandFactory.java
        ImportExcelFileCommand.java
        ImportExcelFileCommandFactory.java
        ImportJSONFileCommand.java
        ImportJSONFileCommandFactory.java
        ImportOntologyCommand.java
        ImportOntologyCommandFactory.java
        ImportServiceCommand.java
        ImportServiceCommandFactory.java
        ImportUnionResultCommand.java
        ImportUnionResultCommandFactory.java
        ImportXMLFileCommand.java
        ImportXMLFileCommandFactory.java
        publish
        PublishCSVCommand.java
        PublishCSVCommandFactory.java
        PublishDatabaseCommand.java
        PublishDatabaseCommandFactory.java
        PublishKMLLayerCommand.java
        PublishKMLLayerCommandFactory.java
        PublishMDBCommand.java
        PublishMDBCommandFactory.java
        PublishRDFCellCommand.java
        PublishRDFCellCommandFactory.java
        PublishRDFCommand.java
        PublishRDFCommandFactory.java
        PublishSpatialDataCommand.java
        PublishSpatialDataCommandFactory.java
        PublishWorksheetHistoryCommand.java
        PublishWorksheetHistoryCommandFactory.java
        reconciliation
        InvokeRubenReconciliationService.java
        InvokeRubenReconciliationServiceFactory.java
        service
        InvokeServiceCommand.java
        InvokeServiceCommandFactory.java
        PopulateCommand.java
        PopulateCommandFactory.java
        PublishModelCommand.java
        PublishModelCommandFactory.java
        ServiceTableUtil.java
        transformation
        PreviewPythonTransformationResultsCommand.java
        PreviewPythonTransformationResultsCommandFactory.java
        SubmitPythonTransformationCommand.java
        SubmitPythonTransformationCommandFactory.java
        worksheet
        AddColumnCommand.java
        AddColumnCommandFactory.java
        ApplyHistoryFromR2RMLModelCommand.java
        ApplyHistoryFromR2RMLModelCommandFactory.java
        ApplyWorksheetHistoryCommand.java
        ApplyWorksheetHistoryCommandFactory.java
        EditCellCommand.java
        EditCellCommandFactory.java
        FetchExistingWorksheetPropertiesCommand.java
        FetchExistingWorksheetPropertiesCommandFactory.java
        MultipleValueEditColumnCommand.java
        MultipleValueEditColumnCommandFactory.java
        RenameColumnCommand.java
        RenameColumnCommandFactory.java
        SetWorksheetPropertiesCommand.java
        SetWorksheetPropertiesCommandFactory.java
        SplitByCommaCommand.java
        SplitByCommaCommandFactory.java
        SplitColumnByDelimiter.java
        TablePagerCommand.java
        TablePagerCommandFactory.java
        TablePagerResizeCommand.java
        TablePagerResizeCommandFactory.java
        history
        CommandHistory.java
        CommandHistoryWriter.java
        HistoryJsonUtil.java
        WorksheetCommandHistoryReader.java
        update
        AbstractUpdate.java
        AlignmentHeadersUpdate.java
        CSVImportPreviewUpdate.java
        CleaningResultUpdate.java
        DatabaseTablePreviewUpdate.java
        DatabaseTablesListUpdate.java
        EmptyUpdate.java
        ErrorUpdate.java
        FetchPreferencesUpdate.java
        FetchR2RMLUpdate.java
        FetchResultUpdate.java
        HistoryAddCommandUpdate.java
        HistoryUpdate.java
        InfoUpdate.java
        InvokeDataMiningServiceUpdate.java
        NewDatabaseCommandUpdate.java
        NodeChangedUpdate.java
        OntologyClassHierarchyUpdate.java
        OntologyHierarchyUpdate.java
        SVGAlignmentUpdate_ForceKarmaLayout.java
        SemanticTypesUpdate.java
        TagsUpdate.java
        UpdateContainer.java
        WorksheetCleaningServiceInvocationResultsUpdate.java
        WorksheetDataUpdate.java
        WorksheetHeadersUpdate.java
        WorksheetHierarchicalDataUpdate.java
        WorksheetHierarchicalHeadersUpdate.java
        WorksheetListUpdate.java
        er
        helper
        ConfigUtil.java
        ConnectPostgis.java
        Constants.java
        SPARQLGeneratorUtil.java
        TripleStoreUtil.java
        geospatial
        FeatureTable.java
        LineString.java
        Point.java
        SpatialReferenceSystemTransformationUtil.java
        WorksheetGeospatialContent.java
        WorksheetToFeatureCollection.java
        imp
        csv
        CSVFileExport.java
        CSVFileImport.java
        database
        DatabaseTableImport.java
        excel
        ToCSV.java
        json
        JsonImport.java
        mdb
        MDBFileExport.java
        rdf
        RDFImport.java
        RDFImportMain.java
        RDFQuery.java
        UnionImport.java
        testRepository.java
        kr2rml
        ColumnTemplateTerm.java
        ErrorReport.java
        GraphMap.java
        KR2RMLMappingAuxillaryInformation.java
        KR2RMLMappingGenerator.java
        KR2RMLWorksheetRDFGenerator.java
        NamedGraph.java
        ObjectMap.java
        Predicate.java
        PredicateObjectMap.java
        Prefix.java
        R2RMLMapping.java
        RefObjectMap.java
        ReportMessage.java
        StringTemplateTerm.java
        SubjectMap.java
        TemplateTerm.java
        TemplateTermSet.java
        TemplateTermSetBuilder.java
        TermMap.java
        TriplesMap.java
        TriplesMapGraph.java
        TriplesMapLink.java
        Type.java
        WorksheetModelWriter.java
        WorksheetR2RMLJenaModelParser.java
        WorksheetR2RMLSesameModelParser.java
        linkedapi
        server
        GetRequestManager.java
        HTTPClientTest.java
        LinkedApiRequestManager.java
        PostRequestManager.java
        ResourceType.java
        model
        serialization
        DataSourceLoader.java
        DataSourcePublisher.java
        MimeType.java
        Repository.java
        SerializationLang.java
        SourceLoader.java
        SourcePublisher.java
        WebServiceLoader.java
        WebServicePublisher.java
        modeling
        ModelingParams.java
        Namespaces.java
        Prefixes.java
        Test.java
        Uris.java
        alignment
        Alignment.java
        AlignmentManager.java
        GraphBuilder.java
        GraphPreProcess.java
        GraphUtil.java
        LinkIdFactory.java
        NodeIdFactory.java
        SteinerTree.java
        TreePostProcess.java
        ontology
        AutoOntology.java
        DomainRangePair.java
        OntologyCache.java
        OntologyHandler.java
        OntologyManager.java
        OntologyTreeNode.java
        OntologyUpdateListener.java
        SubclassSuperclassPair.java
        research
        ComputeGED.java
        GraphVizUtil.java
        ModelReader.java
        Params.java
        PatternContainment.java
        SemanticLabel.java
        ServiceModel.java
        Util.java
        approach1
        Approach1.java
        CandidateSteinerSets.java
        CoherenceItem.java
        MappingStruct.java
        MappingType.java
        RankedModel.java
        RankedSteinerSet.java
        SemanticLabelType.java
        SemanticTypeMapping.java
        SteinerNodes.java
        approach2
        Approach2.java
        graph
        konstantinosnedas
        HungarianAlgorithm.java
        roek
        nlpged
        algorithm
        GraphEditDistance.java
        application
        App.java
        Config.java
        graph
        Edge.java
        Graph.java
        Node.java
        graphmatching
        algorithms
        BipartiteMatching.java
        GraphMatching.java
        HungarianAlgorithm.java
        VolgenantJonker.java
        nanoxml
        XMLElement.java
        XMLParseException.java
        net
        n3
        nanoxml
        CDATAReader.java
        ContentReader.java
        IXMLBuilder.java
        IXMLElement.java
        IXMLEntityResolver.java
        IXMLParser.java
        IXMLReader.java
        IXMLValidator.java
        NonValidator.java
        PIReader.java
        StdXMLBuilder.java
        StdXMLParser.java
        StdXMLReader.java
        ValidatorPlugin.java
        XMLAttribute.java
        XMLElement.java
        XMLEntityResolver.java
        XMLException.java
        XMLParseException.java
        XMLParserFactory.java
        XMLUtil.java
        XMLValidationException.java
        XMLWriter.java
        sax
        SAXAdapter.java
        SAXEntityResolver.java
        SAXParser.java
        util
        CostFunction.java
        Edge.java
        EditDistance.java
        Graph.java
        GraphSet.java
        MatrixGenerator.java
        Node.java
        ResultPrinter.java
        TreeNode.java
        xml
        XMLParser.java
        semantictypes
        CRFColumnModel.java
        FullCRFModel.java
        SemanticTypeTrainingThread.java
        SemanticTypeUtil.java
        crfmodelhandler
        CRFModelHandler.java
        mycrf
        common
        Constants.java
        Node.java
        crfmodel
        CRFModelAbstract.java
        CRFModelFieldOnly.java
        fieldonly
        LblFtrPair.java
        globaldata
        GlobalDataAbstract.java
        GlobalDataFieldOnly.java
        graph
        GraphFieldOnly.java
        GraphInterface.java
        map
        MAPFieldOnly.java
        math
        LargeNumber.java
        Matrix.java
        optimization
        BacktrackingLineSearch.java
        LBFGS.java
        OptimizeFieldOnly.java
        myutils
        DBTable.java
        FileIOOps.java
        FileOps.java
        FileSystemOps.java
        ListOps.java
        Prnt.java
        RandOps.java
        Test.java
        ThreadOps.java
        sl
        Feature.java
        Lexer.java
        Part.java
        RegexFeatureExtractor.java
        Type.java
        rdf
        DatabaseTableRDFGenerator.java
        OfflineRdfGenerator.java
        rep
        CellValue.java
        Entity.java
        HNode.java
        HNodePath.java
        HTable.java
        Node.java
        RepEntity.java
        RepFactory.java
        Row.java
        StringCellValue.java
        Table.java
        TablePager.java
        Worksheet.java
        Workspace.java
        WorkspaceManager.java
        alignment
        ClassInstanceLink.java
        ColumnNode.java
        ColumnSubClassLink.java
        DataPropertyLink.java
        DataPropertyOfColumnLink.java
        InternalNode.java
        Label.java
        Link.java
        LinkKeyInfo.java
        LinkPriorityComparator.java
        LinkPriorityType.java
        LinkStatus.java
        LinkType.java
        LiteralNode.java
        Node.java
        NodeType.java
        ObjectPropertyLink.java
        ObjectPropertySpecializationLink.java
        SemanticType.java
        SemanticTypes.java
        SimpleLink.java
        SimpleNode.java
        SubClassLink.java
        SynonymSemanticTypes.java
        cleaning
        RamblerTransformation.java
        RamblerTransformationExample.java
        RamblerTransformationInputs.java
        RamblerTransformationOutput.java
        RamblerValueCollection.java
        Transformation.java
        TransformationExample.java
        TransformationInputs.java
        TransformationOutput.java
        ValueCollection.java
        hierarchicalheadings
        ColorKeyTranslator.java
        ColspanMap.java
        ColumnCoordinateSet.java
        Coordinate.java
        HHCell.java
        HHTNode.java
        HHTable.java
        HHTree.java
        LeafColumnIndexMap.java
        Span.java
        TForest.java
        TNode.java
        metadata
        MetadataContainer.java
        SourceInformation.java
        Tag.java
        TagsContainer.java
        WorksheetProperties.java
        model
        Argument.java
        ArgumentType.java
        Atom.java
        ClassAtom.java
        DatavaluedPropertyAtom.java
        IndividualPropertyAtom.java
        Model.java
        sources
        Attribute.java
        AttributeRequirement.java
        DataSource.java
        IOType.java
        Invocation.java
        InvocationManager.java
        Request.java
        Response.java
        Source.java
        Table.java
        URLManager.java
        WebService.java
        service
        json
        ArrayValue.java
        Element.java
        JsonManager.java
        SingleValue.java
        Value.java
        ValueType.java
        transformation
        PythonTransformationHelper.java
        util
        AbstractJDBCUtil.java
        CommandInputJSONUtil.java
        FileIOOps.java
        FileUtil.java
        HTTPUtil.java
        JDBCUtilFactory.java
        JSONUtil.java
        Jsonizable.java
        LogStackTrace.java
        MySQLUtil.java
        OracleUtil.java
        PostGISUtil.java
        Prnt.java
        RandomGUID.java
        SQLServerUtil.java
        Util.java
        view
        Border.java
        Margin.java
        RowPathCounts.java
        RowPathCountsByColumn.java
        Stroke.java
        VCell.java
        VColumnHeader.java
        VRow.java
        VRowEntry.java
        VTable.java
        VTableCssTags.java
        VWorksheet.java
        VWorksheetList.java
        VWorkspace.java
        ViewEntity.java
        ViewFactory.java
        ViewPreferences.java
        alignmentHeadings
        AlignmentColorKeyTranslator.java
        AlignmentForest.java
        AlignmentLink.java
        AlignmentNode.java
        tabledata
        StrokeStyles.java
        VDCell.java
        VDCellStrokes.java
        VDIndexTable.java
        VDRow.java
        VDTableCells.java
        VDTableData.java
        VDTreeNode.java
        VDTriangle.java
        VDVerticalSeparator.java
        VDVerticalSeparators.java
        tableheadings
        HeadersColorKeyTranslator.java
        VColumnHeader.java
        VHTreeNode.java
        VHTreeNodeLevel.java
        VTHNode.java
        VTHeaderForest.java
        VTableHeadings.java
        webserver
        ExecutionController.java
        ExtractSpatialInformationFromOSMServiceHandler.java
        ExtractSpatialInformationFromWikimapiaServiceHandler.java
        GetExampleJSON.java
        KMLFileTransferHandler.java
        KarmaException.java
        LinkedApiServiceHandler.java
        RequestController.java
        SampleDataFactory.java
        ServerStart.java
        ServletContextParameterMap.java
        SpatialReferenceSystemServiceHandler.java
        WorkspaceRegistry.java
        helper
        CreateGeoBuildingForTable.java
        CreateGeoStreetForTable.java
        CreateNodeDataForTable.java
        CreateWikimapiaInformation.java

/*******************************************************************************
 * Copyright 2012 University of Southern California
 * 
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 * 
 * 	http://www.apache.org/licenses/LICENSE-2.0
 * 
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 * 
 * This code was developed by the Information Integration Group as part 
 * of the Karma project at the Information Sciences Institute of the 
 * University of Southern California.  For more information, publications, 
 * and related projects, please see: http://www.isi.edu/integration
 ******************************************************************************/

package edu.isi.karma.modeling.semantictypes.crfmodelhandler ;

import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.FileReader;
import java.io.FileWriter;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Random;
import java.util.Set;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import edu.isi.karma.modeling.semantictypes.mycrf.crfmodel.CRFModelFieldOnly;
import edu.isi.karma.modeling.semantictypes.mycrf.fieldonly.LblFtrPair;
import edu.isi.karma.modeling.semantictypes.mycrf.globaldata.GlobalDataFieldOnly;
import edu.isi.karma.modeling.semantictypes.mycrf.graph.GraphFieldOnly;
import edu.isi.karma.modeling.semantictypes.mycrf.graph.GraphInterface;
import edu.isi.karma.modeling.semantictypes.mycrf.map.MAPFieldOnly;
import edu.isi.karma.modeling.semantictypes.mycrf.math.Matrix;
import edu.isi.karma.modeling.semantictypes.mycrf.optimization.OptimizeFieldOnly;
import edu.isi.karma.modeling.semantictypes.myutils.ListOps;
import edu.isi.karma.modeling.semantictypes.myutils.Prnt;
import edu.isi.karma.modeling.semantictypes.myutils.RandOps;
import edu.isi.karma.modeling.semantictypes.sl.Lexer;
import edu.isi.karma.modeling.semantictypes.sl.Part;
import edu.isi.karma.modeling.semantictypes.sl.RegexFeatureExtractor;


/**
 * This class is an API to the mycrf package.
 * It allows instantiating CRF models, training them and using them for prediction.
 * 
 * @author amangoel
 *
 */
public class CRFModelHandler {

	// ***********************************************************************************************
	/**
	 * @author amangoel
	 * Enumerates the kinds of column-level features that callers may pass along with examples.
	 * Used as the key type of the column-feature maps accepted by this class.
	 */
	public enum ColumnFeature {
		/** Feature kind named after the column header (per the constant's name). */
		ColumnHeaderName,
		/** Feature kind named after the table (per the constant's name). */
		TableName
	}

	// ***********************************************************************************************

	// ***********************************************************************************************
	/**
	 * @author amangoel
	 * Internal holder that pairs one example string with the ColumnFeature values attached to it.
	 * At most one String value is stored per ColumnFeature.
	 */
	static class Example {
		String exampleString;
		HashMap<ColumnFeature, String> columnFeatures;

		/**
		 * Creates an example that carries no ColumnFeatures.
		 * @param exampleString The string that the example represents
		 */
		public Example(String exampleString) {
			// a null feature map yields an empty (non-null) columnFeatures map
			this(exampleString, null);
		}

		/**
		 * Creates an example together with its ColumnFeatures.
		 * For every ColumnFeature only the first value of the supplied collection 
		 * (in the collection's iteration order) is kept; entries whose collection is null or empty, 
		 * or whose first value is null, are ignored.
		 * 
		 * @param exampleString The example string
		 * @param columnFeatures Associated ColumnFeatures, may be null
		 */
		public Example(String exampleString, Map<ColumnFeature, Collection<String>> columnFeatures) {
			this.exampleString = exampleString;
			this.columnFeatures = new HashMap<ColumnFeature, String>();
			if (columnFeatures == null) {
				return;
			}
			for (Map.Entry<ColumnFeature, Collection<String>> entry : columnFeatures.entrySet()) {
				Collection<String> candidates = entry.getValue();
				if (candidates == null || candidates.size() == 0) {
					continue;
				}
				String chosen = firstValue(candidates);
				if (chosen != null) {
					this.columnFeatures.put(entry.getKey(), chosen);
				}
			}
		}

		/**
		 * @param values A non-null collection of candidate values
		 * @return The first value produced by iterating the collection, or null if iteration yields nothing
		 */
		private static String firstValue(Collection<String> values) {
			for (String value : values) {
				return value;
			}
			return null;
		}

		/**
		 * Stores (or overwrites) the value of a ColumnFeature. Does nothing if either argument is null.
		 * @param columnFeature A ColumnFeature
		 * @param featureValue Corresponding Value to the ColumnFeature
		 */
		public void addColumnFeature(ColumnFeature columnFeature, String featureValue) {
			if (columnFeature == null || featureValue == null) {
				return;
			}
			columnFeatures.put(columnFeature, featureValue);
		}

		/**
		 * @return The string that this example represents
		 */
		public String getString() {
			return this.exampleString;
		}

		/**
		 * @param colFeature ColumnFeature for which the value is required.
		 * @return The value stored for the ColumnFeature, or null if this example has no value for it. 
		 * Callers must therefore check the returned value for null.
		 */
		public String getValueForColumnFeature(ColumnFeature colFeature) {
			// HashMap.get already returns null for a key that is not present
			return columnFeatures.get(colFeature);
		}
	}

	// ***********************************************************************************************
	// instance variables and class-level constants

	// Path of the model file. null means the model is not ready: the public methods of this class
	// refuse to operate while it is null, and addOrUpdateLabel resets it to null when saving fails.
	String file;
	// For every label, the list of examples held for that label.
	HashMap<String, ArrayList<Example>> labelToExamplesMap;
	// Shared model state; this class reads its labels, trainingGraphs and crfModel members.
	GlobalDataFieldOnly globalData;
	// Filled from allowedCharacters() in the constructor; reported to the user as the set of
	// allowed characters when all supplied examples are rejected.
	ArrayList<String> allowedCharacters;
	// SLF4J logger registered under the simple class name.
	static Logger logger = LoggerFactory.getLogger(CRFModelHandler.class.getSimpleName()) ;
	// Threshold on the number of distinct features collected for a label: above it,
	// addOrUpdateLabel calls selectFeatureSetWithWeightedProbability to choose the set to use.
	static final int MAX_FFs_PER_LABEL = 50;
	// Maximum number of a label's examples that addOrUpdateLabel uses for training.
	static final int MAX_EXAMPLES_PER_LABEL = 50;
	// NOTE(review): not referenced in the visible part of this file; presumably caps how many
	// examples per label are persisted -- confirm against the model-saving code.
	static final int MAX_EXAMPLES_SAVED_PER_LABEL = 200;

	/**
	 * Creates a handler that has no model loaded yet.
	 * The model file path, the label-to-examples map and the global data all start out as null, 
	 * so the methods that check the model file path report that the model is not ready.
	 * The list of allowed characters is populated from allowedCharacters().
	 */
	public CRFModelHandler() {
		this.file = null;
		this.labelToExamplesMap = null;
		this.globalData = null;
		this.allowedCharacters = allowedCharacters();
	}

	/**
	 * Reports which file backs this CRF model.
	 * @return Path to the model file, or null when no model file is currently set
	 */
	public String getModelFilePath() {
		return this.file;
	}


	/**
	 * Adds the passed list of examples to the model under the given label and retrains the model.
	 * <p>
	 * A label that is not yet known is registered first. The training graphs of the label are then 
	 * rebuilt from at most MAX_EXAMPLES_PER_LABEL of its examples, the feature functions of the label 
	 * are reselected, the model is re-optimized and finally saved to the model file. 
	 * If saving fails, the model file path is reset to null, which marks the handler as not ready.
	 * 
	 * @param label True label for the list of examples. Must not be null or blank; it is trimmed before use.
	 * @param examples List of example strings. Must not be null or empty.
	 * @param columnFeatures Map of column features attached to every example added by this call. May be null.
	 * @return True if success, else False
	 */
	public synchronized boolean addOrUpdateLabel(String label, List<String> examples, Map<ColumnFeature, Collection<String>> columnFeatures) {
		ArrayList<String> cleanedExamples, allFeatures;
		ArrayList<Example> examplesOfLabel, selectedExamples;
		HashSet<String> selectedFeatures, tmpFeatures;
		OptimizeFieldOnly optimizationObject;
		int labelIndex;
		boolean savingSuccessful;
		if (file == null) {
			Prnt.prn("CRF Model is not ready, either because it was never read or an error happened while reading it previously. Please try reading the model file again.");
			return false;
		}
		// running basic sanity checks in the input arguments.
		// A null @examples list is rejected here instead of failing with a NullPointerException.
		if (label == null || label.trim().length() == 0 || examples == null || examples.size() == 0) {
			Prnt.prn("@label argument cannot be null or an empty string and the @examples list cannot be empty.") ;
			return false;
		}
		label = label.trim();
		// cleanedExamplesList (defined elsewhere in this class) fills cleanedExamples from the raw examples
		cleanedExamples = new ArrayList<String>();
		cleanedExamplesList(examples, cleanedExamples);
		// making sure that the condition where the examples list is not empty but contains junk only is not accepted
		if (cleanedExamples.size() == 0) {
			Prnt.prn("@examples list contains forbidden characters only. The allowed characters are " + allowedCharacters) ;
			return false;
		}
		// if label does not already exist in the model, add new label. Also, add an entry in the map for the new label.
		labelIndex = globalData.labels.indexOf(label);
		if (labelIndex == -1) {
			globalData.labels.add(label);
			labelIndex = globalData.labels.indexOf(label);
			labelToExamplesMap.put(label, new ArrayList<Example>());
		}
		// adding all the new examples to list of existing examples for the arg label.
		examplesOfLabel = labelToExamplesMap.get(label);
		for (String newExampleString : cleanedExamples) {
			examplesOfLabel.add(new Example(newExampleString, columnFeatures));
		}
		// make sure that we consider at most MAX_EXAMPLES_PER_LABEL examples for training.
		selectedExamples = new ArrayList<Example>();
		if (examplesOfLabel.size() <= MAX_EXAMPLES_PER_LABEL) {
			selectedExamples.addAll(examplesOfLabel);
		}
		else {
			RandOps.getRandomlySelectedItemsFromList(examplesOfLabel, selectedExamples, MAX_EXAMPLES_PER_LABEL);
		}
		// presumably discards the training graphs previously built for this label, 
		// so that the loop below rebuilds them from the current selection
		removeGraphsForLabel(labelIndex);
		allFeatures = new ArrayList<String>();
		tmpFeatures = new HashSet<String>();
		// Add training graphs for selected examples. 
		// Accumulate all features for the training graphs being added.
		for (Example selectedExample : selectedExamples) {
			GraphFieldOnly newGraph;
			// featureSet (defined elsewhere in this class) delivers the features of the example through tmpFeatures
			featureSet(selectedExample, tmpFeatures);
			newGraph = new GraphFieldOnly(selectedExample.exampleString, label, new ArrayList<String>(tmpFeatures), globalData);
			globalData.trainingGraphs.add(newGraph);
			// allFeatures deliberately keeps duplicates; it is handed to the weighted selection below
			allFeatures.addAll(tmpFeatures);
		}
		// if the number of distinct features is > MAX_FFs_PER_LABEL, let the weighted selection choose the set to use.
		selectedFeatures = new HashSet<String>(allFeatures);
		if (selectedFeatures.size() > MAX_FFs_PER_LABEL) {
			selectFeatureSetWithWeightedProbability(allFeatures, selectedFeatures);
		}
		// reselect the feature functions for the labelIndex.
		reselectFFs(labelIndex, selectedFeatures);
		// optimize the model to adjust to the new label/examples/ffs
		// NOTE(review): the meaning of the argument 3 is defined by OptimizeFieldOnly.optimize -- confirm there
		optimizationObject = new OptimizeFieldOnly(globalData.crfModel, globalData);
		optimizationObject.optimize(3);
		// save the model to file with the new weights
		savingSuccessful = saveModel();
		if (!savingSuccessful) {
			// mark the handler as not ready so that later calls do not work on an unsaved model
			file = null;
		}
		return savingSuccessful;
	}


	/**
	 * Copies the example strings that the model holds for a label into the supplied list.
	 * The supplied list is cleared first; it is left untouched when the call fails.
	 * 
	 * @param label The label for which examples are being requested. It is trimmed before the lookup.
	 * @param examples The list argument that will be used to return the list of examples in the model for the supplied label.
	 * @return True, if successful, else False (model not ready, invalid arguments or unknown label)
	 */
	public boolean getExamplesForLabel(String label, ArrayList<String> examples) {
		if (file == null) {
			Prnt.prn("CRF Model is not ready, either because it was never read or an error happened while reading it previously. Please try reading the model file again.");
			return false;
		}
		boolean labelUsable = label != null && label.trim().length() != 0;
		if (!labelUsable || examples == null) {
			Prnt.prn("CRFModelHandler.getExamplesForLabel: Either the label is null, or it is an empty string or examples is null") ;
			return false;
		}
		String trimmedLabel = label.trim();
		if (!globalData.labels.contains(trimmedLabel)) {
			Prnt.prn("CRFModelHandler.getExamplesForLabel: Label " + trimmedLabel + " does not exist in the model.") ;
			return false;
		}
		examples.clear();
		// hand out the plain strings only, not the internal Example objects
		for (Example stored : labelToExamplesMap.get(trimmedLabel)) {
			examples.add(stored.getString());
		}
		return true;
	}


	/**
	 * Copies the labels of the model, in the model's order, into the supplied list.
	 * The supplied list is cleared first; it is left untouched when the call fails.
	 * 
	 * @param labels The ordered list of labels is returned in this argument.
	 * @return True, if successful, else False (model not ready or null argument)
	 */
	public boolean getLabels(List<String> labels) {
		// the model-readiness check takes precedence over the argument check
		String problem = null;
		if (file == null) {
			problem = "CRF Model is not ready, either because it was never read or an error happened while reading it previously. Please try reading the model file again.";
		}
		else if (labels == null) {
			problem = "Invalid argument @labels. It is null.";
		}
		if (problem != null) {
			Prnt.prn(problem);
			return false;
		}
		labels.clear();
		labels.addAll(globalData.labels);
		return true;
	}


	/**
	 * @param examples - list of examples of an unknown type
	 * @param numPredictions - required number of predictions in descending order
	 * @param predictedLabels - the argument in which the ordered list of labels is returned. the size of this list could be smaller than numPredictions
	 * 							if there aren't that many labels in the model already
	 * @param confidenceScores - the probability of the examples belonging to the labels returned.
	 * @param exampleProbabilities - the size() == examples.size(). It contains, for each example, in the same order, a double array that contains the probability 
	 * 									of belonging to the labels returned in predictedLabels.	 
	 * @param columnFeatures - this Map supplies ColumnFeatures such as ColumnName, etc.
	 * @return True, if successful, else False
	 */
	public boolean predictLabelForExamples(
			List<String> examples,
			int numPredictions,
			List<String> predictedLabels,
			List<Double> confidenceScores,
			List<double[]> exampleProbabilities,
			Map<ColumnFeature, Collection<String>> columnFeatures
			) {
		// A model must have been read successfully before predicting.
		if (file == null) {
			Prnt.prn("CRF Model is not ready, either because it was never read or an error happened while reading it previously. Please try reading the model file again.");
			return false ;
		}
		// Argument validation: examples and the two mandatory output lists are required.
		boolean noExamples = (examples == null) || (examples.size() == 0);
		boolean noOutputLists = (predictedLabels == null) || (confidenceScores == null);
		if (noExamples || numPredictions <= 0 || noOutputLists) {
			Prnt.prn("Invalid arguments. Possible problems: examples list size is zero, numPredictions is non-positive, predictedLabels or confidenceScores list is null.") ;
			return false ;
		}
		// Without any label in the model there is nothing to predict.
		if (globalData.labels.size() == 0) {
			Prnt.prn("The model does have not any semantic types. Please add some labels with their examples before attempting to predict using this model.") ;
			return false ;
		}

		// exampleProbabilities is an optional output; remember whether the caller asked for it.
		final boolean wantPerExample = (exampleProbabilities != null);
		ArrayList<ArrayList<Double>> perExampleAllLabels = new ArrayList<ArrayList<Double>>() ;
		MAPFieldOnly predictor = new MAPFieldOnly(globalData) ;
		double[] summedProbabilities = new double[globalData.labels.size()] ;
		HashSet<String> features = new HashSet<String>();

		// Accumulate, label by label, the probabilities computed for every example.
		// The label with the largest accumulated value is the most likely one for the whole column.
		for (String rawExample : examples) {
			String cleaned = getSanitizedString(rawExample);
			if (cleaned.length() == 0) {
				// An example with no allowed characters is represented by a single dot.
				cleaned = ".";
			}
			featureSet(cleaned, columnFeatures, features);
			GraphFieldOnly graph = new GraphFieldOnly(cleaned, null, new ArrayList<String>(features), globalData) ;
			double[] perLabel = predictor.probabilitiesForLabels(graph) ;
			Matrix.plusEquals(summedProbabilities, perLabel, 1.0) ;
			if (wantPerExample) {
				perExampleAllLabels.add(newListFromDoubleArray(perLabel)) ;
			}
		}

		// Divide by the number of examples so the accumulated values become averages.
		for (int i = 0; i < globalData.labels.size(); i++) {
			summedProbabilities[i] /= examples.size() ;
		}

		// Sort labels and their averaged probabilities together, in descending order of probability;
		// after sorting, the label at index i has the probability at index i.
		ArrayList<String> rankedLabels = new ArrayList<String>(globalData.labels) ;
		ArrayList<Double> rankedScores = newListFromDoubleArray(summedProbabilities) ;
		ListOps.sortListOnValues(rankedLabels, rankedScores) ;

		// Fill the output arguments; at most numPredictions labels are returned.
		predictedLabels.clear() ;
		confidenceScores.clear() ;
		int returned = Math.min(numPredictions, globalData.labels.size()) ;
		if (wantPerExample) {
			exampleProbabilities.clear() ;
			for (int i = 0; i < examples.size(); i++) {
				exampleProbabilities.add(new double[returned]) ;
			}
		}
		for (int rank = 0; rank < returned; rank++) {
			String rankedLabel = rankedLabels.get(rank);
			predictedLabels.add(rankedLabel) ;
			confidenceScores.add(rankedScores.get(rank)) ;
			if (!wantPerExample) {
				continue;
			}
			// Map the ranked label back to its position in the model's label list,
			// which is the index used by the per-example probability lists.
			int modelIndex = globalData.labels.indexOf(rankedLabel) ;
			for (int i = 0; i < examples.size(); i++) {
				exampleProbabilities.get(i)[rank] = perExampleAllLabels.get(i).get(modelIndex) ;
			}
		}
		return true ;
	}




	/**
	 * @param modelFile The path of the file from which the model should be read.
	 * @return True if successfully read. False, otherwise.
	 * This function takes the path of file as input and
	 * creates an environment that consists of globalData, crfModel, list of examples of each label, etc.
	 * It reads an empty file also.
	 */
	public boolean readModelFromFile(String modelFile) {
		BufferedReader br ;
		String line ;
		int numLabels ;
		boolean emptyFile ;
		int numFFs  ;
		ArrayList<LblFtrPair> ffs ;
		HashSet<String> features;
		double[] weights ;
		CRFModelFieldOnly crfModel  ;
		if (modelFile == null) {
			Prnt.prn("Invalid argument value. Argument @file is null.") ;
			file = null ;
			return false ;
		}
		// First pass: only determine whether the file has any non-blank line.
		br = null ;
		line = null ;
		numLabels = -1 ;
		emptyFile = true ;
		try {
			br = new BufferedReader(new FileReader(modelFile)) ;
			while((line = br.readLine()) != null) {
				if (line.trim().length() != 0) {
					emptyFile = false ;
					break ;
				}
			}
		}
		catch(Exception e) {
			Prnt.prn("Error reading model file " + modelFile + ".") ;
			file = null ;
			return false ;
		}
		finally {
			// Release the file handle on every path, including read failures.
			closeReaderIgnoringErrors(br) ;
		}
		if (emptyFile) {
			// An empty file is a valid model with no labels, no training graphs and no feature functions.
			globalData = new GlobalDataFieldOnly() ;
			labelToExamplesMap = new HashMap<String, ArrayList<Example>>() ;
			globalData.trainingGraphs = new ArrayList<GraphInterface>() ;
			crfModel = new CRFModelFieldOnly(globalData) ;
			crfModel.ffs = new ArrayList<LblFtrPair>() ;
			crfModel.weights = new double[0] ;
			globalData.crfModel = crfModel ;
			file = modelFile ;
			return true ;
		}
		else {
			ArrayList<Example> selectedExamples;
			features = new HashSet<String>();
			globalData = new GlobalDataFieldOnly() ;
			labelToExamplesMap = new HashMap<String, ArrayList<Example>>() ;
			br = null ;
			try {
				// Second pass: parse the file from the beginning.
				br = new BufferedReader(new FileReader(modelFile)) ;
				// Read the number of labels in the model file
				numLabels = Integer.parseInt(br.readLine().trim()) ;
				br.readLine();
				// read numLabels labels and their examples
				for(int labelNumber = 0 ; labelNumber < numLabels ; labelNumber++) {
					String newLabel;
					ArrayList<Example> examples  ;
					int numExamples ;
					newLabel = br.readLine().trim() ;
					if (globalData.labels.contains(newLabel)) {
						Prnt.prn("The label " + newLabel + " was already added to the model. " +
								"Later in the file, we found another list that had the same label and a set of examples underneath it. This is an error. " + 
								"A label can only occur one in the file. All its examples have to be listed underneath it at one place.") ;
						file = null ;
						return false ;
					}
					globalData.labels.add(newLabel) ;
					examples = new ArrayList<Example>() ;
					numExamples = Integer.parseInt(br.readLine().trim()) ;
					for(int egNumber = 0 ; egNumber < numExamples ; egNumber++) {
						Example example;
						example = parseExample(br);
						if (example == null) {
							Prnt.prn("Parsing of file failed. Could not parse an example.");
							file = null;
							return false;
						}
						else {
							examples.add(example) ;
						}
					}
					labelToExamplesMap.put(newLabel, examples) ;
					br.readLine() ; // consuming the empty line after each list of label and its examples
				}
				// Creating trainingGraphs for at most MAX_EXAMPLES_PER_LABEL examples of every label
				globalData.trainingGraphs = new ArrayList<GraphInterface>() ;
				selectedExamples = new ArrayList<CRFModelHandler.Example>();
				for(String lbl : globalData.labels) {
					ArrayList<Example> allExamples;
					allExamples = labelToExamplesMap.get(lbl);
					selectedExamples.clear();
					if (allExamples.size() <= MAX_EXAMPLES_PER_LABEL) {
						selectedExamples.addAll(allExamples);
					}
					else {
						RandOps.getRandomlySelectedItemsFromList(allExamples, selectedExamples, MAX_EXAMPLES_PER_LABEL);
					}
					for(Example example : selectedExamples) {
						featureSet(example, features);
						globalData.trainingGraphs.add(new GraphFieldOnly(example.exampleString, lbl, new ArrayList<String>(features), globalData)) ;
					}
				}
				// starting to read in feature-functions and their weights. the first line is the number of such ffs. 
				numFFs = Integer.parseInt(br.readLine().trim()) ;
				ffs = new ArrayList<LblFtrPair>() ;
				weights = new double[numFFs] ;
				for(int ffNumber = 0 ; ffNumber < numFFs ; ffNumber++) {
					String[] lineParts ;
					line = br.readLine().trim() ;
					if (line.length() == 0) {
						Prnt.prn("While reading " + numFFs + " feature functions, we encountered an empty line. This is an error. " +
								"All feature functions have to be listed continuously without any blank lines in between.") ;
						file = null ;
						return false ;
					}
					// Each line is "<label> <feature> <weight>", separated by whitespace.
					lineParts = line.split("\\s+") ;
					ffs.add(new LblFtrPair(globalData.labels.indexOf(lineParts[0]), lineParts[1])) ;
					weights[ffNumber] = Double.parseDouble(lineParts[2]) ;
				}
				crfModel = new CRFModelFieldOnly(globalData) ;
				crfModel.ffs = ffs ;
				crfModel.weights = weights ;
				globalData.crfModel = crfModel ;
				file = modelFile ;
				return true ;
			}
			catch(Exception e) {
				// Covers I/O errors as well as malformed content (bad numbers, truncated file, short lines).
				Prnt.prn("Error parsing model file " + modelFile + ".") ;
				file = null ;
				return false ;
			}
			finally {
				// Runs for every return above and for the exception path, so the reader never leaks.
				closeReaderIgnoringErrors(br) ;
			}
		}
	}

	/**
	 * Closes the given reader if it is not null. A failure while closing is ignored
	 * because the reader was only used for reading and nothing can be lost by it.
	 * @param reader The reader to close; may be null.
	 */
	private void closeReaderIgnoringErrors(BufferedReader reader) {
		if (reader == null) {
			return ;
		}
		try {
			reader.close() ;
		}
		catch(Exception ignored) {
			// nothing useful can be done if closing a reader fails
		}
	}


	/**
	 * @return True if successfully cleared the model. False, otherwise.
	 * This method removes all labels from the CRF model. 
	 * This is effectively same as setting the model to a state, 
	 * where an empty file has been read for the first time.
	 * Since, each change in the model is immediately reflected
	 * in the model file, this method also completely clears the 
	 * model file.
	 * 
	 */
	public boolean removeAllLabels() {
		if (file == null) {
			Prnt.prn("CRF Model is not ready, either because it was never read or an error happened while reading it previously. Please try reading the model file again.");
			return false ;
		}
		// Empty the model file: open it for writing (not appending) and write an empty string.
		try {
			BufferedWriter truncatingWriter = new BufferedWriter(new FileWriter(file)) ;
			truncatingWriter.write("") ;
			truncatingWriter.close() ;
		}
		catch(Exception e) {
			Prnt.prn("Clearing the contents of the model file failed.") ;
			file = null ;
			return false ;
		}
		// Reset the in-memory state to the one produced by reading an empty file:
		// no labels, no training graphs, no feature functions and no weights.
		globalData = new GlobalDataFieldOnly() ;
		globalData.trainingGraphs = new ArrayList<GraphInterface>() ;
		CRFModelFieldOnly emptyModel = new CRFModelFieldOnly(globalData) ;
		emptyModel.weights = new double[0] ;
		emptyModel.ffs = new ArrayList<LblFtrPair>() ;
		globalData.crfModel = emptyModel ;
		labelToExamplesMap = new HashMap<String, ArrayList<Example>>() ;
		return true ;
	}



	/**
	 * @param label The label to be removed from the model. It is trimmed before being looked up.
	 * @return True, if the label was removed and the model was saved successfully, else False
	 * This method removes the label, its examples, its training graphs and its feature functions,
	 * re-indexes everything that referred to later labels, runs the optimizer and saves the model.
	 */
	public boolean removeLabel(String label) {
		if (file == null) {
			Prnt.prn("CRF Model is not ready, either because it was never read or an error happened while reading it previously. Please try reading the model file again.");
			return false ;
		}
		if (label == null) {
			Prnt.prn("Illegal value, null, passed for argument @label") ;
			return false ;
		}
		label = label.trim() ;
		final int removedIndex = globalData.labels.indexOf(label) ;
		if (removedIndex == -1) {
			Prnt.prn("Label " + label + " does not exist in the CRF model.") ;
			return false ;
		}
		globalData.labels.remove(removedIndex) ;
		labelToExamplesMap.remove(label) ;

		// Walk the training graphs from the end so that removals do not disturb the positions still to be visited.
		// Graphs of the removed label are dropped; graphs of later labels move down by one index.
		for (int pos = globalData.trainingGraphs.size() - 1; pos >= 0; pos--) {
			GraphFieldOnly graph = (GraphFieldOnly) globalData.trainingGraphs.get(pos) ;
			if (graph.node.labelIndex == removedIndex) {
				globalData.trainingGraphs.remove(pos) ;
			}
			else if (graph.node.labelIndex > removedIndex) {
				graph.node.labelIndex-- ;
			}
		}

		// Keep every feature function (and its weight) that does not belong to the removed label.
		int originalCount = globalData.crfModel.ffs.size();
		ArrayList<LblFtrPair> survivingFFs = new ArrayList<LblFtrPair>() ;
		double[] weightBuffer = new double[originalCount] ;
		int survivors = 0;
		for (int i = 0; i < originalCount; i++) {
			LblFtrPair candidate = globalData.crfModel.ffs.get(i);
			if (candidate.labelIndex == removedIndex) {
				continue;
			}
			survivingFFs.add(candidate) ;
			weightBuffer[survivors++] = globalData.crfModel.weights[i] ;
		}

		// The labels that came after the removed one now sit one position earlier in the label list,
		// so every surviving feature function that pointed at such a label has its labelIndex reduced by 1.
		for (LblFtrPair survivor : survivingFFs) {
			if (survivor.labelIndex > removedIndex) {
				survivor.labelIndex-- ;
			}
		}

		// Shrink the weight buffer to exactly the number of surviving feature functions.
		double[] survivingWeights = new double[survivors] ;
		System.arraycopy(weightBuffer, 0, survivingWeights, 0, survivors);
		globalData.crfModel.ffs = survivingFFs ;
		globalData.crfModel.weights = survivingWeights ;

		// Re-optimize the reduced model and write it back to the model file.
		new OptimizeFieldOnly(globalData.crfModel, globalData).optimize(10) ;
		if (saveModel()) {
			return true ;
		}
		// A failed save marks the handler as not ready.
		file = null ;
		return false ;
	}




	/**
	 * @return Returns list of allowed Characters
	 */
	private ArrayList<String> allowedCharacters() {
		// Punctuation and space, listed in the order in which they are appended after the alphanumerics.
		final String punctuation = " .%@_-*()[]+/&:,;?";
		ArrayList<String> allowed = new ArrayList<String>() ;
		// Each allowed character is stored as a one-character String.
		// String.valueOf(char) replaces the deprecated Character(char) constructor.
		for(char c = 'A'; c <= 'Z'; c++) {
			allowed.add(String.valueOf(c)) ;
		}
		for(char c = 'a'; c <= 'z'; c++) {
			allowed.add(String.valueOf(c)) ;
		}
		for(char c = '0'; c <= '9'; c++) {
			allowed.add(String.valueOf(c)) ;
		}
		for(int i = 0; i < punctuation.length(); i++) {
			allowed.add(String.valueOf(punctuation.charAt(i))) ;
		}
		return allowed ;
	}



	/**
	 * @param uncleanList List of all examples
	 * @param cleanedList List with examples that dont have unallowed chars and others such as nulls or empty strings
	 * This method cleans the examples list passed to it. Generally, it is used by other methods to sanitize lists passed from outside.
	 */
	private void cleanedExamplesList(List<String> uncleanList, List<String> cleanedList) {
		cleanedList.clear();
		for (String candidate : uncleanList) {
			// null entries are dropped
			if (candidate == null) {
				continue;
			}
			String sanitized = getSanitizedString(candidate);
			// entries that are empty after sanitizing are dropped as well
			if (sanitized.length() == 0) {
				continue;
			}
			cleanedList.add(sanitized) ;
		}
	}


	/**
	 * @param columnName The value passed for the ColumnFeature ColumnHeaderName
	 * @param features The set in which the features extracted about this value will be returned.
	 */
	private void extractFeaturesFromColumnName(String columnName, HashSet<String> features) {
		ArrayList<String> pieces = new ArrayList<String>();
		features.clear();
		// pieces stays empty when splitString rejects its arguments (e.g. a null columnName)
		splitString(columnName, pieces);
		// features is a set, so repeated pieces collapse into a single entry on insertion
		for (String piece : pieces) {
			String trimmedPiece = piece.trim();
			if (trimmedPiece.length() == 0) {
				continue;
			}
			features.add(trimmedPiece.toLowerCase()) ;
		}
	}

	/**
	 * @param tableName The value passed for the ColumnFeature TableName
	 * @param features The set in which the features extracted about this value will be returned.
	 */
	private void extractFeaturesFromTableName(String tableName, HashSet<String> features) {
		ArrayList<String> fragments = new ArrayList<String>();
		features.clear();
		// fragments stays empty when splitString rejects its arguments (e.g. a null tableName)
		splitString(tableName, fragments);
		// Every non-blank fragment becomes a lower-cased feature; the set removes duplicates.
		for (int i = 0; i < fragments.size(); i++) {
			String fragment = fragments.get(i).trim();
			if (fragment.length() > 0) {
				features.add(fragment.toLowerCase()) ;
			}
		}
	}

	/**
	 * @param field A string from which syntactic features will be extracted
	 * @param features The arg used to return those features.
	 */
	private void featureSet(String field, HashSet<String> features) {
		// Tokenize first; the output set is only reset once tokenization has returned.
		ArrayList<Part> tokens = Lexer.tokenizeField(field);
		features.clear();
		// The result is the union of the features of all tokens.
		for (int i = 0; i < tokens.size(); i++) {
			features.addAll(RegexFeatureExtractor.getTokenFeatures(tokens.get(i))) ;
		}
	}


	/**
	 * @param example The example for which the features have to be extracted
	 * @param features The arg used to return those features.
	 */
	private void featureSet(Example example, HashSet<String> features) {
		// scratch is overwritten by each extractor call below and merged into features afterwards
		HashSet<String> scratch = new HashSet<String>();
		features.clear();

		// features of the example string itself
		featureSet(example.exampleString, scratch);
		features.addAll(scratch);

		// features derived from the column header name, when the example has one
		String headerName = example.getValueForColumnFeature(ColumnFeature.ColumnHeaderName);
		if (headerName != null) {
			extractFeaturesFromColumnName(headerName, scratch);
			features.addAll(scratch);
		}

		// features derived from the table name, when the example has one
		String tableName = example.getValueForColumnFeature(ColumnFeature.TableName);
		if (tableName != null) {
			extractFeaturesFromTableName(tableName, scratch);
			features.addAll(scratch);
		}
	}


	/**
	 * @param field Field for which features are to be extracted
	 * @param columnFeatures The columnFeatures of the field.
	 * @param features A set used to return the features.
	 * This method just uses the first string in every collection to construct an Example.
	 * It then uses featureSet(Example, HashSet<String>) method to return the features for this created example.
	 */
	private void featureSet(String field, Map<ColumnFeature, Collection<String>> columnFeatures, HashSet<String> features) {
		Example example = new Example(field);
		if (columnFeatures != null) {
			for (Map.Entry<ColumnFeature, Collection<String>> entry : columnFeatures.entrySet()) {
				Collection<String> candidates = entry.getValue();
				// column features without any value contribute nothing
				if (candidates == null || candidates.size() == 0) {
					continue;
				}
				// only the first value yielded by the collection is attached to the example
				for (String firstValue : candidates) {
					example.addColumnFeature(entry.getKey(), firstValue);
					break;
				}
			}
		}
		// delegate to the Example-based overload for the actual feature extraction
		featureSet(example, features);
	}
	
	
	/**
	 * @param unsanitizedString The string to be sanitized. Must not be null.
	 * @return A string made of those characters of the input, in their original order,
	 * whose one-character string is contained in allowedCharacters. All other characters are dropped.
	 */
	private String getSanitizedString(String unsanitizedString) {
		int length = unsanitizedString.length();
		// A StringBuilder avoids creating a new String for every retained character.
		StringBuilder sanitized = new StringBuilder(length);
		for(int i=0;i<length;i++) {
			// allowedCharacters is queried with one-character strings, so keep the substring form
			String charAtIndex = unsanitizedString.substring(i,i+1) ;
			if (allowedCharacters.contains(charAtIndex)) {
				sanitized.append(charAtIndex) ;
			}
		}
		return sanitized.toString();
	}

	


	/**
	 * @param array The array of doubles
	 * @return A list containing the same doubles in the same order
	 * A utility method to get a new list having the same values as an array
	 */
	private ArrayList<Double> newListFromDoubleArray(double[] array) {
		ArrayList<Double> boxedValues = new ArrayList<Double>() ;
		// copy element by element, boxing each primitive and preserving the array order
		for (int i = 0; i < array.length; i++) {
			boxedValues.add(Double.valueOf(array[i])) ;
		}
		return boxedValues ;
	}


	/**
	 * @param br A BufferedReader instance
	 * @return Parsed Example instance.
	 * @throws Exception Mainly IOException
	 * This method starts from wherever the BufferedReader is and keeps reading till it has parsed an entire Example.
	 * Then it returns it.
	 */
	private Example parseExample(BufferedReader br) throws Exception {
		Example example;
		String exampleString;
		int contentLen;
		// An example line is "<len> <example>" followed by zero or more " <len> <FeatureName>:<value>" entries and a newline.
		contentLen = parseLengthHeader(br);
		if (contentLen == -1) {
			Prnt.prn("Parsing of file failed since lengthHeader could not be parsed.");
			return null;
		}
		// The space that terminates the length header has already been consumed by parseLengthHeader.
		exampleString = readFixedLengthContent(br, contentLen);
		if (exampleString == null) {
			Prnt.prn("Parsing of file failed because the file ended in the middle of an example string.");
			return null;
		}
		example = new Example(exampleString);
		while (true) {
			// read() returns -1 at end of file; it is kept as an int so that it is not mistaken for a character
			int next = br.read();
			if (next == '\n') { // end of this example's line
				break;
			}
			else if (next == ' ') {
				String columnFeatureStringAndValue, columnFeatureString, columnFeatureValue;
				ColumnFeature columnFeature;
				int separatorIndex;
				contentLen = parseLengthHeader(br);
				if (contentLen == -1) {
					Prnt.prn("Parsing of file failed since lengthHeader could not be parsed.");
					return null;
				}
				columnFeatureStringAndValue = readFixedLengthContent(br, contentLen);
				if (columnFeatureStringAndValue == null) {
					Prnt.prn("Parsing of file failed because the file ended in the middle of a column feature.");
					return null;
				}
				// The feature name is everything before the first colon; the value is everything after it
				// (the value itself may contain further colons).
				separatorIndex = columnFeatureStringAndValue.indexOf(':');
				if (separatorIndex == -1) {
					Prnt.prn("Parsing of file failed. The column feature entry " + columnFeatureStringAndValue + " has no colon separating its name from its value.");
					return null;
				}
				columnFeatureString = columnFeatureStringAndValue.substring(0, separatorIndex);
				columnFeatureValue = columnFeatureStringAndValue.substring(separatorIndex + 1) ;
				try {
					columnFeature = Enum.valueOf(ColumnFeature.class, columnFeatureString);
				}
				catch (IllegalArgumentException e) {
					Prnt.prn("Parsing of file failed. There is no ColumnFeature called " + columnFeatureString + ".");
					return null;
				}
				example.addColumnFeature(columnFeature, columnFeatureValue);
			}
			else {
				// Any other character, or end of file (reported as -1), is a format error.
				Prnt.prn("Parsing of file failed because found a character other than space or newline after a column feature. The charcter is " + next);
				return null;
			}
		}
		return example;
	}

	/**
	 * @param br The reader positioned at the first character of the content
	 * @param length The exact number of characters to read
	 * @return The characters read, or null if the end of the file was reached before length characters were available
	 * @throws Exception Mainly IOException
	 */
	private String readFixedLengthContent(BufferedReader br, int length) throws Exception {
		char[] buffer = new char[length];
		int filled = 0;
		// a single read call may deliver fewer characters than requested, so loop until the buffer is full
		while (filled < length) {
			int readNow = br.read(buffer, filled, length - filled);
			if (readNow == -1) {
				return null;
			}
			filled += readNow;
		}
		return new String(buffer);
	}


	/**
	 * @param br BufferedReader reading the model file.
	 * @return The int value of the string.
	 * @throws Exception
	 */
	private int parseLengthHeader(BufferedReader br) throws Exception {
		int digitsSeen = 0;
		int value = 0;
		for (;;) {
			char c = (char) br.read();
			if (c == ' ') {
				// a space terminates the header; a header without any digit is invalid
				return (digitsSeen > 0) ? value : -1;
			}
			if (c < '0' || c > '9') {
				// anything that is neither a digit nor the terminating space is invalid
				return -1;
			}
			digitsSeen++;
			// accumulate the decimal value digit by digit
			value = value * 10 + (c - '0');
			if (digitsSeen > 5) {
				Prnt.prn("Length marker has more than 5 digits. The program doesn't expect such large entries. Signaling parsing error.");
				return -1;
			}
		}
	}
	
	
	/**
	 * @param labelIndex The index of the label whose training graphs are to be removed.
	 * This method removes from globalData.trainingGraphs every graph whose node has this labelIndex.
	 * The remaining graphs keep their relative order and their labelIndex values.
	 */
	private void removeGraphsForLabel(int labelIndex) {
		// iterate from the end so that a removal never shifts an element that is still to be visited
		for (int pos = globalData.trainingGraphs.size() - 1; pos >= 0; pos--) {
			GraphFieldOnly candidate = (GraphFieldOnly) globalData.trainingGraphs.get(pos);
			if (candidate.node.labelIndex == labelIndex) {
				globalData.trainingGraphs.remove(pos);
			}
		}
	}
	
	
	/**
	 * @param labelIndex The labelIndex for which the feature functions will be reselected.
	 * @param newFeatureSet The new set of features to be included.
	 * This method takes in a set of features and a labelIndex. 
	 * It removes all existing feature functions for this labelIndex.
	 * It adds new feature functions for the features supplied.
	 * It then sets the weights for feature functions that already existed to their old values.
	 * It sets the weights for all new feature functions to zero.
	 */
	private void reselectFFs(int labelIndex, Set<String> newFeatureSet) {
		List<LblFtrPair> currentFFs = globalData.crfModel.ffs;
		double[] currentWeights = globalData.crfModel.weights;
		ArrayList<LblFtrPair> rebuiltFFs = new ArrayList<LblFtrPair>() ;
		ArrayList<Double> rebuiltWeights = new ArrayList<Double>() ;

		// Pass 1: feature functions of every other label are carried over unchanged, weights included.
		for (int i = 0; i < currentFFs.size(); i++) {
			LblFtrPair ff = currentFFs.get(i);
			if (ff.labelIndex != labelIndex) {
				rebuiltFFs.add(ff) ;
				rebuiltWeights.add(currentWeights[i]);
			}
		}

		// Pass 2: feature functions of this label survive, with their learned weight, only if their
		// feature was selected again. A retained feature is taken out of newFeatureSet
		// (the caller's set is modified), so that only genuinely new features are left in it.
		for (int i = 0; i < currentFFs.size(); i++) {
			LblFtrPair ff = currentFFs.get(i);
			if (ff.labelIndex != labelIndex) {
				continue;
			}
			if (newFeatureSet.contains(ff.feature)) {
				rebuiltFFs.add(ff);
				rebuiltWeights.add(currentWeights[i]) ;
				newFeatureSet.remove(ff.feature);
			}
		}

		// Pass 3: whatever is still in newFeatureSet gets a fresh feature function with weight zero.
		for (String newFeature : newFeatureSet) {
			rebuiltFFs.add(new LblFtrPair(labelIndex, newFeature));
			rebuiltWeights.add(0.0);
		}

		// Install the rebuilt feature functions and a weights array of matching length.
		double[] weightsArray = new double[rebuiltFFs.size()];
		for (int i = 0; i < weightsArray.length; i++) {
			weightsArray[i] = rebuiltWeights.get(i) ;
		}
		globalData.crfModel.ffs = rebuiltFFs ;
		globalData.crfModel.weights = weightsArray;
	}

	/**
	 * @param allFeatures The features to choose from. A feature may occur several times in this list.
	 * @param selectedFeatureSet The set in which the selected features are returned. It is cleared first.
	 * This method randomly selects up to MAX_FFs_PER_LABEL distinct features.
	 * Each pick is made uniformly over the remaining list entries, so a feature that occurs
	 * more often in allFeatures is more likely to be picked. Once picked, all occurrences of
	 * the feature are removed from the pool. If allFeatures has fewer distinct features than
	 * MAX_FFs_PER_LABEL, all of them are selected.
	 */
	private void selectFeatureSetWithWeightedProbability(List<String> allFeatures, Set<String> selectedFeatureSet) {
		ArrayList<String> remaining = new ArrayList<String>(allFeatures);
		Random random = new Random();
		selectedFeatureSet.clear();
		// Stop early when the pool runs dry: Random.nextInt(0) would throw an IllegalArgumentException.
		for(int i=0; i<MAX_FFs_PER_LABEL && !remaining.isEmpty(); i++) {
			String ftr = remaining.get(random.nextInt(remaining.size()));
			selectedFeatureSet.add(ftr);
			// remove every occurrence of the picked feature, not just the one that was drawn
			remaining.removeAll(Collections.singleton(ftr));
		}
	}
	

	/**
	 * This method writes the model in memory to the file that it was read from.
	 * @return true if writing is successful, false otherwise
	 */
	private boolean saveModel() {
		BufferedWriter bw = null;
		try {
			bw = new BufferedWriter(new FileWriter(file)) ;
			// Write the number of labels and then a blank line
			bw.write(globalData.labels.size() + "\n") ;
			bw.write("\n");
			// Write name of label and then list its examples.
			for(String label : globalData.labels) {
				ArrayList<Example> examples;
				bw.write(label + "\n") ;
				examples = labelToExamplesMap.get(label) ;
				
				// At most MAX_EXAMPLES_SAVED_PER_LABEL examples are saved, chosen at random.
				// Note that the shuffle reorders the in-memory example list of this label.
				if (examples.size() > MAX_EXAMPLES_SAVED_PER_LABEL) {
					Collections.shuffle(examples);
					ArrayList<Example> subsetOfExamples = new ArrayList<Example>();
					for (int i=0; i< MAX_EXAMPLES_SAVED_PER_LABEL; i++) {
						subsetOfExamples.add(examples.get(i));
					}
					examples = subsetOfExamples;
				}
				
				bw.write(examples.size() + "\n") ;
				for(Example example : examples) {
					// Every piece of content is prefixed by its length and a space, so that it can be read back verbatim.
					bw.write(example.exampleString.length() + " " + example.exampleString) ;
					for(Map.Entry<ColumnFeature, String> entry : example.columnFeatures.entrySet()) {
						if (entry.getValue() != null) {
							String featureValue;
							featureValue = entry.getKey().toString() + ":" + entry.getValue();
							bw.write(" " + featureValue.length() + " " + featureValue);
						}
					}
					bw.write("\n");
				}
				// blank line after each label's list of examples
				bw.write("\n") ;
			}
			// write all the feature functions as "<label> <feature> <weight>" lines
			bw.write(globalData.crfModel.ffs.size() + "\n") ;
			for(int ffIndex = 0;ffIndex<globalData.crfModel.ffs.size();ffIndex++) {
				LblFtrPair ff;
				ff = globalData.crfModel.ffs.get(ffIndex) ;
				bw.write(globalData.labels.get(ff.labelIndex) + " " + ff.feature + " " + globalData.crfModel.weights[ffIndex] + "\n") ;
			}
			// Closing flushes the buffered content; a failure here must count as a failed save,
			// so it stays inside the try block.
			bw.close() ;
			return true ;
		}
		catch(Exception e) {
			Prnt.prn("Writing the model to file " + file + " failed. The file can be inconsistent with the model in memory until it is successfully written.") ;
			return false ;
		}
		finally {
			// Release the file handle when a write failed midway. Closing an already closed writer has no effect.
			if (bw != null) {
				try {
					bw.close() ;
				}
				catch(Exception ignored) {
					// the failure that matters has already been reported above
				}
			}
		}
	}



	/**
	 * @param str The string to be split
	 * @param parts The list in which the parts will be returned
	 * @return True, if successful. False, if errors like null args.
	 */
	private boolean splitString(String str, ArrayList<String> parts) {
		// basic argument sanity check
		if (str == null || parts == null) {
			return false;
		}
		// the preset splitters: runs of whitespace, and the underscore
		final String[] splitters = { "\\s+", "_" };
		// start with the whole string and refine it with one splitter after the other
		ArrayList<String> current = new ArrayList<String>();
		current.add(str);
		for (String splitter : splitters) {
			ArrayList<String> refined = new ArrayList<String>();
			for (String piece : current) {
				for (String token : piece.split(splitter)) {
					// empty tokens (e.g. from a leading splitter) are discarded
					if (token.length() != 0) {
						refined.add(token);
					}
				}
			}
			current = refined;
		}
		// hand the final tokens back through the output list
		parts.clear();
		parts.addAll(current);
		return true;
	}

} // end of class CRFModelHandlerNew



/*

public static boolean getWeightedFeatureFunctionSums(String example, Map<ColumnFeature, Collection<String>> columnFeatures, List<Double> sums) {
	GraphFieldOnly exampleGraph ;
	HashSet<String> features;
	double[] ffSums;
	MAPFieldOnly mapPredictor;
	features = new HashSet<String>();
	featureSet(example, columnFeatures, features);
	exampleGraph = new GraphFieldOnly(example, null, new ArrayList<String>(features), globalData) ;
	mapPredictor = new MAPFieldOnly(globalData);
	ffSums = mapPredictor.weightedFeatureFunctionSums(exampleGraph);
	sums.clear();
	for(double sum : ffSums) {
		sums.add(sum);
	}
	return true;
}


private static boolean addOrUpdateLabel(String label, List<String> examples) {
	if (file == null) {
		Prnt.prn("CRF Model is not ready, either because it was never read or an error happened while reading it previously. Please try reading the model file again.");
		return false ;
	}
	else {
		return addOrUpdateLabel(label, examples, null) ;
	}
}



 * @param examples - list of examples of an unknown type
 * @param numPredictions - required number of predictions in descending order
 * @param predictedLabels - the argument in which the ordered list of labels is returned. the size of this list could be smaller than numPredictions
 * 							if there aren't that many labels in the model already
 * @param confidenceScores - the probability of the examples belonging to the labels returned.
 * @return
private static boolean predictLabelForExamples(
		List<String> examples,
		int numPredictions,
		List<String> predictedLabels,
		List<Double> confidenceScores
		) {
	if (CRFModelHandler.file == null) {
		Prnt.prn("CRF Model is not ready, either because it was never read or an error happened while reading it previously. Please try reading the model file again.");
		return false ;
	}
	else {
		return predictLabelForExamples(examples, numPredictions, predictedLabels, confidenceScores, null) ;
	}
}

 * @param examples - list of examples of an unknown type
 * @param numPredictions - required number of predictions in descending order
 * @param predictedLabels - the argument in which the ordered list of labels is returned. the size of this list could be smaller than numPredictions
 * 							if there aren't that many labels in the model already
 * @param confidenceScores - the probability of the examples belonging to the labels returned.
 * @param exampleProbabilities - the size() == examples.size(). It contains, for each example, in the same order, a double array that contains the probability 
 * 									of belonging to the labels returned in predictedLabels.
 * @return
private static boolean predictLabelForExamples(
		List<String> examples,
		int numPredictions, 
		List<String> predictedLabels, 
		List<Double> confidenceScores, 
		List<double[]> exampleProbabilities
		) {
	if (CRFModelHandler.file == null) {
		Prnt.prn("CRF Model is not ready, either because it was never read or an error happened while reading it previously. Please try reading the model file again.");
		return false ;
	}
	else {
		return predictLabelForExamples(examples, numPredictions, predictedLabels, confidenceScores, exampleProbabilities, null) ;
	}
}

 */