package edu.northwestern.at.utils.corpuslinguistics.lexicon; /* Please see the license information at the end of this file. */ import java.util.*; import java.io.*; import java.net.URL; import edu.northwestern.at.utils.*; /** LexiconEntry: A lexicon entry. * * <p> * A LexiconEntry contains the following information about a * particular spelling. * </p> * * <ul> * </ul> */ public class LexiconEntry implements Serializable, XCloneable { /** The lexicon entry string. */ public String entry; /** Standardized lexicon entry string. */ public String standardEntry; /** Map with categories as keys and lemmatized entries as values. */ public Map<String, String> lemmata; /** The spelling count. */ public int entryCount; /** Map with categories as keys and counts as values. */ public Map<String, MutableInteger> categoriesAndCounts; /** Category with largest count. */ public String largestCategory; /** Count for largest category. */ public int largestCategoryCount; /** Create a LexiconEntry. */ public LexiconEntry ( String entry , String standardEntry , int entryCount , Map<String, MutableInteger> categoriesAndCounts , Map<String, String> lemmata ) { this.entry = entry; this.standardEntry = standardEntry; this.entryCount = entryCount; this.categoriesAndCounts = categoriesAndCounts; this.lemmata = lemmata; String largestCategory = ""; int largestCategoryCount = 0; determineLargestCategory(); } /** Find the category with the largest count. */ public void determineLargestCategory() { for ( String category : categoriesAndCounts.keySet() ) { MutableInteger count = categoriesAndCounts.get( category ); if ( count.intValue() > largestCategoryCount ) { largestCategoryCount = count.intValue(); largestCategory = category; } } } /** Add or update entry in categories and counts map. * * @param category Category for which to add/update count. * @param count Category count to add to entry. * May be negative. */ public void updateCategoryAndCount( String category , int count ) { MutableInteger currentCount = categoriesAndCounts.get( category ); if ( ( currentCount == null ) && ( count > 0 ) ) { categoriesAndCounts.put( category , new MutableInteger( count ) ); } else { currentCount.setValue( currentCount.intValue() + count ); if ( currentCount.intValue() <= 0 ) { categoriesAndCounts.remove( category ); determineLargestCategory(); } } } /** Get category count. * * @param category Get number of times category appears * in this lexicon entry.. * * @return Category count. */ public int getCategoryCount( String category ) { int result = 0; if ( categoriesAndCounts.get( category ) != null ) { result = categoriesAndCounts.get( category ).intValue(); } return result; } /** Get the categories, sorted in ascending order. * * @return The sorted category strings as an array of string. */ public String[] getCategories() { // Get category strings. Set<String> categorySet = categoriesAndCounts.keySet(); // Store categories in a String array. String[] categories = (String[])categorySet.toArray( new String[ categorySet.size() ] ); // Sort the categories. Arrays.sort( categories ); // Return sorted categories. return categories; } /** Add/update lemma for a category. * * @param category Category for which to add lemma. * @param lemma Lemma. */ public void updateLemma( String category , String lemma ) { if ( lemma != null ) lemmata.put( category , lemma ); } /** Get lemma for a category. * * @param category Category for which to add lemma. * * @return The lemma. */ public String getLemma( String category ) { String result = lemmata.get( category ); if ( ( result == null ) || ( result.length() == 0 ) ) { result = "*"; } return result; } /** Get String array containing lexicon data suitable for output. * * @return String array containing lexicon data items. * * <p> * The result String array contains the following entries: * </p> * * <p> * <code> * result[0] : entry<br /> * result[1] : entry count<br /> * result[2] : first category tag<br /> * result[3] : first category lemma<br /> * result[4] : first category count<br /> * result[5] : second category tag, if any<br /> * result[6] : second category lemma<br /> * result[7] : second category count, if any<br /> * ... * </code> * </p> */ public String[] getLexiconEntryData() { String[] result = new String[ 3 * categoriesAndCounts.keySet().size() + 2 ]; result[ 0 ] = entry; result[ 1 ] = entryCount + ""; result[ 2 ] = largestCategory; result[ 3 ] = getLemma( largestCategory ); result[ 4 ] = largestCategoryCount + ""; int k = 5; for ( String category : categoriesAndCounts.keySet() ) { if ( !category.equals( largestCategory ) ) { MutableInteger count = categoriesAndCounts.get( category ); result[ k++ ] = category; result[ k++ ] = getLemma( category ); result[ k++ ] = count + ""; } } return result; } /** Deep clone of categories and counts map. * * @return Deep clone of the categories and counts map. */ protected Map<String, MutableInteger> categoriesAndCountsClone() { Map<String, MutableInteger> result = MapFactory.createNewMap( categoriesAndCounts.size() ); for ( String category : categoriesAndCounts.keySet() ) { MutableInteger count = categoriesAndCounts.get( category ); result.put ( new String( category ) , new MutableInteger( count.intValue() ) ); } return result; } /** Deep clone of lemmata map. * * @return Deep clone of the lemmata map. */ protected Map<String, String> lemmataClone() { Map<String, String> result = MapFactory.createNewMap( lemmata.size() ); for ( String key : lemmata.keySet() ) { String data = lemmata.get( key ); result.put( new String( key ) , new String( data ) ); } return result; } /** Clone this lexicon entry. * * @return A deep clone of this lexicon entry. */ public Object clone() { return new LexiconEntry ( entry , standardEntry , entryCount , categoriesAndCountsClone(), lemmataClone() ); } /** Deep clone of this lexicon entry. * * @return A deep clone of this lexicon entry. */ public LexiconEntry deepClone() { return (LexiconEntry)clone(); } } /* Copyright (c) 2008, 2009 by Northwestern University. All rights reserved. Developed by: Academic and Research Technologies Northwestern University http://www.it.northwestern.edu/about/departments/at/ Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal with the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimers. * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimers in the documentation and/or other materials provided with the distribution. * Neither the names of Academic and Research Technologies, Northwestern University, nor the names of its contributors may be used to endorse or promote products derived from this Software without specific prior written permission. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS WITH THE SOFTWARE. */