package de.spieleck.app.cngram;

/* Please see the license information in the header below. */

/*
NGramJ - n-gram based text classification
Copyright (C) 2001- Frank S. Nestel (frank at spieleck.de)

This program is free software; you can redistribute it and/or modify
it under the terms of the GNU Lesser General Public License as published
by the Free Software Foundation; either version 2.1 of the License, or
(at your option) any later version.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.

You should have received a copy of the GNU Lesser General Public License
along with this program (lesser.txt); if not, write to the Free Software
Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
*/

import java.util.Iterator;
import java.util.Comparator;

/**
 * Contract for an object holding a collection of n-gram statistics.
 * <p>
 * Besides the accessor methods, this interface carries a handful of shared
 * constants and a comparator for ordering character sequences.
 *
 * @author frank nestel
 * @author $Author: nestefan $
 * @version $Revision: 2 $ $Date: 2006-03-27 23:00:21 +0200 (Mo, 27 Mrz 2006) $ $Author: nestefan $
 */
public interface NGramProfile
{
    /** Extension string "ngp"; presumably the file suffix of stored profiles (confirm against users). */
    public static final String NGRAM_PROFILE_EXTENSION = "ngp";

    /** Key string "ngram_count"; presumably marks the normalization entry of a profile (confirm against readers/writers). */
    public static final String NORMALIZATION_STR = "ngram_count";

    /** Marker string "#END"; presumably signals the end of profile data while reading (confirm against readers). */
    public static final String FINISHREAD_STR = "#END";

    /** Shared zero-length {@link NGram} array. */
    public static final NGram[] NO_NGRAM = new NGram[0];

    /**
     * Orders two {@link CharSequence} objects character by character.
     * <p>
     * The sequences are scanned over their common leading part; the first
     * position where the characters differ decides the result, which is the
     * difference of the two char values. If no such position exists (one
     * sequence is a prefix of the other, or both are equal) the result is
     * the length of the second argument minus the length of the first, so a
     * longer sequence is ordered <em>before</em> its own prefix.
     * <p>
     * Both arguments are cast to {@code CharSequence}; passing anything else
     * raises a {@code ClassCastException}.
     */
    public static final Comparator CHAR_SEQ_COMPARATOR = new Comparator()
    {
        public int compare(Object o1, Object o2)
        {
            final CharSequence left = (CharSequence) o1;
            final CharSequence right = (CharSequence) o2;
            final int leftLen = left.length();
            final int rightLen = right.length();

            // Only the part both sequences share can be compared char-wise.
            final int shared = Math.min(leftLen, rightLen);
            int pos = 0;
            while (pos < shared)
            {
                final int delta = left.charAt(pos) - right.charAt(pos);
                if (delta != 0)
                {
                    return delta;
                }
                pos++;
            }

            // No differing char found: decide by length, second minus first.
            return rightLen - leftLen;
        }
    };

    /**
     * Gives access to the ngrams of this profile in sorted order.
     *
     * @return an iterator over the sorted ngrams
     */
    public Iterator getSorted();

    /**
     * Reports how many ngrams there are.
     *
     * @return the number of ngrams
     */
    public int getCount();

    /**
     * Reports the name.
     *
     * @return the name
     */
    public String getName();

    /**
     * Reports the normalization of all NGrams contained.
     *
     * @return the normalization value
     */
    public int getNormalization();

    /**
     * Looks up the ngram for a character sequence.
     *
     * @param seq the character sequence to look up
     * @return the NGram corresponding to {@code seq}, or {@code null} if not found
     */
    public NGram get(CharSequence seq);
}