/*
* Copyright 2011
* Ubiquitous Knowledge Processing (UKP) Lab
* Technische Universität Darmstadt
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package de.tudarmstadt.ukp.dkpro.core.frequency;
import java.io.IOException;
import java.util.Iterator;
import com.googlecode.jweb1t.JWeb1TIterator;
import com.googlecode.jweb1t.Searcher;
import de.tudarmstadt.ukp.dkpro.core.api.frequency.provider.FrequencyCountProviderBase;
/**
 * Base class for frequency count providers that answer their queries through a jWeb1T
 * {@link Searcher}.
 * <p>
 * This class does not assign {@link #searcher}, {@link #basePath} or {@link #language}
 * itself; every method below reads them as-is, so they have to be set before the
 * corresponding method is called (presumably by the concrete subclass - confirm there).
 */
public abstract class Web1TProviderBase
    extends FrequencyCountProviderBase
{
    /** Marker string {@code <S>}; presumably the begin-of-sentence token (not used in this class). */
    public static final String BOS = "<S>";

    /** Marker string {@code </S>}; presumably the end-of-sentence token (not used in this class). */
    public static final String EOS = "</S>";

    /** Searcher that all count and frequency queries are delegated to. */
    protected Searcher searcher;

    /** Path handed to {@link JWeb1TIterator} when iterating over n-grams. */
    protected String basePath;

    /** Language value reported by {@link #getLanguage()}. */
    protected String language;

    /**
     * Returns the language stored in {@link #language}.
     *
     * @return the current value of the {@code language} field
     */
    @Override
    public String getLanguage()
    {
        return language;
    }

    /**
     * Reports the token count as the number of 1-grams known to the searcher.
     *
     * @return the value of {@code searcher.getNrOfNgrams(1)}
     */
    @Override
    public long getNrOfTokens()
    {
        final int unigramSize = 1;
        return searcher.getNrOfNgrams(unigramSize);
    }

    /**
     * Asks the searcher for the number of n-grams of the given size.
     *
     * @param n
     *            the n-gram size
     * @return the count reported by the searcher for size {@code n}
     */
    @Override
    public long getNrOfNgrams(int n)
    {
        final long ngramCount = searcher.getNrOfNgrams(n);
        return ngramCount;
    }

    /**
     * Asks the searcher for the number of distinct n-grams of the given size.
     *
     * @param n
     *            the n-gram size
     * @return the distinct count reported by the searcher for size {@code n}
     */
    @Override
    public long getNrOfDistinctNgrams(int n)
    {
        final long distinctCount = searcher.getNrOfDistinctNgrams(n);
        return distinctCount;
    }

    /**
     * Creates a new {@link JWeb1TIterator} over {@link #basePath} for the given n-gram size
     * and returns its iterator.
     *
     * @param n
     *            the n-gram size
     * @return the iterator obtained from the freshly created {@code JWeb1TIterator}
     * @throws IOException
     *             declared for I/O problems raised while setting up the iteration
     */
    @Override
    public Iterator<String> getNgramIterator(int n)
        throws IOException
    {
        final JWeb1TIterator ngramSource = new JWeb1TIterator(basePath, n);
        return ngramSource.getIterator();
    }

    /**
     * Looks up the frequency of a phrase by delegating to the searcher.
     *
     * @param phrase
     *            the phrase to look up; passed to the searcher unchanged
     * @return the frequency reported by the searcher
     * @throws IOException
     *             declared for I/O problems raised by the underlying lookup
     */
    @Override
    protected long getFrequencyFromProvider(String phrase)
        throws IOException
    {
        final long frequency = searcher.getFrequency(phrase);
        return frequency;
    }
}