/*
* Copyright 2012
* Ubiquitous Knowledge Processing (UKP) Lab
* Technische Universität Darmstadt
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package de.tudarmstadt.ukp.dkpro.core.textnormalizer.factory;
import static org.apache.uima.fit.factory.AnalysisEngineFactory.createEngineDescription;
import org.apache.uima.analysis_engine.AnalysisEngineDescription;
import org.apache.uima.cas.CAS;
import org.apache.uima.fit.factory.AggregateBuilder;
import org.apache.uima.resource.ExternalResourceDescription;
import org.apache.uima.resource.ResourceInitializationException;
import de.tudarmstadt.ukp.dkpro.core.castransformation.ApplyChangesAnnotator;
import de.tudarmstadt.ukp.dkpro.core.jazzy.JazzyChecker;
import de.tudarmstadt.ukp.dkpro.core.textnormalizer.ReplacementFileNormalizer;
import de.tudarmstadt.ukp.dkpro.core.textnormalizer.ReplacementFileNormalizer.SrcSurroundings;
import de.tudarmstadt.ukp.dkpro.core.textnormalizer.ReplacementFileNormalizer.TargetSurroundings;
import de.tudarmstadt.ukp.dkpro.core.textnormalizer.SpellingNormalizer;
import de.tudarmstadt.ukp.dkpro.core.textnormalizer.frequency.CapitalizationNormalizer;
import de.tudarmstadt.ukp.dkpro.core.textnormalizer.frequency.ExpressiveLengtheningNormalizer;
import de.tudarmstadt.ukp.dkpro.core.textnormalizer.frequency.SharpSNormalizer;
import de.tudarmstadt.ukp.dkpro.core.tokit.BreakIteratorSegmenter;
/**
 * Factory for aggregate analysis engine descriptions that each run one normalization step and
 * map the result of an {@link ApplyChangesAnnotator} into a newly named view.
 *
 * <p>
 * The factory is stateful: every {@code get...} method that builds an aggregate reads from the
 * view written by the previously built aggregate (or from {@link CAS#NAME_DEFAULT_SOFA} if none
 * was built yet) and allocates the next view name ({@code view1}, {@code view2}, ...) as its
 * target. The methods therefore have to be called in the order in which the resulting engines
 * are meant to be chained. The view counter is a plain unsynchronized field, so an instance
 * should not be shared between threads without external synchronization.
 *
 * <p>
 * This class is marked deprecated; this file does not name a replacement.
 */
@Deprecated
public class NormalizerFactory
{
    /** Prefix of the view names allocated by {@link #getTargetView()}. */
    private static final String VIEW_PREFIX = "view";

    /** Number of target views allocated so far; 0 until the first aggregate has been built. */
    private int viewCounter = 0;

    /**
     * Builds an aggregate consisting of {@link BreakIteratorSegmenter}, {@link JazzyChecker},
     * {@link SpellingNormalizer} and {@link ApplyChangesAnnotator}.
     *
     * @param aModelLocation
     *            value passed as {@link JazzyChecker#PARAM_MODEL_LOCATION}.
     * @return the aggregate description, with its annotator implementation name set to
     *         {@code "Spell"}.
     * @throws ResourceInitializationException
     *             if one of the descriptions cannot be created.
     */
    public AnalysisEngineDescription getSpellcorrection(String aModelLocation)
        throws ResourceInitializationException
    {
        return buildAggregate("Spell",
                createEngineDescription(BreakIteratorSegmenter.class),
                createEngineDescription(JazzyChecker.class,
                        JazzyChecker.PARAM_MODEL_LOCATION, aModelLocation),
                createEngineDescription(SpellingNormalizer.class));
    }

    /**
     * Builds an aggregate consisting of {@link BreakIteratorSegmenter}, {@link SharpSNormalizer}
     * and {@link ApplyChangesAnnotator}.
     *
     * @param aFrequencyProvider
     *            resource bound to {@link SharpSNormalizer#FREQUENCY_PROVIDER}.
     * @param aMinFrequency
     *            value passed as {@link SharpSNormalizer#PARAM_MIN_FREQUENCY_THRESHOLD}.
     * @return the aggregate description, with its annotator implementation name set to
     *         {@code "Umlaute"}.
     * @throws ResourceInitializationException
     *             if one of the descriptions cannot be created.
     */
    public AnalysisEngineDescription getUmlautSharpSNormalization(
            ExternalResourceDescription aFrequencyProvider, int aMinFrequency)
        throws ResourceInitializationException
    {
        return buildAggregate("Umlaute",
                createEngineDescription(BreakIteratorSegmenter.class),
                createEngineDescription(SharpSNormalizer.class,
                        SharpSNormalizer.FREQUENCY_PROVIDER, aFrequencyProvider,
                        SharpSNormalizer.PARAM_MIN_FREQUENCY_THRESHOLD, aMinFrequency));
    }

    /**
     * Builds an aggregate consisting of {@link ReplacementFileNormalizer} and
     * {@link ApplyChangesAnnotator}. Unlike the other aggregates built by this factory, no
     * segmenter is added and no annotator implementation name is set.
     *
     * @param aModelLocation
     *            value passed as {@link ReplacementFileNormalizer#PARAM_MODEL_LOCATION}.
     * @param aSrc
     *            value passed as {@link ReplacementFileNormalizer#PARAM_SRC_SURROUNDINGS}.
     * @param aTarget
     *            value passed as {@link ReplacementFileNormalizer#PARAM_TARGET_SURROUNDINGS}.
     * @return the aggregate description.
     * @throws ResourceInitializationException
     *             if one of the descriptions cannot be created.
     */
    public AnalysisEngineDescription getReplacementNormalization(String aModelLocation,
            SrcSurroundings aSrc, TargetSurroundings aTarget)
        throws ResourceInitializationException
    {
        // No implementation name is assigned here; the original code had a disabled attempt to
        // derive one from the replacement file name.
        return buildAggregate(null,
                createEngineDescription(ReplacementFileNormalizer.class,
                        ReplacementFileNormalizer.PARAM_MODEL_LOCATION, aModelLocation,
                        ReplacementFileNormalizer.PARAM_SRC_SURROUNDINGS, aSrc,
                        ReplacementFileNormalizer.PARAM_TARGET_SURROUNDINGS, aTarget));
    }

    /**
     * Builds an aggregate consisting of {@link BreakIteratorSegmenter},
     * {@link ExpressiveLengtheningNormalizer} and {@link ApplyChangesAnnotator}.
     *
     * @param aFrequencyProvider
     *            resource bound to {@link ExpressiveLengtheningNormalizer#FREQUENCY_PROVIDER}.
     * @return the aggregate description, with its annotator implementation name set to
     *         {@code "Lengthening"}.
     * @throws ResourceInitializationException
     *             if one of the descriptions cannot be created.
     */
    public AnalysisEngineDescription getExpressiveLengtheningNormalization(
            ExternalResourceDescription aFrequencyProvider)
        throws ResourceInitializationException
    {
        return buildAggregate("Lengthening",
                createEngineDescription(BreakIteratorSegmenter.class),
                createEngineDescription(ExpressiveLengtheningNormalizer.class,
                        ExpressiveLengtheningNormalizer.FREQUENCY_PROVIDER, aFrequencyProvider));
    }

    /**
     * Builds an aggregate consisting of {@link BreakIteratorSegmenter},
     * {@link CapitalizationNormalizer} and {@link ApplyChangesAnnotator}.
     *
     * @param aFrequencyProvider
     *            resource bound to {@link CapitalizationNormalizer#FREQUENCY_PROVIDER}.
     * @return the aggregate description, with its annotator implementation name set to
     *         {@code "Capitalization"}.
     * @throws ResourceInitializationException
     *             if one of the descriptions cannot be created.
     */
    public AnalysisEngineDescription getCapitalizationNormalization(
            ExternalResourceDescription aFrequencyProvider)
        throws ResourceInitializationException
    {
        return buildAggregate("Capitalization",
                createEngineDescription(BreakIteratorSegmenter.class),
                createEngineDescription(CapitalizationNormalizer.class,
                        CapitalizationNormalizer.FREQUENCY_PROVIDER, aFrequencyProvider));
    }

    /**
     * Returns the name of the view the next aggregate reads from.
     *
     * @return {@link CAS#NAME_DEFAULT_SOFA} while no target view has been allocated, otherwise
     *         the most recently allocated view name.
     */
    protected String getSourceView()
    {
        return currentViewName();
    }

    /**
     * Allocates and returns the next view name. Each call advances the internal counter, so the
     * returned name becomes the source view of the next aggregate.
     *
     * @return the newly allocated view name ({@code view1}, {@code view2}, ...).
     */
    protected String getTargetView()
    {
        viewCounter++;
        return VIEW_PREFIX + viewCounter;
    }

    /**
     * Returns the name of the view that holds the output of the most recently built aggregate.
     *
     * @return the most recently allocated view name, or {@link CAS#NAME_DEFAULT_SOFA} if no
     *         aggregate has been built yet. (Earlier versions returned {@code "view0"} in that
     *         case, a view that none of the aggregates built by this factory ever targets.)
     */
    public String getOutputView()
    {
        return currentViewName();
    }

    /** Name of the most recently allocated view, or the default sofa if there is none yet. */
    private String currentViewName()
    {
        return (viewCounter > 0) ? VIEW_PREFIX + viewCounter : CAS.NAME_DEFAULT_SOFA;
    }

    /**
     * Assembles the given components plus a trailing {@link ApplyChangesAnnotator} into one
     * aggregate. Every component has its default sofa mapped to the current source view; the
     * {@link ApplyChangesAnnotator} gets its source mapped to that same view and its target
     * mapped to a newly allocated view.
     *
     * @param aImplementationName
     *            annotator implementation name to set on the aggregate, or {@code null} to leave
     *            it unset.
     * @param aComponents
     *            component descriptions, in execution order.
     * @return the aggregate description.
     * @throws ResourceInitializationException
     *             if a description cannot be created.
     */
    private AnalysisEngineDescription buildAggregate(String aImplementationName,
            AnalysisEngineDescription... aComponents)
        throws ResourceInitializationException
    {
        // Read the source view before a target view is allocated: allocating advances the
        // counter that the source view name is derived from.
        String sourceView = getSourceView();

        AggregateBuilder builder = new AggregateBuilder();
        for (AnalysisEngineDescription component : aComponents) {
            builder.add(component, CAS.NAME_DEFAULT_SOFA, sourceView);
        }

        AnalysisEngineDescription applyChanges = createEngineDescription(
                ApplyChangesAnnotator.class);
        String targetView = getTargetView();
        builder.add(applyChanges,
                ApplyChangesAnnotator.VIEW_SOURCE, sourceView,
                ApplyChangesAnnotator.VIEW_TARGET, targetView);

        AnalysisEngineDescription aggregate = builder.createAggregateDescription();
        if (aImplementationName != null) {
            aggregate.setAnnotatorImplementationName(aImplementationName);
        }
        return aggregate;
    }
}