/* * Cloud9: A MapReduce Library for Hadoop * * Licensed under the Apache License, Version 2.0 (the "License"); you * may not use this file except in compliance with the License. You may * obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or * implied. See the License for the specific language governing * permissions and limitations under the License. */ package edu.umd.cloud9.webgraph.normalizer; /** * * An interface for normalizing lines of anchor text. A Normalizer consists of an stemming, * removing the stop-word, and other processes that lines of anchor text would need to go through, * such as converting to lower case, etc. * * @author Nima Asadi * */ public interface AnchorTextNormalizer { /** * * Stemmer. * * @param anchor original line of anchor text * @return A line of anchor text where every word from the original anchor text is stemmed. */ public String stem(String anchor); /** * * @param anchor original line of anchor text * @return A line of anchor text where the anchor text is normalized (lower-case, etc.) */ public String normalize(String anchor); /** * * Removes the stop-words * * @param anchor original line of anchor text * @return A line of anchor text without the stop-words */ public String removeStopWords(String anchor); /** * * An auxiliary method which performs any other types of normalization. * * @param anchor original line of anchor text * @return A processed line of anchor text. */ public String process(String anchor); }