// This file is part of AceWiki.
// Copyright 2008-2013, AceWiki developers.
//
// AceWiki is free software: you can redistribute it and/or modify it under the terms of the GNU
// Lesser General Public License as published by the Free Software Foundation, either version 3 of
// the License, or (at your option) any later version.
//
// AceWiki is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without
// even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
// Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public License along with AceWiki. If
// not, see http://www.gnu.org/licenses/.

package ch.uzh.ifi.attempto.acewiki.gf;

import java.net.URI;
import java.net.URISyntaxException;
import java.util.Collections;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Set;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import com.google.common.base.Functions;
import com.google.common.base.Joiner;
import com.google.common.base.Splitter;
import com.google.common.collect.HashMultimap;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.Maps;
import com.google.common.collect.Multimap;
import com.google.common.collect.Ordering;
import com.google.common.collect.Sets;

import ch.uzh.ifi.attempto.acewiki.core.Ontology;
import ch.uzh.ifi.attempto.gfservice.GfModule;
import ch.uzh.ifi.attempto.gfservice.GfParseResult;
import ch.uzh.ifi.attempto.gfservice.GfService;
import ch.uzh.ifi.attempto.gfservice.GfServiceException;
import ch.uzh.ifi.attempto.gfservice.GfServiceResultBrowseAll;
import ch.uzh.ifi.attempto.gfservice.GfServiceResultComplete;
import ch.uzh.ifi.attempto.gfservice.GfServiceResultGrammar;
import ch.uzh.ifi.attempto.gfservice.GfServiceResultLinearize;
import ch.uzh.ifi.attempto.gfservice.GfServiceResultLinearizeAll;
import ch.uzh.ifi.attempto.gfservice.GfServiceResultParse;
import ch.uzh.ifi.attempto.gfservice.GfServiceResultRandom;
import ch.uzh.ifi.attempto.gfservice.GfStorage;
import ch.uzh.ifi.attempto.gfservice.GfStorageResult;
import ch.uzh.ifi.attempto.gfservice.GfStorageResultLs;
import ch.uzh.ifi.attempto.gfservice.gfwebservice.GfWebService;
import ch.uzh.ifi.attempto.gfservice.gfwebservice.GfWebStorage;

/**
 * This class wraps GF features of a particular GF grammar.
 *
 * TODO: move ACE-specific stuff out of this class
 *
 * @author Kaarel Kaljurand
 */
public class GfGrammar {

	// Maximal number of parse trees requested from the parser for a single input.
	// TODO: let the user configure the size of the ambiguity
	public final static int GF_PARSE_LIMIT = 10;

	// Upper bound on the number of producer-only functions for which the
	// lookahead (preditor) cache is built; read from the wiki configuration
	// in the constructor.
	private final int LINEARIZE_ALL_QUERY_LIMIT;

	private final Logger mLogger = LoggerFactory.getLogger(GfGrammar.class);

	// Some naming conventions
	public final static String PREFIX_DISAMB = "Disamb";
	public final static String SUFFIX_APE = "Ape";
	public final static String EXTENSION_GF = ".gf";
	public final static String EXTENSION_GFO = ".gfo";

	// Note that true can remove (always removes?) lins
	// which are not available in all the concretes,
	// i.e. if you add a lin then you need to add it to all the concretes
	// otherwise you cannot use it in a sentence.
private final static boolean OPTIMIZE_PGF = true; private final static int GF_APE_FIELD_LOGICAL_SYMBOL = 3; private final static char GF_TOKEN_SEPARATOR = ' '; private final static char GF_TREE_SEPARATOR = '|'; private final static char GF_APE_SEPARATOR = '|'; private final static String GF_SERIALIZATION_SEPARATOR = "||"; public final static Joiner GF_TREE_JOINER = Joiner.on(GF_TREE_SEPARATOR); public final static Joiner GF_SERIALIZATION_JOINER = Joiner.on(GF_SERIALIZATION_SEPARATOR).useForNull(""); public final static Joiner GF_TOKEN_JOINER = Joiner.on(GF_TOKEN_SEPARATOR); public final static Splitter GF_TREE_SPLITTER = Splitter.on(GF_TREE_SEPARATOR).omitEmptyStrings(); public final static Splitter GF_APE_SPLITTER = Splitter.on(GF_APE_SEPARATOR); public final static Splitter GF_SERIALIZATION_SPLITTER = Splitter.on(GF_SERIALIZATION_SEPARATOR); public final static Splitter GF_TOKEN_SPLITTER = Splitter.on(GF_TOKEN_SEPARATOR); private final GfService mGfService; private final GfStorage mGfStorage; private final String mCat; private final String mDir; private GfServiceResultGrammar mGfServiceResultGrammar; private GfServiceResultBrowseAll mGfServiceResultBrowseAll; private final Map<String, Multimap<String, String>> langToTokenToCats = Maps.newHashMap(); private final Map<String, Map<String, String>> langToIriToToken = Maps.newHashMap(); // TODO: could use a Multiset instead but there does not seem to be a // short way to get out k-largest elements. 
private final Map<String, Integer> mCatToSize = Maps.newHashMap(); public GfGrammar(Ontology ontology) { URI serviceUri; try { serviceUri = new URI(ontology.getParameter("service_uri")); } catch (URISyntaxException e) { throw new RuntimeException(e); } String pgfName = ontology.getParameter("pgf_name"); mGfService = new GfWebService(serviceUri, pgfName); mGfStorage = new GfWebStorage(serviceUri); // Note: start_cat can be null, in this case the default start category is used mCat = ontology.getParameter("start_cat"); mDir = getDir(pgfName); LINEARIZE_ALL_QUERY_LIMIT = ontology.getParameterAsInt("linearize_all_query_limit"); try { refreshGrammarInfo(); refreshLangToTokenToCats(); } catch (GfServiceException e) { // TODO Auto-generated catch block e.printStackTrace(); } } public GfServiceResultGrammar getGrammar() { return mGfServiceResultGrammar; } /** * @return set of names of the concrete languages defined in the grammar */ public Set<String> getLanguages() { if (mGfServiceResultGrammar == null) { return Collections.emptySet(); } return mGfServiceResultGrammar.getLanguages().keySet(); } /** * @return set of locales defined for the given language in the grammar */ public Set<String> getLocales(String lang) { if (mGfServiceResultGrammar == null) { return Collections.emptySet(); } return mGfServiceResultGrammar.getLanguages().get(lang); } /** * @return {@code true} iff the given grammar contains a concrete language with suffix SUFFIX_APE */ public boolean isAceCompatible() { return getLanguages().contains(mGfServiceResultGrammar.getName() + SUFFIX_APE); } /** * Parses the given text in the given language. * * @param text The text. * @param language The language. * @return The parse result. 
* @throws GfServiceException */ public Set<String> parse(String text, String language) throws GfServiceException { GfServiceResultParse result = mGfService.parse(mCat, text, language, GF_PARSE_LIMIT); return result.getTrees(language); } public String random() throws GfServiceException { return random(1).iterator().next(); } public List<String> random(int limit) throws GfServiceException { GfServiceResultRandom result = mGfService.random(mCat, limit); return result.getTrees(); } /** * Serializes the GF wiki entry, given as 3 components: * - language (e.g. GeographyEng) * - sentence as string (e.g. "Germany is a country .") * - set of corresponding trees * * The format is: * * lang||text||tree1|tree2|...|treeN * * This is more robust, e.g. if the tree cannot be linearized anymore * because grammar was refactored then we could try to parse the * sentence. Also the sentence could be shown if the tree * has multiple variant lins. */ public static String serialize(GfWikiEntry entry) { return GF_SERIALIZATION_JOINER.join( entry.getLanguage(), entry.getText(), GF_TREE_JOINER.join(entry.getTrees().getTrees())); } /** * Deserializes a GF wiki entry. 
*/ public static GfWikiEntry deserialize(String serialized) { List<String> splitsAsList = ImmutableList.copyOf(GF_SERIALIZATION_SPLITTER.split(serialized)); if (splitsAsList.size() == 1) { // deprecated form, containing just the trees return new GfWikiEntry(new TreeList(GF_TREE_SPLITTER.split(serialized))); } else if (splitsAsList.size() == 3) { Iterable<String> trees = GF_TREE_SPLITTER.split(splitsAsList.get(2)); return new GfWikiEntry( splitsAsList.get(0), splitsAsList.get(1), new TreeList(trees)); } throw new RuntimeException("Syntax error: " + serialized); } public Set<String> linearize(String tree, String language) throws GfServiceException { GfServiceResultLinearize result = mGfService.linearize(tree, language); return result.getTexts(language); } public Map<String, Set<String>> linearize(String tree) throws GfServiceException { GfServiceResultLinearize result = mGfService.linearize(tree, null); return result.getTexts(); } public Set<String> complete(List<String> tokens, String language) throws GfServiceException { return complete(mCat, tokens, language); } /** * <p>This method tries to return a set that contains more than one element, i.e. * if there is only one (unambiguous) completion then "complete" is automatically * called again. In this case the result set contains multi-token completions. * There is a limit of 15 tokens to each completion.</p> * * @param cat start category for the parser * @param tokens list of tokens the last of which is to be completed * @param language language of the input tokens * @return list of possible completions * @throws GfServiceException */ public Set<String> complete(String cat, List<String> tokens, String language) throws GfServiceException { // Remove the last argument if this behavior turns out to be confusing // Removed it (was 15), it seemed to be buggy in some cases. 
GfServiceResultComplete result = mGfService.complete(cat, getCompletionInput(tokens), language, null); return result.getCompletions(language); } public String abstrtree(String tree) throws GfServiceException { return mGfService.abstrtree(tree).getDataUri(); } public String parsetree(String tree, String from) throws GfServiceException { return mGfService.parsetree(tree, from).getDataUri(); } public String alignment(String tree) throws GfServiceException { return mGfService.alignment(tree).getDataUri(); } public Set<String> getProducers(String cat) { return mGfServiceResultBrowseAll.getProducers(cat); } public Set<String> getConsumers(String cat) { return mGfServiceResultBrowseAll.getConsumers(cat); } public String getCategoryName(String cat, String language) { return mGfServiceResultBrowseAll.getCategoryName(cat, language); } /** * <p>Returns the {@code k} largest categories in the order of size. * The size is in terms of the number of producer functions that are * not consumer functions.</p> */ public List<String> getLargestCategories(int k) { return Ordering.natural().onResultOf(Functions.forMap(mCatToSize)).greatestOf(mCatToSize.keySet(), k); } public Multimap<String, String> getTokenToCats(String language) { return langToTokenToCats.get(language); } public Map<String, String> getIriToToken(String language) { return langToIriToToken.get(language); } public GfParseResult parseGfModule(GfModule gfModule) throws GfServiceException { return mGfStorage.parse(gfModule); } /** * Uploads the given GF module to the server. 
*/ public void upload(GfModule module) throws GfServiceException { mGfStorage.upload(mDir, module); } public Set<String> ls(String extension) throws GfServiceException { GfStorageResultLs result = mGfStorage.ls(mDir, extension); return result.getFilenames(); } public void rm(String path) throws GfServiceException { mGfStorage.rm(mDir, path); } public int rmGfo() throws GfServiceException { int count = 0; for (String path : ls(EXTENSION_GFO)) { mGfStorage.rm(mDir, path); count++; } return count; } public String downloadAsString(String filename) throws GfServiceException { return mGfStorage.downloadAsString(mDir, filename); } /** * Updates the grammar based on the given GF module, which is either * a new component of the grammar or which has undergone modifications * and needs to be reintegrated. * * @param gfModule new or modified grammar module * @return GfStorageResult * @throws GfServiceException */ public GfStorageResult integrateGfModule(GfModule gfModule) throws GfServiceException { Set<String> languages = getLanguages(); GfStorageResult result = null; if (isToplevelModule(gfModule, languages)) { // If the module is a (toplevel) concrete syntax module then // update it in the context of other concrete modules. result = mGfStorage.update(mDir, mCat, OPTIMIZE_PGF, languages, gfModule); } else { // Otherwise just upload it and recompile the existing concrete modules. mGfStorage.upload(mDir, gfModule); result = mGfStorage.update(mDir, mCat, OPTIMIZE_PGF, languages); } if (result != null && result.isSuccess()) { refreshGrammarInfo(); refreshLangToTokenToCats(); } return result; } /** * Recompiles the grammar. */ public GfStorageResult update() throws GfServiceException { Set<String> languages = getLanguages(); GfStorageResult result = mGfStorage.update(mDir, mCat, OPTIMIZE_PGF, languages); if (result != null && result.isSuccess()) { refreshGrammarInfo(); refreshLangToTokenToCats(); } return result; } public boolean isGrammarEditable() { return ! 
(mDir == null); } /** * True if the module is a concrete syntax module which no other * module imports. We check if the module name has the form * {@code GrammarLan}. This covers also modules * which were added after the wiki was started up. The previous * technique {@code languages.contains(gfModule.getName())} did not * cover the new modules. */ private boolean isToplevelModule(GfModule gfModule, Set<String> languages) { String moduleName = gfModule.getName(); if (languages.contains(moduleName)) { return true; } if (mGfServiceResultGrammar == null) { return false; } String grammarName = mGfServiceResultGrammar.getName(); return ( moduleName.startsWith(grammarName) && moduleName.length() >= grammarName.length() + 3 && Character.isUpperCase(moduleName.charAt(grammarName.length())) || moduleName.startsWith(PREFIX_DISAMB + grammarName) && moduleName.length() >= PREFIX_DISAMB.length() + grammarName.length() + 3 && Character.isUpperCase(moduleName.charAt(PREFIX_DISAMB.length() + grammarName.length())) ); } // TODO: we assume that editable directories have a certain form private static String getDir(String str) { Pattern p = Pattern.compile("(/tmp/.+)/.+"); Matcher m = p.matcher(str); if (m.matches()) { return m.group(1); } return null; } private static String getCompletionInput(List<String> tokens) { if (tokens.isEmpty()) { return ""; } return GF_TOKEN_JOINER.join(tokens) + GF_TOKEN_SEPARATOR; } private void refreshGrammarInfo() throws GfServiceException { mGfServiceResultGrammar = mGfService.grammar(); mGfServiceResultBrowseAll = mGfService.browseAll(); } /** * <p>Creates a structure from which you can look up the categories of tokens.</p> * * <pre> * language -> token -> categories * </pre> */ private void refreshLangToTokenToCats() throws GfServiceException { // Collect together all the consumer functions. // TODO We are not interested in their linearizations, at least for the time begin. 
Set<String> funsAllConsumers = Sets.newHashSet(); Set<String> cats = mGfServiceResultBrowseAll.getCategories(); for (String cat : cats) { funsAllConsumers.addAll(getConsumers(cat)); } int countAllFuns = mGfServiceResultGrammar.getFunctions().size(); int countIgnoreFuns = funsAllConsumers.size(); mLogger.info("All funs: {}, (ignored) consumer funs: {}", countAllFuns, countIgnoreFuns); if (countAllFuns - countIgnoreFuns > LINEARIZE_ALL_QUERY_LIMIT) { mLogger.warn("Refusing to build preditor cache, as there are too many producer-only funs. " + "Increase LINEARIZE_ALL_QUERY_LIMIT if its current value {} is too low.", LINEARIZE_ALL_QUERY_LIMIT); return; } langToTokenToCats.clear(); mCatToSize.clear(); langToIriToToken.clear(); // Iterate over all the categories that have producer functions for (String cat : cats) { mCatToSize.put(cat, 0); // For each category look at its producers for (String f : getProducers(cat)) { // If this function is also a consumer, then throw it out if (funsAllConsumers.contains(f)) { continue; } // Increment the counter of producers that are not consumers for this category mCatToSize.put(cat, mCatToSize.get(cat) + 1); // Otherwise get all of its linearizations in all the languages. // This includes all the wordforms and variants, because the linearization // is likely to be a complex record that holds many strings. GfServiceResultLinearizeAll result = mGfService.linearizeAll(f, null); Map<String, List<String>> langToTokens = result.getTexts(); // Extract the logical symbol that corresponds to this function. // The logical symbol is present in the Ape-linearization. 
String logicalSymbol = extractLogicalSymbolFromApe(langToTokens.get(mGfServiceResultGrammar.getName() + SUFFIX_APE)); for (Entry<String, List<String>> entry2 : langToTokens.entrySet()) { String lang = entry2.getKey(); Multimap<String, String> tokenToCats = langToTokenToCats.get(lang); // If we haven't seen this language before then create a new hash table entry for it if (tokenToCats == null) { tokenToCats = HashMultimap.create(); langToTokenToCats.put(lang, tokenToCats); } // Store each linearization together with its category. // The linearization is represented by its "most important" token. for (String lin : entry2.getValue()) { String indexToken = getIndexToken(lin); if (indexToken != null) { tokenToCats.put(indexToken, cat); } } if (logicalSymbol != null) { Map<String, String> iriToToken = langToIriToToken.get(lang); // If we haven't seen this language before then create a new hash table entry for it if (iriToToken == null) { iriToToken = Maps.newHashMap(); langToIriToToken.put(lang, iriToToken); } for (String lin : entry2.getValue()) { iriToToken.put(logicalSymbol, lin); // TODO: We assume that the dictionary form is always the first. // Of course, this does not always hold. // Unfortunately, LinearizeAll cannot be used to obtain a GF record, // with all the category labels of the strings, but just a list of plain strings. break; } } } } } } /** * It does not make sense to index linearizations which contain multiple tokens * or which are empty strings, as these cannot be matched during (single token) * lookahead editing. If there are multiple tokens in the given linearization, e.g. * the + Atlantic_Ocean, des + Atlantischen_Ozeans, Atlandi_Ookean + &+ + il; * then we return the longest token (picking the last one in case there are several). * TODO: this is a hack while we're waiting for a cleaner solution. 
*/ private static String getIndexToken(String lin) { int max = 0; String returnTok = null; for (String tok : GF_TOKEN_SPLITTER.omitEmptyStrings().split(lin)) { if (tok.length() >= max) { max = tok.length(); returnTok = tok; } } return returnTok; } /** * <p>Extracts the logical symbol (which is used by APE as the * OWL entity IRI) from the Ape-linearization of a function, assuming * that the function is a lexical function. * Returns {@code null} in case the extraction fails.</p> * * <p>We assume that the Ape linearizations have the form * {@code The_Hague|pn_sg|The_Hague_PN|neutr}, where the logical symbol * is always in the same field and is always the same in case there are * several linearizations.</p> */ private static String extractLogicalSymbolFromApe(List<String> lins) { if (lins == null || lins.isEmpty()) { return null; } int count = 0; for (String field : GF_APE_SPLITTER.split(lins.get(0))) { if (++count == GF_APE_FIELD_LOGICAL_SYMBOL) { return field; } } return null; } }