/*
* (C) Copyright 2012-2014 Nuxeo SA (http://nuxeo.com/) and others.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* Contributors:
* Florent Guillaume
*/
package org.nuxeo.ecm.core.storage;
import org.nuxeo.ecm.core.api.DocumentLocation;
import java.util.List;
/**
* Parser of strings for fulltext indexing.
* <p>
* From the strings extracted from the document, decides how they should be parsed, split and normalized for fulltext
* indexing by the underlying engine.
* <p>
* The {@code parse} overloads come in two flavors: those returning the normalized words as a single space-separated
* string, and those accumulating the normalized words into a caller-supplied list. Since 8.4, each flavor also has a
* variant receiving the mime type of the string and the location of the document it was extracted from.
*
* @since 5.9.5
*/
public interface FulltextParser {
/**
* Parses one property value to normalize the fulltext for the database.
* <p>
* The passed {@code path} may be {@code null} if the passed string is not coming from a specific path, for instance
* when it was extracted from binary data.
*
* @param s the string to be parsed and normalized
* @param path the abstracted path for the property (where all complex indexes have been replaced by {@code *}), or
* {@code null}
* @return the normalized words as a single space-separated string
*/
String parse(String s, String path);
/**
* Parses one property value to normalize the fulltext for the database, accumulating the result in a list.
* <p>
* Like {@link #parse(String, String)} but uses the passed list to accumulate words instead of returning them as a
* single string.
* <p>
* The passed {@code path} may be {@code null} if the passed string is not coming from a specific path, for instance
* when it was extracted from binary data.
*
* @param s the string to be parsed and normalized
* @param path the abstracted path for the property (where all complex indexes have been replaced by {@code *}), or
* {@code null}
* @param strings the list into which normalized words should be accumulated
*/
void parse(String s, String path, List<String> strings);
/**
* Parses one property value to normalize the fulltext for the database, given additional context about the
* origin of the string.
* <p>
* The passed {@code path} may be {@code null} if the passed string is not coming from a specific path, for instance
* when it was extracted from binary data.
*
* @param s the string to be parsed and normalized
* @param path the abstracted path for the property (where all complex indexes have been replaced by {@code *}), or
* {@code null}
* @param mimeType the {@code mimeType} of the string to be parsed and normalized. This may be {@code null}
* @param documentLocation the {@code documentLocation} of the Document from which the property value string
* was extracted. This may be {@code null}
* @return the normalized words as a single space-separated string
* @since 8.4
*/
String parse(String s, String path, String mimeType, DocumentLocation documentLocation);
/**
* Parses one property value to normalize the fulltext for the database, given additional context about the
* origin of the string, accumulating the result in a list.
* <p>
* Like {@link #parse(String, String, String, DocumentLocation)} but uses the passed list to accumulate words
* instead of returning them as a single string.
* <p>
* The passed {@code path} may be {@code null} if the passed string is not coming from a specific path, for instance
* when it was extracted from binary data.
*
* @param s the string to be parsed and normalized
* @param path the abstracted path for the property (where all complex indexes have been replaced by {@code *}), or
* {@code null}
* @param mimeType the {@code mimeType} of the string to be parsed and normalized. This may be {@code null}
* @param documentLocation the {@code documentLocation} of the Document from which the property value string
* was extracted. This may be {@code null}
* @param strings the list into which normalized words should be accumulated
* @since 8.4
*/
void parse(String s, String path, String mimeType, DocumentLocation documentLocation, List<String> strings);
}