IndexData.java example

Explorer

xwiki-clams-core-master
- curriki-old
  - gelcplugins
    - src
      - test
        cactus
        org
        gelc
        xwiki
        plugins
        assets
        cactus
        AssetManagerPluginTest.java
        java
        org
        gelc
        xwiki
        plugins
        assets
        AssetManagerPluginTest.java
        framework
        FrameworkManagerPluginTest.java
        mime
        MimeTypePluginTest.java
- plugins
  - asset
    - src
      - main
        java
        org
        curriki
        xwiki
        plugin
        asset
        Asset.java
        AssetException.java
        AssetManager.java
        CollectionSpace.java
        Constants.java
        CurrikiDocument.java
        DefaultAssetManager.java
        Util.java
        attachment
        ArchiveAsset.java
        ArchiveAssetManager.java
        AttachmentAsset.java
        AttachmentAssetManager.java
        AudioAsset.java
        AudioAssetManager.java
        DocumentAsset.java
        DocumentAssetManager.java
        ImageAsset.java
        ImageAssetManager.java
        InteractiveAsset.java
        InteractiveAssetManager.java
        composite
        CollectionCompositeAsset.java
        CompositeAsset.java
        CompositeAssetManager.java
        FolderCompositeAsset.java
        RootCollectionCompositeAsset.java
        external
        ExternalAsset.java
        ExternalAssetManager.java
        VideoAsset.java
        VideoAssetManager.java
        other
        InvalidAsset.java
        ProtectedAsset.java
        UnknownAsset.java
        text
        TextAsset.java
        TextAssetManager.java
  - curriki
    - src
      - main
        java
        org
        curriki
        xwiki
        plugin
        curriki
        CurrikiException.java
        CurrikiPlugin.java
        CurrikiPluginApi.java
  - currikiactivitystream
    - src
      - main
        java
        org
        curriki
        plugin
        activitystream
        impl
        CurrikiActivityStream.java
        plugin
        CurrikiActivityStreamPlugin.java
        CurrikiActivityStreamPluginApi.java
        DocumentationActivityEvent.java
        MemberActivityEvent.java
        MessageActivityEvent.java
        ResourceActivityEvent.java
  - currikispacemanager
    - src
      - main
        java
        org
        curriki
        plugin
        spacemanager
        impl
        CurrikiSpace.java
        CurrikiSpaceManager.java
        CurrikiSpaceManagerExtension.java
        plugin
        CurrikiSpaceManagerPluginApi.java
  - framework
    - src
      - main
        java
        org
        curriki
        xwiki
        plugin
        framework
        CSVImportFilterImpl.java
        DefaultImportFilterImpl.java
        Framework.java
        FrameworkConstant.java
        FrameworkItem.java
        FrameworkManagerPlugin.java
        FrameworkManagerPluginAPI.java
        ImportFilter.java
  - licence
    - src
      - main
        java
        org
        curriki
        xwiki
        plugin
        licence
        Licence.java
        LicenceManagerConstant.java
        LicenceManagerPlugin.java
        LicenceManagerPluginAPI.java
  - lucene
    - src
      - main
        java
        com
        xpn
        xwiki
        plugin
        lucene
        AbstractXWikiRunnable.java
        AttachmentData.java
        DocumentData.java
        IndexData.java
        IndexFields.java
        IndexRebuilder.java
        IndexUpdater.java
        LucenePlugin.java
        LucenePluginApi.java
        ObjectData.java
        SearchResult.java
        SearchResults.java
        TextExtractor.java
        XWikiDocumentQueue.java
        textextraction
        MSExcelTextExtractor.java
        MSPowerPointTextExtractor.java
        MSWordTextExtractor.java
        MimetypeTextExtractor.java
        OpenOfficeTextExtractor.java
        PDFTextExtractor.java
        PlainTextExtractor.java
        XmlTextExtractor.java
        xmlutil
        XmlEncodingDetector.java
        org
        curriki
        xwiki
        plugin
        lucene
        NoStopWordsAnalyzer.java
  - metadata
    - src
      - main
        java
        org
        curriki
        xwiki
        plugin
        metadata
        MetaDataFrameworkPlugin.java
        MetaDataFrameworkPluginAPI.java
  - mimetype
    - src
      - main
        java
        org
        curriki
        xwiki
        plugin
        mimetype
        MimeTypeConstant.java
        MimeTypePlugin.java
        MimeTypePluginAPI.java
  - servlet
    - src
      - main
        java
        org
        curriki
        xwiki
        servlet
        BaseServlet.java
        RestletServlet.java
        restlet
        resource
        BaseResource.java
        DefaultResource.java
        assets
        AssetManagerResource.java
        AssetResource.java
        AssetsResource.java
        ExternalResource.java
        ExternalsResource.java
        MetadataResource.java
        NominateResource.java
        PartnerResource.java
        PublishedResource.java
        SubassetResource.java
        SubassetsResource.java
        TextassetResource.java
        TextassetsResource.java
        UnnominateResource.java
        VideoResource.java
        VideosResource.java
        groups
        GroupCollectionsResource.java
        metadata
        FieldResource.java
        FieldsResource.java
        users
        UserCollectionsResource.java
        UserGroupsResource.java
        UserResource.java
        router
        AssetsRouter.java
        BaseRouter.java
        GroupsRouter.java
        MetadataRouter.java
        ServiceRouter.java
        UsersRouter.java
  - spacemanager
    - src
      - main
        java
        com
        xpn
        xwiki
        plugin
        spacemanager
        api
        Space.java
        SpaceManager.java
        SpaceManagerException.java
        SpaceManagerExtension.java
        SpaceManagers.java
        SpaceUserProfile.java
        impl
        SpaceImpl.java
        SpaceManagerExtensionImpl.java
        SpaceManagerImpl.java
        SpaceUserProfileImpl.java
        plugin
        SpaceApi.java
        SpaceManagerPluginApi.java
- tools
  - appservmonitoring
    - src
      - main
        java
        org
        curriki
        tools
        monitor
        MonitorAllSources.java
        MonitorPageLoadTime.java
        MonitorWebRenderer.java
        MonitoringConstants.java
  - loadtest
    - src
      - main
        java
        org
        curriki
        tools
        loadtest
        Checker.java
        TestClusteringWorksOnTitles.java
        XWikiHttpClient.java
  - loganalyzer
    - src
      - main
        java
        org
        curriki
        tools
        loganalyzer
        LogAnalysisCursor.java
        LogAnalyzer.java
        LogCollector.java
        TestTailer.java
      - test
        java
        UAParserTest.java
        org
        curriki
        tools
        loganalyzer
        MaximizingLogAnalysisCursor.java
        MultipleParallelConsolesTest.java
        ParseAFewTest.java
        TestParseFilenames.java
  - misctools
    - src
      - main
        java
        UploadToWiki.java
        iContactMassiveUpdater.java
        org
        curriki
        tools
        tests
        TryAnOpenIDRequestAtGoogle.java

/*
 * See the NOTICE file distributed with this work for additional
 * information regarding copyright ownership.
 *
 * This is free software; you can redistribute it and/or modify it
 * under the terms of the GNU Lesser General Public License as
 * published by the Free Software Foundation; either version 2.1 of
 * the License, or (at your option) any later version.
 *
 * This software is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this software; if not, write to the Free
 * Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
 * 02110-1301 USA, or see the FSF site: http://www.fsf.org.
 */
package com.xpn.xwiki.plugin.lucene;

import java.util.Date;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.TermQuery;

import com.xpn.xwiki.XWikiContext;
import com.xpn.xwiki.doc.XWikiDocument;

/**
 * Base class holding the data of one entry that is written to the Lucene index. It captures the identifying
 * properties of an {@link XWikiDocument} (name, space, full name, wiki, language, title) and knows how to turn them
 * into Lucene fields. Subclasses supply the entry type via {@link #getType()} and may extend
 * {@link #getFullText(XWikiDocument, XWikiContext)} with additional searchable text.
 * <p>
 * Author, creator, creation date and modification date are not populated by the constructor; they stay
 * <code>null</code> (and are left out of the index) until set through the corresponding setters.
 * </p>
 *
 * @version $Id: $
 */
public abstract class IndexData
{
    private static final Log LOG = LogFactory.getLog(IndexData.class);

    private String documentTitle;

    private String documentName;

    private String documentWeb;

    private String documentFullName;

    private String fullName;

    private String author;

    private Date creationDate;

    private String creator;

    private String language;

    private Date modificationDate;

    /**
     * name of the virtual wiki this doc belongs to
     */
    private String wiki;

    /**
     * Initializes name, title, space, full name, wiki, prefixed full name and language from the given document.
     * <p>
     * The wiki is taken from the document's database and falls back to the context's database when the document
     * has none. The prefixed full name is built as <code>wiki:space.name</code>; the resolved wiki name must not be
     * <code>null</code> because it seeds the buffer used to build that name.
     * </p>
     *
     * @param doc the document whose properties are captured
     * @param context the current context, used to compute the display title and as fallback for the wiki name
     */
    public IndexData(final XWikiDocument doc, final XWikiContext context)
    {
        setDocumentName(doc.getName());
        setDocumentTitle(doc.getDisplayTitle(context));
        setDocumentWeb(doc.getSpace());
        setDocumentFullName(doc.getFullName());
        setWiki(doc.getDatabase() == null ? context.getDatabase() : doc.getDatabase());
        setFullName(new StringBuffer(wiki).append(":").append(documentWeb).append(".").append(
            documentName).toString());
        setLanguage(doc.getLanguage());
    }

    /**
     * Adds this documents data to a lucene Document instance for indexing. <p> <strong>Short introduction to Lucene
     * field types </strong> </p> <p> Which type of Lucene field is used determines what Lucene does with data and how
     * we can use it for searching and showing search results: </p> <ul> <li>Keyword fields don't get tokenized, but are
     * searchable and stored in the index. This is perfect for fields you want to search in programmatically (like ids
     * and such), and date fields. Since all user-entered queries are tokenized, letting the user search these fields
     * makes almost no sense, except of queries for date fields, where tokenization is useless.</li> <li>the stored text
     * fields are used for short texts which should be searchable by the user, and stored in the index for
     * reconstruction. Perfect for document names, titles, abstracts.</li> <li>the unstored field takes the biggest part
     * of the content - the full text. It is tokenized and indexed, but not stored in the index. This makes sense, since
     * when the user wants to see the full content, he clicks the link to view the full version of a document, which is
     * then delivered by xwiki.</li> </ul>
     * <p>
     * Optional values (wiki, type, dates, title, author, creator, full text) are only added when they are set. A
     * failure while extracting the full text is logged and does not prevent the other fields from being added.
     * </p>
     *
     * @param luceneDoc the Lucene document the fields are added to
     * @param doc the wiki document being indexed; handed to {@link #getFullText(XWikiDocument, XWikiContext)}
     * @param context the current context; handed to {@link #getFullText(XWikiDocument, XWikiContext)}
     */
    public void addDataToLuceneDocument(org.apache.lucene.document.Document luceneDoc,
        XWikiDocument doc, XWikiContext context)
    {
        // Note: ID field must be UN_TOKENIZED to enable case sensitive IDs
        addStoredUntokenized(luceneDoc, IndexFields.DOCUMENT_ID, getId());

        // Language, wiki and type are stored and indexed through the analyzer (TOKENIZED)
        addStoredTokenized(luceneDoc, IndexFields.DOCUMENT_LANGUAGE, this.language);
        if (wiki != null && wiki.length() > 0) {
            addStoredTokenized(luceneDoc, IndexFields.DOCUMENT_WIKI, wiki);
        }
        final String type = getType();
        if (type != null) {
            addStoredTokenized(luceneDoc, IndexFields.DOCUMENT_TYPE, type);
        }

        // Date fields: stored and indexed, but not tokenized
        if (modificationDate != null) {
            addStoredUntokenized(luceneDoc, IndexFields.DOCUMENT_DATE,
                IndexFields.dateToString(modificationDate));
        }
        if (creationDate != null) {
            addStoredUntokenized(luceneDoc, IndexFields.DOCUMENT_CREATIONDATE,
                IndexFields.dateToString(creationDate));
        }

        // stored Text fields: tokenized and indexed
        if (documentTitle != null) {
            addStoredTokenized(luceneDoc, IndexFields.DOCUMENT_TITLE, documentTitle);
            addUnstoredUpperCase(luceneDoc, IndexFields.DOCUMENT_TITLE, documentTitle);
        }
        addStoredTokenized(luceneDoc, IndexFields.DOCUMENT_NAME, documentName);
        addStoredTokenized(luceneDoc, IndexFields.DOCUMENT_WEB, documentWeb);
        addStoredTokenized(luceneDoc, IndexFields.DOCUMENT_FULLNAME, documentFullName);
        if (author != null) {
            addStoredTokenized(luceneDoc, IndexFields.DOCUMENT_AUTHOR, author);
            addUnstoredUpperCase(luceneDoc, IndexFields.DOCUMENT_AUTHOR, author);
        }
        if (creator != null) {
            addStoredTokenized(luceneDoc, IndexFields.DOCUMENT_CREATOR, creator);
            addUnstoredUpperCase(luceneDoc, IndexFields.DOCUMENT_CREATOR, creator);
        }

        // UnStored fields: tokenized and indexed, but no reconstruction of
        // original content will be possible from the search result
        try {
            final String ft = getFullText(doc, context);
            if (ft != null) {
                luceneDoc.add(new Field(IndexFields.FULLTEXT,
                    ft,
                    Field.Store.NO,
                    Field.Index.TOKENIZED));
            }
        } catch (Exception e) {
            // Best effort: an entry without full text is still better than no entry at all
            LOG.error("error extracting fulltext for document " + this, e);
        }
    }

    /**
     * Adds a field that is stored in the index and indexed through the analyzer.
     */
    private static void addStoredTokenized(org.apache.lucene.document.Document luceneDoc,
        String fieldName, String value)
    {
        luceneDoc.add(new Field(fieldName, value, Field.Store.YES, Field.Index.TOKENIZED));
    }

    /**
     * Adds a field that is stored in the index and indexed as a single, unanalyzed term.
     */
    private static void addStoredUntokenized(org.apache.lucene.document.Document luceneDoc,
        String fieldName, String value)
    {
        luceneDoc.add(new Field(fieldName, value, Field.Store.YES, Field.Index.UN_TOKENIZED));
    }

    /**
     * Adds the upper-cased value as an unstored, unanalyzed companion field whose name is the given field name
     * followed by {@link IndexFields#UNTOKENIZED}.
     */
    private static void addUnstoredUpperCase(org.apache.lucene.document.Document luceneDoc,
        String fieldName, String value)
    {
        luceneDoc.add(new Field(fieldName + IndexFields.UNTOKENIZED,
            value.toUpperCase(),
            Field.Store.NO,
            Field.Index.UN_TOKENIZED));
    }

    /**
     * Builds a Lucene query matching only the document this instance represents. This is used for removing old versions
     * of a document from the index before adding a new one.
     *
     * @return a query matching the field DOCUMENT_ID to the value of #getId()
     */
    public Query buildQuery()
    {
        return new TermQuery(new Term(IndexFields.DOCUMENT_ID, getId()));
    }

    /**
     * Builds the identifier in the form <code>wiki:space.name.language</code>; the <code>wiki:</code> prefix is
     * left out when no wiki name is set.
     *
     * @return string unique to this document across all languages and virtual wikis
     */
    public String getId()
    {
        StringBuffer retval = new StringBuffer();
        if (wiki != null && wiki.length() > 0) {
            retval.append(wiki).append(":");
        }
        retval.append(documentWeb).append(".");
        retval.append(documentName).append(".");
        retval.append(language);
        return retval.toString();
    }

    /**
     * Builds the basic searchable text of this entry: document name and space, followed by author and creator when
     * they are set. All parts are separated by a single space so that each one is tokenized on its own. Neither
     * parameter is used by this base implementation.
     *
     * @param doc the wiki document being indexed
     * @param context the current context
     * @return the space separated text to index as full text
     */
    public String getFullText(XWikiDocument doc, XWikiContext context)
    {
        StringBuffer sb = new StringBuffer(documentName).append(" ").append(documentWeb);
        // Unset values are skipped so that the literal text "null" never ends up in the index
        if (author != null && author.length() > 0) {
            sb.append(" ").append(author);
        }
        if (creator != null && creator.length() > 0) {
            sb.append(" ").append(creator);
        }
        return sb.toString();
    }

    /**
     * @return the value indexed in the document type field, or <code>null</code> to leave that field out
     */
    public abstract String getType();

    /**
     * @return the same value as {@link #getId()}
     */
    public String toString()
    {
        return getId();
    }

    /**
     * @param author The author to set.
     */
    public void setAuthor(String author)
    {
        this.author = author;
    }

    /**
     * @param documentTitle the document title
     */
    public void setDocumentTitle(String documentTitle)
    {
        this.documentTitle = documentTitle;
    }

    /**
     * @param documentName The documentName to set.
     */
    public void setDocumentName(String documentName)
    {
        this.documentName = documentName;
    }

    /**
     * @param documentWeb The documentWeb to set.
     */
    public void setDocumentWeb(String documentWeb)
    {
        this.documentWeb = documentWeb;
    }

    /**
     * @param documentFullName The documentFullName to set.
     */
    public void setDocumentFullName(String documentFullName)
    {
        this.documentFullName = documentFullName;
    }

    /**
     * @param modificationDate The modificationDate to set.
     */
    public void setModificationDate(Date modificationDate)
    {
        this.modificationDate = modificationDate;
    }

    /**
     * @return the document title
     */
    public String getDocumentTitle()
    {
        return documentTitle;
    }

    /**
     * @return the document name
     */
    public String getDocumentName()
    {
        return documentName;
    }

    /**
     * @return the space (web) of the document
     */
    public String getDocumentWeb()
    {
        return documentWeb;
    }

    /**
     * @return the full name as reported by the document itself
     */
    public String getDocumentFullName()
    {
        return documentFullName;
    }

    /**
     * @return the name of the virtual wiki this document belongs to
     */
    public String getWiki()
    {
        return wiki;
    }

    /**
     * @param wiki the name of the virtual wiki this document belongs to
     */
    public void setWiki(String wiki)
    {
        this.wiki = wiki;
    }

    /**
     * @return the creation date, or <code>null</code> if it has not been set
     */
    public Date getCreationDate()
    {
        return creationDate;
    }

    /**
     * @param creationDate The creationDate to set.
     */
    public void setCreationDate(Date creationDate)
    {
        this.creationDate = creationDate;
    }

    /**
     * @return the creator, or <code>null</code> if it has not been set
     */
    public String getCreator()
    {
        return creator;
    }

    /**
     * @param creator The creator to set.
     */
    public void setCreator(String creator)
    {
        this.creator = creator;
    }

    /**
     * @return the full name prefixed with the wiki name, in the form <code>wiki:space.name</code> unless it was
     *         replaced through {@link #setFullName(String)}
     */
    public String getFullName()
    {
        return fullName;
    }

    /**
     * @param fullName The fullName to set.
     */
    public void setFullName(String fullName)
    {
        this.fullName = fullName;
    }

    /**
     * @return the language of this entry; <code>"default"</code> when no language was given
     */
    public String getLanguage()
    {
        return language;
    }

    /**
     * Sets the language of this entry. A <code>null</code> or empty value is replaced by <code>"default"</code> so
     * that the language part of the identifier and the language field are never empty.
     *
     * @param lang the language code, may be <code>null</code> or empty
     */
    public void setLanguage(String lang)
    {
        if (lang != null && lang.length() > 0) {
            this.language = lang;
        } else {
            this.language = "default";
        }
    }
}