/*
 * See the NOTICE file distributed with this work for additional
 * information regarding copyright ownership.
 *
 * This is free software; you can redistribute it and/or modify it
 * under the terms of the GNU Lesser General Public License as
 * published by the Free Software Foundation; either version 2.1 of
 * the License, or (at your option) any later version.
 *
 * This software is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this software; if not, write to the Free
 * Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
 * 02110-1301 USA, or see the FSF site: http://www.fsf.org.
 */
package com.xpn.xwiki.plugin.lucene;

import java.util.Date;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.TermQuery;

import com.xpn.xwiki.XWikiContext;
import com.xpn.xwiki.doc.XWikiDocument;

/**
 * Base class holding the metadata of one indexable item (name, space, wiki, language, title, author, creator and
 * dates) and writing that metadata into a Lucene document. Concrete subclasses supply the value of the type field
 * through {@link #getType()}.
 *
 * @version $Id: $
 */
public abstract class IndexData
{
    private static final Log LOG = LogFactory.getLog(IndexData.class);

    private String documentTitle;

    private String documentName;

    private String documentWeb;

    private String documentFullName;

    private String fullName;

    private String author;

    private Date creationDate;

    private String creator;

    private String language;

    private Date modificationDate;

    /**
     * name of the virtual wiki this doc belongs to
     */
    private String wiki;

    /**
     * Initializes name, title, space, full name, wiki and language from the given document. Author, creator and the
     * two dates are not set here; they stay null until the corresponding setters are called.
     *
     * @param doc the document to take the data from
     * @param context used to compute the display title, and as fallback source of the wiki name when the document
     *            itself reports no database
     */
    public IndexData(final XWikiDocument doc, final XWikiContext context)
    {
        setDocumentName(doc.getName());
        setDocumentTitle(doc.getDisplayTitle(context));
        setDocumentWeb(doc.getSpace());
        setDocumentFullName(doc.getFullName());
        setWiki(doc.getDatabase() == null ? context.getDatabase() : doc.getDatabase());

        // Build "wiki:Space.Name". The wiki prefix is left out when no wiki name is known at all: getId() accepts a
        // missing wiki, so construction must not fail with a NullPointerException in that case either.
        StringBuffer qualifiedName = new StringBuffer();
        if (wiki != null) {
            qualifiedName.append(wiki).append(":");
        }
        qualifiedName.append(documentWeb).append(".").append(documentName);
        setFullName(qualifiedName.toString());

        setLanguage(doc.getLanguage());
    }

    /**
     * Adds this documents data to a lucene Document instance for indexing.
     * <p>
     * <strong>Short introduction to Lucene field types </strong>
     * </p>
     * <p>
     * Which type of Lucene field is used determines what Lucene does with data and how we can use it for searching
     * and showing search results:
     * </p>
     * <ul>
     * <li>Keyword fields don't get tokenized, but are searchable and stored in the index. This is perfect for fields
     * you want to search in programmatically (like ids and such), and date fields. Since all user-entered queries are
     * tokenized, letting the user search these fields makes almost no sense, except for queries on date fields, where
     * tokenization is useless.</li>
     * <li>the stored text fields are used for short texts which should be searchable by the user, and stored in the
     * index for reconstruction. Perfect for document names, titles, abstracts.</li>
     * <li>the unstored field takes the biggest part of the content - the full text. It is tokenized and indexed, but
     * not stored in the index. This makes sense, since when the user wants to see the full content, he clicks the
     * link to view the full version of a document, which is then delivered by xwiki.</li>
     * </ul>
     * <p>
     * A failure while extracting the full text is logged and does not abort indexing; the metadata fields added
     * before it remain in the Lucene document.
     * </p>
     *
     * @param luceneDoc the Lucene document the fields are added to
     * @param doc the wiki document, passed on to {@link #getFullText(XWikiDocument, XWikiContext)}
     * @param context the current context, passed on to {@link #getFullText(XWikiDocument, XWikiContext)}
     */
    public void addDataToLuceneDocument(org.apache.lucene.document.Document luceneDoc, XWikiDocument doc,
        XWikiContext context)
    {
        // Identification and date fields: all stored. Id and dates are indexed untokenized, while language, wiki
        // and type are indexed tokenized.
        // Note: ID field must be UN_TOKENIZED to enable case sensitive IDs
        luceneDoc.add(new Field(IndexFields.DOCUMENT_ID, getId(), Field.Store.YES, Field.Index.UN_TOKENIZED));
        luceneDoc.add(new Field(IndexFields.DOCUMENT_LANGUAGE, this.language, Field.Store.YES,
            Field.Index.TOKENIZED));
        if (wiki != null && wiki.length() > 0) {
            luceneDoc.add(new Field(IndexFields.DOCUMENT_WIKI, wiki, Field.Store.YES, Field.Index.TOKENIZED));
        }
        if (getType() != null) {
            luceneDoc.add(new Field(IndexFields.DOCUMENT_TYPE, getType(), Field.Store.YES, Field.Index.TOKENIZED));
        }
        if (modificationDate != null) {
            luceneDoc.add(new Field(IndexFields.DOCUMENT_DATE, IndexFields.dateToString(modificationDate),
                Field.Store.YES, Field.Index.UN_TOKENIZED));
        }
        if (creationDate != null) {
            luceneDoc.add(new Field(IndexFields.DOCUMENT_CREATIONDATE, IndexFields.dateToString(creationDate),
                Field.Store.YES, Field.Index.UN_TOKENIZED));
        }

        // stored Text fields: tokenized and indexed
        if (documentTitle != null) {
            addTokenizedAndUntokenized(luceneDoc, IndexFields.DOCUMENT_TITLE, documentTitle);
        }
        luceneDoc.add(new Field(IndexFields.DOCUMENT_NAME, documentName, Field.Store.YES, Field.Index.TOKENIZED));
        luceneDoc.add(new Field(IndexFields.DOCUMENT_WEB, documentWeb, Field.Store.YES, Field.Index.TOKENIZED));
        luceneDoc.add(new Field(IndexFields.DOCUMENT_FULLNAME, documentFullName, Field.Store.YES,
            Field.Index.TOKENIZED));
        if (author != null) {
            addTokenizedAndUntokenized(luceneDoc, IndexFields.DOCUMENT_AUTHOR, author);
        }
        if (creator != null) {
            addTokenizedAndUntokenized(luceneDoc, IndexFields.DOCUMENT_CREATOR, creator);
        }

        // UnStored fields: tokenized and indexed, but no reconstruction of
        // original content will be possible from the search result
        try {
            final String fullText = getFullText(doc, context);
            if (fullText != null) {
                luceneDoc.add(new Field(IndexFields.FULLTEXT, fullText, Field.Store.NO, Field.Index.TOKENIZED));
            }
        } catch (Exception e) {
            // Best effort: a document whose full text cannot be extracted is still indexed with its metadata.
            LOG.error("error extracting fulltext for document " + this, e);
        }
    }

    /**
     * Adds a value twice: as a stored, tokenized field under {@code fieldName}, and as an unstored, untokenized
     * field holding the upper-cased value under {@code fieldName + IndexFields.UNTOKENIZED}.
     *
     * @param luceneDoc the Lucene document the two fields are added to
     * @param fieldName base name of the field
     * @param value the non-null value to index
     */
    private static void addTokenizedAndUntokenized(org.apache.lucene.document.Document luceneDoc, String fieldName,
        String value)
    {
        luceneDoc.add(new Field(fieldName, value, Field.Store.YES, Field.Index.TOKENIZED));
        // NOTE(review): toUpperCase() uses the JVM default locale. It is kept as is because code querying this
        // field presumably upper-cases the same way - confirm before switching to a fixed locale.
        luceneDoc.add(new Field(fieldName + IndexFields.UNTOKENIZED, value.toUpperCase(), Field.Store.NO,
            Field.Index.UN_TOKENIZED));
    }

    /**
     * Builds a Lucene query matching only the document this instance represents. This is used for removing old
     * versions of a document from the index before adding a new one.
     *
     * @return a query matching the field DOCUMENT_ID to the value of #getId()
     */
    public Query buildQuery()
    {
        return new TermQuery(new Term(IndexFields.DOCUMENT_ID, getId()));
    }

    /**
     * Builds the identifier in the form {@code wiki:Space.Name.language}; the {@code wiki:} prefix is omitted when
     * the wiki name is null or empty.
     *
     * @return string unique to this document across all languages and virtual wikis
     */
    public String getId()
    {
        StringBuffer retval = new StringBuffer();
        if (wiki != null && wiki.length() > 0) {
            retval.append(wiki).append(":");
        }
        retval.append(documentWeb).append(".");
        retval.append(documentName).append(".");
        retval.append(language);
        return retval.toString();
    }

    /**
     * Builds the text for the full text field from the document name, space, author and creator, separated by
     * single spaces. Values that are null or empty are skipped, so an unset author or creator neither shows up as
     * the word "null" nor gets glued to its neighbour.
     *
     * @param doc the wiki document; not used by this base implementation
     * @param context the current context; not used by this base implementation
     * @return String of documentName, documentWeb, author and creator
     */
    public String getFullText(XWikiDocument doc, XWikiContext context)
    {
        StringBuffer sb = new StringBuffer();
        appendWord(sb, documentName);
        appendWord(sb, documentWeb);
        appendWord(sb, author);
        appendWord(sb, creator);
        return sb.toString();
    }

    /**
     * Appends a value to the buffer, preceded by a single space when the buffer already has content. Null and empty
     * values are ignored.
     *
     * @param sb the buffer to append to
     * @param value the value to append, may be null
     */
    private static void appendWord(StringBuffer sb, String value)
    {
        if (value == null || value.length() == 0) {
            return;
        }
        if (sb.length() > 0) {
            sb.append(" ");
        }
        sb.append(value);
    }

    /**
     * @return the value written to the document type field, or null to leave that field out
     */
    public abstract String getType();

    /**
     * @return the same value as {@link #getId()}
     */
    public String toString()
    {
        return getId();
    }

    /**
     * @param author The author to set.
     */
    public void setAuthor(String author)
    {
        this.author = author;
    }

    /**
     * @param documentTitle the document title
     */
    public void setDocumentTitle(String documentTitle)
    {
        this.documentTitle = documentTitle;
    }

    /**
     * @param documentName The documentName to set.
     */
    public void setDocumentName(String documentName)
    {
        this.documentName = documentName;
    }

    /**
     * @param documentWeb The documentWeb to set.
     */
    public void setDocumentWeb(String documentWeb)
    {
        this.documentWeb = documentWeb;
    }

    /**
     * @param documentFullName The documentFullName to set.
     */
    public void setDocumentFullName(String documentFullName)
    {
        this.documentFullName = documentFullName;
    }

    /**
     * @param modificationDate The modificationDate to set.
     */
    public void setModificationDate(Date modificationDate)
    {
        this.modificationDate = modificationDate;
    }

    /**
     * @return the document title
     */
    public String getDocumentTitle()
    {
        return documentTitle;
    }

    /**
     * @return the document name
     */
    public String getDocumentName()
    {
        return documentName;
    }

    /**
     * @return the space (web) of the document
     */
    public String getDocumentWeb()
    {
        return documentWeb;
    }

    /**
     * @return the full name as reported by the document at construction time, unless changed through the setter
     */
    public String getDocumentFullName()
    {
        return documentFullName;
    }

    /**
     * @return name of the virtual wiki this doc belongs to, may be null
     */
    public String getWiki()
    {
        return wiki;
    }

    /**
     * @param wiki name of the virtual wiki this doc belongs to
     */
    public void setWiki(String wiki)
    {
        this.wiki = wiki;
    }

    /**
     * @return the creation date, or null if none was set
     */
    public Date getCreationDate()
    {
        return creationDate;
    }

    /**
     * @param creationDate The creationDate to set.
     */
    public void setCreationDate(Date creationDate)
    {
        this.creationDate = creationDate;
    }

    /**
     * @return the creator, or null if none was set
     */
    public String getCreator()
    {
        return creator;
    }

    /**
     * @param creator The creator to set.
     */
    public void setCreator(String creator)
    {
        this.creator = creator;
    }

    /**
     * @return the name built by the constructor in the form {@code wiki:Space.Name}, unless changed through the
     *         setter
     */
    public String getFullName()
    {
        return fullName;
    }

    /**
     * @param fullName The fullName to set.
     */
    public void setFullName(String fullName)
    {
        this.fullName = fullName;
    }

    /**
     * @return the language, never null or empty once {@link #setLanguage(String)} has been called
     */
    public String getLanguage()
    {
        return language;
    }

    /**
     * Sets the language; a null or empty value is stored as {@code "default"}.
     *
     * @param lang The language to set.
     */
    public void setLanguage(String lang)
    {
        if (lang != null && lang.length() > 0) {
            this.language = lang;
        } else {
            this.language = "default";
        }
    }
}