package org.apache.lucene.facet.example.multiCL; import java.util.Arrays; import java.util.List; import java.util.Random; import org.apache.lucene.document.Document; import org.apache.lucene.document.Field; import org.apache.lucene.document.TextField; import org.apache.lucene.index.IndexWriter; import org.apache.lucene.index.IndexWriterConfig; import org.apache.lucene.index.Term; import org.apache.lucene.index.IndexWriterConfig.OpenMode; import org.apache.lucene.store.Directory; import org.apache.lucene.store.RAMDirectory; import org.apache.lucene.facet.example.ExampleUtils; import org.apache.lucene.facet.example.simple.SimpleUtils; import org.apache.lucene.facet.index.CategoryDocumentBuilder; import org.apache.lucene.facet.index.params.CategoryListParams; import org.apache.lucene.facet.index.params.FacetIndexingParams; import org.apache.lucene.facet.index.params.PerDimensionIndexingParams; import org.apache.lucene.facet.taxonomy.CategoryPath; import org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyWriter; /* * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. * The ASF licenses this file to You under the Apache License, Version 2.0 * (the "License"); you may not use this file except in compliance with * the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ /** * Sample indexer creates an index, and adds to it sample documents and facets * with multiple CategoryLists specified for different facets, so there are different * category lists for different facets. * * @lucene.experimental */ public class MultiCLIndexer { // Number of documents to index public static int NUM_DOCS = 100; // Number of facets to add per document public static int NUM_FACETS_PER_DOC = 10; // Number of tokens in title public static int TITLE_LENGTH = 5; // Number of tokens in text public static int TEXT_LENGTH = 100; // Lorum ipsum to use as content - this will be tokenized and used for document // titles/text. static String words = "Sed ut perspiciatis unde omnis iste natus error sit " + "voluptatem accusantium doloremque laudantium totam rem aperiam " + "eaque ipsa quae ab illo inventore veritatis et quasi architecto " + "beatae vitae dicta sunt explicabo Nemo enim ipsam voluptatem " + "quia voluptas sit aspernatur aut odit aut fugit sed quia consequuntur " + "magni dolores eos qui ratione voluptatem sequi nesciunt Neque porro " + "quisquam est qui dolorem ipsum quia dolor sit amet consectetur adipisci velit " + "sed quia non numquam eius modi tempora incidunt ut labore et dolore " + "magnam aliquam quaerat voluptatem Ut enim ad minima veniam " + "quis nostrum exercitationem ullam corporis suscipit laboriosam " + "nisi ut aliquid ex ea commodi consequatur? Quis autem vel eum iure" + "reprehenderit qui in ea voluptate velit esse quam nihil molestiae " + "consequatur vel illum qui dolorem eum fugiat quo voluptas nulla pariatur"; // PerDimensionIndexingParams for multiple category lists public static PerDimensionIndexingParams MULTI_IPARAMS = new PerDimensionIndexingParams(); // Initialize PerDimensionIndexingParams static { MULTI_IPARAMS.addCategoryListParams(new CategoryPath("0"), new CategoryListParams(new Term("$Digits", "Zero"))); MULTI_IPARAMS.addCategoryListParams(new CategoryPath("1"), new CategoryListParams(new Term("$Digits", "One"))); MULTI_IPARAMS.addCategoryListParams(new CategoryPath("2"), new CategoryListParams(new Term("$Digits", "Two"))); MULTI_IPARAMS.addCategoryListParams(new CategoryPath("3"), new CategoryListParams(new Term("$Digits", "Three"))); MULTI_IPARAMS.addCategoryListParams(new CategoryPath("4"), new CategoryListParams(new Term("$Digits", "Four"))); MULTI_IPARAMS.addCategoryListParams(new CategoryPath("5"), new CategoryListParams(new Term("$Digits", "Five"))); } /** * Create an index, and adds to it sample documents and facets. * @param indexDir Directory in which the index should be created. * @param taxoDir Directory in which the taxonomy index should be created. * @throws Exception on error (no detailed exception handling here for sample simplicity */ public static void index(Directory indexDir, Directory taxoDir) throws Exception { Random random = new Random(2003); String[] docTitles = new String[NUM_DOCS]; String[] docTexts = new String[NUM_DOCS]; CategoryPath[][] cPaths = new CategoryPath[NUM_DOCS][NUM_FACETS_PER_DOC]; String[] tokens = words.split(" "); for (int docNum = 0; docNum < NUM_DOCS; docNum++) { String title = ""; String text = ""; for (int j = 0; j < TITLE_LENGTH; j++) { title = title + tokens[random.nextInt(tokens.length)] + " "; } docTitles[docNum] = title; for (int j = 0; j < TEXT_LENGTH; j++) { text = text + tokens[random.nextInt(tokens.length)] + " "; } docTexts[docNum] = text; for (int facetNum = 0; facetNum < NUM_FACETS_PER_DOC; facetNum++) { cPaths[docNum][facetNum] = new CategoryPath(Integer .toString(random.nextInt(7)), Integer.toString(random.nextInt(10))); } } index(indexDir, taxoDir, MULTI_IPARAMS, docTitles, docTexts, cPaths); } /** * More advanced method for specifying custom indexing params, doc texts, * doc titles and category paths. */ public static void index(Directory indexDir, Directory taxoDir, FacetIndexingParams iParams, String[] docTitles, String[] docTexts, CategoryPath[][] cPaths) throws Exception { // create and open an index writer IndexWriter iw = new IndexWriter(indexDir, new IndexWriterConfig( ExampleUtils.EXAMPLE_VER, SimpleUtils.analyzer).setOpenMode(OpenMode.CREATE)); // create and open a taxonomy writer DirectoryTaxonomyWriter taxo = new DirectoryTaxonomyWriter(taxoDir, OpenMode.CREATE); index(iw, taxo, iParams, docTitles, docTexts, cPaths); } /** * More advanced method for specifying custom indexing params, doc texts, * doc titles and category paths. * <p> * Create an index, and adds to it sample documents and facets. * @throws Exception * on error (no detailed exception handling here for sample * simplicity */ public static void index(IndexWriter iw, DirectoryTaxonomyWriter taxo, FacetIndexingParams iParams, String[] docTitles, String[] docTexts, CategoryPath[][] cPaths) throws Exception { // loop over sample documents int nDocsAdded = 0; int nFacetsAdded = 0; for (int docNum = 0; docNum < SimpleUtils.docTexts.length; docNum++) { List<CategoryPath> facetList = Arrays.asList(cPaths[docNum]); // we do not alter indexing parameters! // a category document builder will add the categories to a document // once build() is called CategoryDocumentBuilder categoryDocBuilder = new CategoryDocumentBuilder( taxo, iParams).setCategoryPaths(facetList); // create a plain Lucene document and add some regular Lucene fields // to it Document doc = new Document(); doc.add(new TextField(SimpleUtils.TITLE, docTitles[docNum], Field.Store.YES)); doc.add(new TextField(SimpleUtils.TEXT, docTexts[docNum], Field.Store.NO)); // finally add the document to the index categoryDocBuilder.build(doc); iw.addDocument(doc); nDocsAdded++; nFacetsAdded += facetList.size(); } // commit changes. // we commit changes to the taxonomy index prior to committing them to // the search index. // this is important, so that all facets referred to by documents in the // search index // will indeed exist in the taxonomy index. taxo.commit(); iw.commit(); // close the taxonomy index and the index - all modifications are // now safely in the provided directories: indexDir and taxoDir. taxo.close(); iw.close(); ExampleUtils.log("Indexed " + nDocsAdded + " documents with overall " + nFacetsAdded + " facets."); } public static void main(String[] args) throws Exception { index(new RAMDirectory(), new RAMDirectory()); } }