package org.apache.lucene.facet.index.streaming; import java.io.IOException; import java.util.HashMap; import java.util.Iterator; import java.util.Map.Entry; import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.facet.index.CategoryListPayloadStream; import org.apache.lucene.facet.index.attributes.OrdinalProperty; import org.apache.lucene.facet.index.params.CategoryListParams; import org.apache.lucene.facet.index.params.FacetIndexingParams; import org.apache.lucene.facet.taxonomy.CategoryPath; import org.apache.lucene.facet.util.PartitionsUtils; import org.apache.lucene.util.encoding.IntEncoder; /* * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. * The ASF licenses this file to You under the Apache License, Version 2.0 * (the "License"); you may not use this file except in compliance with * the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ /** * {@link CategoryListTokenizer} for facet counting * * @lucene.experimental */ public class CountingListTokenizer extends CategoryListTokenizer { /** A table for retrieving payload streams by category-list name. */ protected HashMap<String, CategoryListPayloadStream> payloadStreamsByName = new HashMap<String, CategoryListPayloadStream>(); /** An iterator over the payload streams */ protected Iterator<Entry<String, CategoryListPayloadStream>> payloadStreamIterator; public CountingListTokenizer(TokenStream input, FacetIndexingParams indexingParams) { super(input, indexingParams); this.payloadStreamsByName = new HashMap<String, CategoryListPayloadStream>(); } @Override protected void handleStartOfInput() throws IOException { payloadStreamsByName.clear(); payloadStreamIterator = null; } @Override public final boolean incrementToken() throws IOException { if (input.incrementToken()) { if (this.categoryAttribute != null) { OrdinalProperty ordinalProperty = (OrdinalProperty) this.categoryAttribute .getProperty(OrdinalProperty.class); if (ordinalProperty != null && legalCategory()) { CategoryPath categoryPath = this.categoryAttribute .getCategoryPath(); int ordinal = ordinalProperty.getOrdinal(); CategoryListPayloadStream payloadStream = getPayloadStream( categoryPath, ordinal); int partitionSize = indexingParams.getPartitionSize(); payloadStream.appendIntToStream(ordinal % partitionSize); } } return true; } if (this.payloadStreamIterator == null) { this.handleEndOfInput(); this.payloadStreamIterator = this.payloadStreamsByName.entrySet() .iterator(); } if (this.payloadStreamIterator.hasNext()) { Entry<String, CategoryListPayloadStream> entry = this.payloadStreamIterator .next(); String countingListName = entry.getKey(); int length = countingListName.length(); this.termAttribute.resizeBuffer(length); countingListName.getChars(0, length, termAttribute.buffer(), 0); this.termAttribute.setLength(length); CategoryListPayloadStream payloadStream = entry.getValue(); payload.bytes = payloadStream.convertStreamToByteArray(); payload.offset = 0; payload.length = payload.bytes.length; this.payloadAttribute.setPayload(payload); return true; } return false; } /** * A method which allows extending classes to filter the categories going * into the counting list. * * @return By default returns {@code true}, meaning the current category is * to be part of the counting list. For categories that should be * filtered, return {@code false}. */ protected boolean legalCategory() { return true; } protected CategoryListPayloadStream getPayloadStream( CategoryPath categoryPath, int ordinal) throws IOException { CategoryListParams clParams = this.indexingParams.getCategoryListParams(categoryPath); String name = PartitionsUtils.partitionNameByOrdinal(indexingParams, clParams, ordinal); CategoryListPayloadStream fps = payloadStreamsByName.get(name); if (fps == null) { IntEncoder encoder = clParams.createEncoder(); fps = new CategoryListPayloadStream(encoder); payloadStreamsByName.put(name, fps); } return fps; } }