/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.lucene.index;

import java.io.IOException;

import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.FieldType;
import org.apache.lucene.document.TextField;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.LuceneTestCase;

/** Test that creates way, way, way too many fields */
public class TestManyFields extends LuceneTestCase {

  // NOTE: despite its name, this type is copied from TextField.TYPE_NOT_STORED,
  // i.e. the fields created with it are indexed but not stored.
  private static final FieldType storedTextType = new FieldType(TextField.TYPE_NOT_STORED);

  /**
   * Indexes 100 documents, each carrying six fields whose names are unique to that
   * document ({@code a<j>} .. {@code f<j>}), then verifies the doc counts and that every
   * one of the 600 field/term pairs has a document frequency of exactly one.
   *
   * @throws IOException if indexing or reading the index fails
   */
  public void testManyFields() throws IOException {
    // try-with-resources guarantees the directory, writer and reader are released
    // even when an assertion or I/O error aborts the test part-way through.
    try (Directory dir = newDirectory()) {
      try (IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig(new MockAnalyzer(random()))
                                                       .setMaxBufferedDocs(10))) {
        for (int j = 0; j < 100; j++) {
          Document doc = new Document();
          doc.add(newField("a" + j, "aaa" + j, storedTextType));
          doc.add(newField("b" + j, "aaa" + j, storedTextType));
          doc.add(newField("c" + j, "aaa" + j, storedTextType));
          doc.add(newField("d" + j, "aaa", storedTextType));
          doc.add(newField("e" + j, "aaa", storedTextType));
          doc.add(newField("f" + j, "aaa", storedTextType));
          writer.addDocument(doc);
        }
      }

      // The writer is closed before the reader is opened, as in the original flow.
      try (IndexReader reader = DirectoryReader.open(dir)) {
        assertEquals(100, reader.maxDoc());
        assertEquals(100, reader.numDocs());
        for (int j = 0; j < 100; j++) {
          assertEquals(1, reader.docFreq(new Term("a" + j, "aaa" + j)));
          assertEquals(1, reader.docFreq(new Term("b" + j, "aaa" + j)));
          assertEquals(1, reader.docFreq(new Term("c" + j, "aaa" + j)));
          assertEquals(1, reader.docFreq(new Term("d" + j, "aaa")));
          assertEquals(1, reader.docFreq(new Term("e" + j, "aaa")));
          assertEquals(1, reader.docFreq(new Term("f" + j, "aaa")));
        }
      }
    }
  }

  /**
   * Indexes {@code n} rounds of three differently shaped document batches into a single
   * field (documents of random terms, documents repeating one short term, and documents
   * made of one long repeated pattern), then checks that a search for the short term
   * {@code "aaa"} reports {@code n * 100} total hits.
   *
   * @throws IOException if indexing or searching fails
   */
  public void testDiverseDocs() throws IOException {
    try (Directory dir = newDirectory()) {
      // Assigned inside the writer scope so that random() is consumed in the same
      // order as before (config/analyzer first, then atLeast), keeping seeds reproducible.
      int n;
      try (IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig(new MockAnalyzer(random()))
                                                       .setRAMBufferSizeMB(0.5))) {
        n = atLeast(1);
        for (int i = 0; i < n; i++) {
          // First, docs where every term is unique (heavy on
          // Posting instances)
          for (int j = 0; j < 100; j++) {
            Document doc = new Document();
            for (int k = 0; k < 100; k++) {
              doc.add(newField("field", Integer.toString(random().nextInt()), storedTextType));
            }
            writer.addDocument(doc);
          }

          // Next, many single term docs where only one term
          // occurs (heavy on byte blocks)
          for (int j = 0; j < 100; j++) {
            Document doc = new Document();
            doc.add(newField("field", "aaa aaa aaa aaa aaa aaa aaa aaa aaa aaa", storedTextType));
            writer.addDocument(doc);
          }

          // Next, many single term docs where only one term
          // occurs but the terms are very long (heavy on
          // char[] arrays)
          for (int j = 0; j < 100; j++) {
            StringBuilder b = new StringBuilder();
            String x = Integer.toString(j) + ".";
            for (int k = 0; k < 1000; k++) {
              b.append(x);
            }
            String longTerm = b.toString();

            Document doc = new Document();
            doc.add(newField("field", longTerm, storedTextType));
            writer.addDocument(doc);
          }
        }
      }

      try (IndexReader reader = DirectoryReader.open(dir)) {
        IndexSearcher searcher = newSearcher(reader);
        int totalHits = searcher.search(new TermQuery(new Term("field", "aaa")), 1).totalHits;
        assertEquals(n * 100, totalHits);
      }
    }
  }

  /**
   * LUCENE-4398: keeps adding documents with ten brand-new field names each until the
   * writer's flush count changes, ten times over, and asserts that every later flush
   * happened after more than 90% of the document count seen before the first flush.
   *
   * @throws Exception if indexing fails
   */
  // LUCENE-4398
  public void testRotatingFieldNames() throws Exception {
    try (Directory dir = newFSDirectory(createTempDir("TestIndexWriter.testChangingFields"))) {
      IndexWriterConfig iwc = new IndexWriterConfig(new MockAnalyzer(random()));
      iwc.setRAMBufferSizeMB(0.2);
      iwc.setMaxBufferedDocs(-1);

      try (IndexWriter w = new IndexWriter(dir, iwc)) {
        int upto = 0;

        FieldType ft = new FieldType(TextField.TYPE_NOT_STORED);
        ft.setOmitNorms(true);

        int firstDocCount = -1;
        for (int iter = 0; iter < 10; iter++) {
          final int startFlushCount = w.getFlushCount();

          // Add docs until the writer reports a new flush.
          int docCount = 0;
          while (w.getFlushCount() == startFlushCount) {
            Document doc = new Document();
            for (int i = 0; i < 10; i++) {
              doc.add(new Field("field" + (upto++), "content", ft));
            }
            w.addDocument(doc);
            docCount++;
          }

          if (VERBOSE) {
            System.out.println("TEST: iter=" + iter + " flushed after docCount=" + docCount);
          }

          // The first iteration establishes the baseline the later ones are compared to.
          if (iter == 0) {
            firstDocCount = docCount;
          }

          assertTrue("flushed after too few docs: first segment flushed at docCount=" + firstDocCount + ", but current segment flushed after docCount=" + docCount + "; iter=" + iter,
                     ((float) docCount) / firstDocCount > 0.9);

          if (upto > 5000) {
            // Start re-using field names after a while
            // ... important because otherwise we can OOME due
            // to too many FieldInfo instances.
            upto = 0;
          }
        }
      }
    }
  }
}