package org.apache.lucene.search.suggest;

import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.nio.charset.StandardCharsets;
import java.util.AbstractMap.SimpleEntry;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;

import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util.TestUtil;
import org.apache.lucene.util.TestUtil;
import org.junit.Test;

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
*/ public class FileDictionaryTest extends LuceneTestCase { private Map.Entry<List<String>, String> generateFileEntry(String fieldDelimiter, boolean hasWeight, boolean hasPayload) { List<String> entryValues = new ArrayList<>(); StringBuilder sb = new StringBuilder(); String term = TestUtil.randomSimpleString(random(), 1, 300); sb.append(term); entryValues.add(term); if (hasWeight) { sb.append(fieldDelimiter); long weight = TestUtil.nextLong(random(), Long.MIN_VALUE, Long.MAX_VALUE); sb.append(weight); entryValues.add(String.valueOf(weight)); } if (hasPayload) { sb.append(fieldDelimiter); String payload = TestUtil.randomSimpleString(random(), 1, 300); sb.append(payload); entryValues.add(payload); } sb.append("\n"); return new SimpleEntry<>(entryValues, sb.toString()); } private Map.Entry<List<List<String>>,String> generateFileInput(int count, String fieldDelimiter, boolean hasWeights, boolean hasPayloads) { List<List<String>> entries = new ArrayList<>(); StringBuilder sb = new StringBuilder(); boolean hasPayload = hasPayloads; for (int i = 0; i < count; i++) { if (hasPayloads) { hasPayload = (i==0) ? true : random().nextBoolean(); } Map.Entry<List<String>, String> entrySet = generateFileEntry(fieldDelimiter, (!hasPayloads && hasWeights) ? 
random().nextBoolean() : hasWeights, hasPayload); entries.add(entrySet.getKey()); sb.append(entrySet.getValue()); } return new SimpleEntry<>(entries, sb.toString()); } @Test public void testFileWithTerm() throws IOException { Map.Entry<List<List<String>>,String> fileInput = generateFileInput(atLeast(100), FileDictionary.DEFAULT_FIELD_DELIMITER, false, false); InputStream inputReader = new ByteArrayInputStream(fileInput.getValue().getBytes("UTF-8")); FileDictionary dictionary = new FileDictionary(inputReader); List<List<String>> entries = fileInput.getKey(); InputIterator inputIter = dictionary.getEntryIterator(); assertFalse(inputIter.hasPayloads()); BytesRef term; int count = 0; while((term = inputIter.next()) != null) { assertTrue(entries.size() > count); List<String> entry = entries.get(count); assertTrue(entry.size() >= 1); // at least a term assertEquals(entry.get(0), term.utf8ToString()); assertEquals(1, inputIter.weight()); assertNull(inputIter.payload()); count++; } assertEquals(count, entries.size()); } @Test public void testFileWithWeight() throws IOException { Map.Entry<List<List<String>>,String> fileInput = generateFileInput(atLeast(100), FileDictionary.DEFAULT_FIELD_DELIMITER, true, false); InputStream inputReader = new ByteArrayInputStream(fileInput.getValue().getBytes("UTF-8")); FileDictionary dictionary = new FileDictionary(inputReader); List<List<String>> entries = fileInput.getKey(); InputIterator inputIter = dictionary.getEntryIterator(); assertFalse(inputIter.hasPayloads()); BytesRef term; int count = 0; while((term = inputIter.next()) != null) { assertTrue(entries.size() > count); List<String> entry = entries.get(count); assertTrue(entry.size() >= 1); // at least a term assertEquals(entry.get(0), term.utf8ToString()); assertEquals((entry.size() == 2) ? 
Long.parseLong(entry.get(1)) : 1, inputIter.weight()); assertNull(inputIter.payload()); count++; } assertEquals(count, entries.size()); } @Test public void testFileWithWeightAndPayload() throws IOException { Map.Entry<List<List<String>>,String> fileInput = generateFileInput(atLeast(100), FileDictionary.DEFAULT_FIELD_DELIMITER, true, true); InputStream inputReader = new ByteArrayInputStream(fileInput.getValue().getBytes("UTF-8")); FileDictionary dictionary = new FileDictionary(inputReader); List<List<String>> entries = fileInput.getKey(); InputIterator inputIter = dictionary.getEntryIterator(); assertTrue(inputIter.hasPayloads()); BytesRef term; int count = 0; while((term = inputIter.next()) != null) { assertTrue(entries.size() > count); List<String> entry = entries.get(count); assertTrue(entry.size() >= 2); // at least term and weight assertEquals(entry.get(0), term.utf8ToString()); assertEquals(Long.parseLong(entry.get(1)), inputIter.weight()); if (entry.size() == 3) { assertEquals(entry.get(2), inputIter.payload().utf8ToString()); } else { assertEquals(inputIter.payload().length, 0); } count++; } assertEquals(count, entries.size()); } @Test public void testFileWithOneEntry() throws IOException { Map.Entry<List<List<String>>,String> fileInput = generateFileInput(1, FileDictionary.DEFAULT_FIELD_DELIMITER, true, true); InputStream inputReader = new ByteArrayInputStream(fileInput.getValue().getBytes("UTF-8")); FileDictionary dictionary = new FileDictionary(inputReader); List<List<String>> entries = fileInput.getKey(); InputIterator inputIter = dictionary.getEntryIterator(); assertTrue(inputIter.hasPayloads()); BytesRef term; int count = 0; while((term = inputIter.next()) != null) { assertTrue(entries.size() > count); List<String> entry = entries.get(count); assertTrue(entry.size() >= 2); // at least term and weight assertEquals(entry.get(0), term.utf8ToString()); assertEquals(Long.parseLong(entry.get(1)), inputIter.weight()); if (entry.size() == 3) { 
assertEquals(entry.get(2), inputIter.payload().utf8ToString()); } else { assertEquals(inputIter.payload().length, 0); } count++; } assertEquals(count, entries.size()); } @Test public void testFileWithDifferentDelimiter() throws IOException { Map.Entry<List<List<String>>,String> fileInput = generateFileInput(atLeast(100), " , ", true, true); InputStream inputReader = new ByteArrayInputStream(fileInput.getValue().getBytes("UTF-8")); FileDictionary dictionary = new FileDictionary(inputReader, " , "); List<List<String>> entries = fileInput.getKey(); InputIterator inputIter = dictionary.getEntryIterator(); assertTrue(inputIter.hasPayloads()); BytesRef term; int count = 0; while((term = inputIter.next()) != null) { assertTrue(entries.size() > count); List<String> entry = entries.get(count); assertTrue(entry.size() >= 2); // at least term and weight assertEquals(entry.get(0), term.utf8ToString()); assertEquals(Long.parseLong(entry.get(1)), inputIter.weight()); if (entry.size() == 3) { assertEquals(entry.get(2), inputIter.payload().utf8ToString()); } else { assertEquals(inputIter.payload().length, 0); } count++; } assertEquals(count, entries.size()); } }