/* * Carrot2 project. * * Copyright (C) 2002-2016, Dawid Weiss, Stanisław Osiński. * All rights reserved. * * Refer to the full license file "carrot2.LICENSE" * in the root folder of the repository checkout or at: * http://www.carrot2.org/carrot2.LICENSE */ package org.carrot2.core.test; import java.util.*; import org.carrot2.core.Document; import com.carrotsearch.randomizedtesting.RandomizedContext; /** * */ public class TestDocumentFactory { protected static final Map<String, IDataGenerator<?>> DEFAULT_GENERATORS; static { DEFAULT_GENERATORS = new HashMap<String, IDataGenerator<?>>(); DEFAULT_GENERATORS.put(Document.TITLE, new SentenceGenerator(3, true)); DEFAULT_GENERATORS.put(Document.SUMMARY, new SentenceGenerator(10)); DEFAULT_GENERATORS.put(Document.CONTENT_URL, new UrlGenerator(3)); } protected static final Set<String> DEFAULT_FIELDS; static { DEFAULT_FIELDS = new HashSet<String>(); DEFAULT_FIELDS.addAll(Arrays.asList(Document.TITLE, Document.SUMMARY, Document.CONTENT_URL)); } public static final TestDocumentFactory DEFAULT = new TestDocumentFactory(DEFAULT_GENERATORS, DEFAULT_FIELDS); private final Map<String, IDataGenerator<?>> generators; private final Set<String> fields; public TestDocumentFactory(Map<String, IDataGenerator<?>> generators, Set<String> fields) { this.generators = generators; this.fields = fields; } public List<Document> generate(int number) { return generate(number, fields); } public List<Document> generate(int number, Set<String> fieldsToGenerate) { return generate(number, fieldsToGenerate, Collections .<String, IDataGenerator<?>> emptyMap()); } public List<Document> generate(int number, Set<String> fieldsToGenerate, Map<String, IDataGenerator<?>> customGenerators) { final List<Document> result = new ArrayList<Document>(number); for (int i = 0; i < number; i++) { final Document document = new Document(); for (final String field : fieldsToGenerate) { final IDataGenerator<?> generator = resolveGenerator(customGenerators, field); document.setField(field, generator.generate(i)); } result.add(document); } return result; } private IDataGenerator<?> resolveGenerator( Map<String, IDataGenerator<?>> customGenerators, String field) { IDataGenerator<?> generator = customGenerators.get(field); if (generator == null) { generator = generators.get(field); } if (generator == null) { throw new RuntimeException("No generator for field: " + field); } return generator; } public static interface IDataGenerator<T> { public T generate(int sequentialNumber); } private static class SentenceGenerator implements IDataGenerator<String> { private static final String [] WORDS = new String [] { "test", "data", "apple", "London", "PC", "disk", "eclipse", "bank", "pilot", "CD" }; private final int words; private final boolean prependSequentialNumber; public SentenceGenerator(int words) { this(words, false); } public SentenceGenerator(int words, boolean prependSequentialNumber) { this.words = words; this.prependSequentialNumber = prependSequentialNumber; } public String generate(int sequentialNumber) { final Random rnd = RandomizedContext.current().getRandom(); final StringBuilder builder = new StringBuilder(); if (prependSequentialNumber) { builder.append("[" + sequentialNumber + "] "); } for (int i = 0; i < words - 1; i++) { builder.append(WORDS[rnd.nextInt(WORDS.length)]); builder.append(" "); } builder.append(WORDS[rnd.nextInt(WORDS.length)]); return builder.toString(); } } private static class UrlGenerator implements IDataGenerator<String> { private static final String [] ELEMENTS = new String [] { "www", "mail", "carrot2", "test", "alpha", "beta" }; private static final String [] DOMAINS = new String [] { "pl", "co.uk", "com", "org", "net" }; private final int length; public UrlGenerator(int length) { this.length = length; } public String generate(int sequentialNumber) { final Random rnd = RandomizedContext.current().getRandom(); final StringBuilder builder = new StringBuilder(); for (int i = 0; i < length - 1; i++) { builder.append(ELEMENTS[rnd.nextInt(ELEMENTS.length)]); builder.append("."); } builder.append(DOMAINS[rnd.nextInt(DOMAINS.length)]); return builder.toString(); } } }