/*
* Copyright 2013 Cloudera Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.kitesdk.morphline.solr;
import com.google.common.collect.ImmutableMap;
import com.google.common.io.Files;
import java.io.File;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashMap;
import java.util.Map;
import org.apache.commons.io.FileUtils;
import org.apache.solr.common.SolrDocumentList;
import org.apache.solr.schema.IndexSchema;
import org.junit.Test;
import org.kitesdk.morphline.api.MorphlineContext;
import org.kitesdk.morphline.api.Record;
import org.kitesdk.morphline.base.Fields;
import org.kitesdk.morphline.base.Notifications;
/**
 * Tests for the Solr related morphline pieces exercised here: schema loading
 * through {@link SolrLocator}, and morphline configs that load, partially
 * update and delete Solr documents, index child documents, and tokenize text.
 */
public class SolrMorphlineTest extends AbstractSolrMorphlineTest {

  private static final File SOLR_INSTANCE_DIR = new File(RESOURCES_DIR + "/solr");

  /** Verifies that a locator pointed at the "collection1" directory yields an index schema. */
  @Test
  public void testLoadSchema() throws Exception {
    SolrLocator locator = new SolrLocator(new MorphlineContext.Builder().build());
    locator.setCollectionName("collection1");
    locator.setSolrHomeDir(SOLR_INSTANCE_DIR + File.separator + "collection1");
    assertNotNull(locator.getIndexSchema());
  }

  /**
   * Verifies that a schema can be loaded from a collection that has no
   * schema.xml, and that the loaded schema knows the field
   * "test-managed-morphline-field".
   */
  @Test
  public void testLoadManagedSchema() throws Exception {
    File solrHomeDir = Files.createTempDir();
    try {
      // Copy the collection1 config files, so we don't have to keep multiple
      // copies of the auxiliary files in source
      File collection1Dir = new File(SOLR_INSTANCE_DIR, "collection1");
      FileUtils.copyDirectory(collection1Dir, solrHomeDir);

      // Copy in the managed collection files, remove the schema.xml since the
      // managed schema uses a generated one
      File managedCollectionDir = new File(SOLR_INSTANCE_DIR, "managedSchemaCollection");
      FileUtils.copyDirectory(managedCollectionDir, solrHomeDir);
      File oldSchemaXml = new File(new File(solrHomeDir, "conf"), "schema.xml");
      oldSchemaXml.delete();
      assertFalse(oldSchemaXml.exists());

      SolrLocator locator = new SolrLocator(new MorphlineContext.Builder().build());
      locator.setCollectionName("managedSchemaCollection");
      locator.setSolrHomeDir(solrHomeDir.getAbsolutePath());
      IndexSchema schema = locator.getIndexSchema();
      assertNotNull(schema);
      assertNotNull(schema.getField("test-managed-morphline-field"));
    } finally {
      // File.deleteOnExit() cannot remove a directory that still has content,
      // so the populated temp dir is removed recursively here instead.
      FileUtils.deleteQuietly(solrHomeDir);
    }
  }

  /**
   * Loads a single record through the "loadSolrBasic" morphline and checks
   * that the collected record carries only the id and that exactly one
   * document is visible in Solr after the commit.
   */
  @Test
  public void testLoadSolrBasic() throws Exception {
    morphline = createMorphline("test-morphlines" + File.separator + "loadSolrBasic");
    Record record = new Record();
    record.put(Fields.ID, "id0");
    record.put("first_name", "Nadja"); // will be sanitized
    startSession();
    Notifications.notifyBeginTransaction(morphline);
    assertTrue(morphline.process(record));
    assertEquals(1, collector.getNumStartEvents());
    Notifications.notifyCommitTransaction(morphline);

    Record expected = new Record();
    expected.put(Fields.ID, "id0");
    assertEquals(Arrays.asList(expected), collector.getRecords());
    assertEquals(1, queryResultSetSize("*:*"));
    Notifications.notifyRollbackTransaction(morphline);
    Notifications.notifyShutdown(morphline);
  }

  /**
   * Inserts one document and then sends a series of records whose field
   * values are single-entry maps keyed by "set", "add" or "inc", checking
   * after each step that only the targeted field changed.
   */
  @Test
  public void testLoadSolrWithPartialUpdate() throws Exception {
    morphline = createMorphline("test-morphlines" + File.separator + "loadSolrBasic");

    // insert a document with a bunch of fields
    Record record = new Record();
    record.put(Fields.ID, "id0");
    record.put("user_friends_count", 123);
    record.put("text", "myText");
    Notifications.notifyBeginTransaction(morphline);
    assertTrue(morphline.process(record));
    assertEquals(1, collector.getRecords().size());
    assertEquals(1, query("*:*").getResults().size());

    // remove "text" field; retain other fields as-is
    record = new Record();
    record.put(Fields.ID, "id0");
    Map<String, Object> clearText = new HashMap<String, Object>();
    clearText.put("set", null);
    record.put("text", clearText);
    assertTrue(morphline.process(record));
    SolrDocumentList docs = query("*:*").getResults();
    assertEquals(1, docs.size());
    assertEquals("id0", docs.get(0).getFirstValue(Fields.ID));
    assertEquals(123, docs.get(0).getFirstValue("user_friends_count"));
    assertNull(docs.get(0).getFieldValue("text"));

    // set "text" field to "hello world"; retain other fields as-is
    record = new Record();
    record.put(Fields.ID, "id0");
    record.put("text", Collections.singletonMap("set", "hello world"));
    assertTrue(morphline.process(record));
    docs = query("*:*").getResults();
    assertEquals(1, docs.size());
    assertEquals("id0", docs.get(0).getFirstValue(Fields.ID));
    assertEquals(123, docs.get(0).getFirstValue("user_friends_count"));
    assertEquals("hello world", docs.get(0).getFirstValue("text"));

    // add "goodbye moon" to text field; retain other fields as-is
    record = new Record();
    record.put(Fields.ID, "id0");
    record.put("text", ImmutableMap.of("add", "goodbye moon"));
    assertTrue(morphline.process(record));
    docs = query("*:*").getResults();
    assertEquals(1, docs.size());
    assertEquals("id0", docs.get(0).getFirstValue(Fields.ID));
    assertEquals(123, docs.get(0).getFirstValue("user_friends_count"));
    assertEquals(Arrays.asList("hello world", "goodbye moon"), docs.get(0).get("text"));

    // set "text" field to multiple values ["hello sun", "goodbye mars"]; retain other fields as-is
    record = new Record();
    record.put(Fields.ID, "id0");
    record.put("text", ImmutableMap.of("set", Arrays.asList("hello sun", "goodbye mars")));
    assertTrue(morphline.process(record));
    docs = query("*:*").getResults();
    assertEquals(1, docs.size());
    assertEquals("id0", docs.get(0).getFirstValue(Fields.ID));
    assertEquals(123, docs.get(0).getFirstValue("user_friends_count"));
    assertEquals(Arrays.asList("hello sun", "goodbye mars"), docs.get(0).getFieldValue("text"));

    // increment user_friends_count by 5; retain other fields as-is
    record = new Record();
    record.put(Fields.ID, "id0");
    record.put("user_friends_count", ImmutableMap.of("inc", 5));
    assertTrue(morphline.process(record));
    docs = query("*:*").getResults();
    assertEquals(1, docs.size());
    assertEquals("id0", docs.get(0).getFirstValue(Fields.ID));
    assertEquals(128, docs.get(0).getFirstValue("user_friends_count"));
    assertEquals(Arrays.asList("hello sun", "goodbye mars"), docs.get(0).get("text"));

    Notifications.notifyCommitTransaction(morphline);
    Notifications.notifyShutdown(morphline);
  }

  /**
   * Interleaves inserts with delete-by-id and delete-by-query records and
   * checks the number of visible documents after every step; at the end only
   * "id1" and "id3" remain and neither carries the delete marker fields.
   */
  @Test
  public void testLoadSolrWithDelete() throws Exception {
    morphline = createMorphline("test-morphlines" + File.separator + "loadSolrBasic");

    // insert
    Record record = new Record();
    record.replaceValues(Fields.ID, "id0");
    record.replaceValues("first_name", "Nadja"); // will be sanitized
    Notifications.notifyBeginTransaction(morphline);
    assertTrue(morphline.process(record.copy()));
    assertEquals(1, query("*:*").getResults().size());

    // insert
    record = new Record();
    record.replaceValues(Fields.ID, "id1");
    assertTrue(morphline.process(record.copy()));
    assertEquals(2, query("*:*").getResults().size());

    // deleteById
    record = new Record();
    record.replaceValues(LoadSolrBuilder.LOAD_SOLR_DELETE_BY_ID, "id0");
    assertTrue(morphline.process(record.copy()));
    assertEquals(1, query("*:*").getResults().size());

    // deleteById of an id that was never inserted leaves the index unchanged
    record = new Record();
    record.replaceValues(LoadSolrBuilder.LOAD_SOLR_DELETE_BY_ID, "idNonExistent");
    assertTrue(morphline.process(record.copy()));
    assertEquals(1, query("*:*").getResults().size());

    // insert
    record = new Record();
    record.replaceValues(Fields.ID, "id2");
    assertTrue(morphline.process(record.copy()));
    assertEquals(2, query("*:*").getResults().size());

    // insert
    record = new Record();
    record.replaceValues(Fields.ID, "id200");
    assertTrue(morphline.process(record.copy()));
    assertEquals(3, query("*:*").getResults().size());

    // deleteByQuery with two queries in one record; "id:id2*" removes id2 and id200
    record = new Record();
    record.replaceValues(LoadSolrBuilder.LOAD_SOLR_DELETE_BY_QUERY, "id:id2*");
    record.put(LoadSolrBuilder.LOAD_SOLR_DELETE_BY_QUERY, "text:NonExistent");
    assertTrue(morphline.process(record.copy()));
    assertEquals(1, query("*:*").getResults().size());

    // deleteByQuery where neither query matches anything
    record = new Record();
    record.put(LoadSolrBuilder.LOAD_SOLR_DELETE_BY_QUERY, "id:NonExistent");
    record.put(LoadSolrBuilder.LOAD_SOLR_DELETE_BY_QUERY, "text:NonExistent");
    assertTrue(morphline.process(record.copy()));
    assertEquals(1, query("*:*").getResults().size());

    // insert
    record = new Record();
    record.replaceValues(Fields.ID, "id3");
    assertTrue(morphline.process(record.copy()));
    assertEquals(2, query("*:*").getResults().size());

    // verify
    SolrDocumentList docs = query("*:*").getResults();
    assertEquals(2, docs.size());
    assertEquals("id1", docs.get(0).getFirstValue(Fields.ID));
    assertNull(docs.get(0).getFirstValue(LoadSolrBuilder.LOAD_SOLR_DELETE_BY_ID));
    assertNull(docs.get(0).getFirstValue(LoadSolrBuilder.LOAD_SOLR_DELETE_BY_QUERY));
    assertEquals("id3", docs.get(1).getFirstValue(Fields.ID));
    assertNull(docs.get(1).getFirstValue(LoadSolrBuilder.LOAD_SOLR_DELETE_BY_ID));
    assertNull(docs.get(1).getFirstValue(LoadSolrBuilder.LOAD_SOLR_DELETE_BY_QUERY));

    Notifications.notifyRollbackTransaction(morphline);
    Notifications.notifyShutdown(morphline);
  }

  /**
   * Runs one record through the "loadSolrWithChildDocuments" morphline and
   * checks that a parent block join finds the parent "id0" and that three
   * documents in total are visible.
   */
  @Test
  public void testLoadSolrWithChildDocuments() throws Exception {
    morphline = createMorphline("test-morphlines" + File.separator + "loadSolrWithChildDocuments");
    Record record = new Record();
    record.put(Fields.ID, "id0");
    startSession();
    Notifications.notifyBeginTransaction(morphline);
    assertTrue(morphline.process(record));
    assertEquals(1, collector.getNumStartEvents());
    Notifications.notifyCommitTransaction(morphline);

    // This parent block join returns the parent records for records
    // where the child documents contain "bar" in the id field.
    SolrDocumentList docs = query("{!parent which='content_type:parent'}id:bar").getResults();
    assertEquals(1, docs.size());
    assertEquals("id0", docs.get(0).getFirstValue(Fields.ID));

    docs = query("*:*").getResults();
    assertEquals(3, docs.size());
  }

  /**
   * Runs the "tokenizeText" morphline three times over the same two message
   * values and checks each time that the output record gains the expected
   * "tokens" values.
   */
  @Test
  public void testTokenizeText() throws Exception {
    morphline = createMorphline("test-morphlines" + File.separator + "tokenizeText");
    for (int i = 0; i < 3; i++) {
      Record record = new Record();
      record.put(Fields.MESSAGE, "Hello World!");
      record.put(Fields.MESSAGE, "\nFoo@Bar.com #%()123");
      Record expected = record.copy();
      expected.getFields().putAll("tokens", Arrays.asList("hello", "world", "foo", "bar.com", "123"));

      // start every iteration from an empty collector
      collector.reset();
      startSession();
      Notifications.notifyBeginTransaction(morphline);
      assertTrue(morphline.process(record));
      assertEquals(1, collector.getNumStartEvents());
      Notifications.notifyCommitTransaction(morphline);
      assertEquals(expected, collector.getFirstRecord());
    }
  }
}