/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.solr.morphlines.solr;

import java.io.File;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.Iterator;
import java.util.List;

import org.apache.avro.Schema.Field;
import org.apache.avro.file.DataFileReader;
import org.apache.avro.file.FileReader;
import org.apache.avro.generic.GenericData;
import org.apache.avro.generic.GenericDatumReader;
import org.apache.lucene.util.Constants;
import org.apache.lucene.util.LuceneTestCase.Slow;
import org.apache.lucene.util.LuceneTestCase.SuppressCodecs;
import org.apache.solr.client.solrj.SolrQuery;
import org.apache.solr.client.solrj.response.QueryResponse;
import org.apache.solr.common.SolrDocument;
import org.junit.BeforeClass;
import org.kitesdk.morphline.api.Record;
import org.kitesdk.morphline.base.Fields;
import org.kitesdk.morphline.base.Notifications;

import com.carrotsearch.randomizedtesting.annotations.ThreadLeakAction;
import com.carrotsearch.randomizedtesting.annotations.ThreadLeakAction.Action;
import com.carrotsearch.randomizedtesting.annotations.ThreadLeakLingering;
import com.carrotsearch.randomizedtesting.annotations.ThreadLeakScope;
import com.carrotsearch.randomizedtesting.annotations.ThreadLeakScope.Scope;
import com.carrotsearch.randomizedtesting.annotations.ThreadLeakZombies;
import com.carrotsearch.randomizedtesting.annotations.ThreadLeakZombies.Consequence;
import com.google.common.base.Joiner;
import com.google.common.base.Preconditions;
import com.google.common.io.Files;

@ThreadLeakAction({Action.WARN})
@ThreadLeakLingering(linger = 0)
@ThreadLeakZombies(Consequence.CONTINUE)
@ThreadLeakScope(Scope.NONE)
@SuppressCodecs({"Lucene3x", "Lucene40"})
@Slow
public class SolrMorphlineZkAvroTest extends AbstractSolrMorphlineZkTestBase {

  @BeforeClass
  public static void beforeClass2() {
    assumeFalse("FIXME: This test fails under Java 8 due to the Saxon dependency - see SOLR-1301",
        Constants.JRE_IS_MINIMUM_JAVA8);
    assumeFalse("FIXME: This test fails under J9 due to the Saxon dependency - see SOLR-1301",
        System.getProperty("java.vm.info", "<?>").contains("IBM J9"));
  }

  @Override
  public void doTest() throws Exception {
    Joiner joiner = Joiner.on(File.separator);
    File file = new File(joiner.join(RESOURCES_DIR, "test-documents",
        "sample-statuses-20120906-141433-medium.avro"));

    waitForRecoveriesToFinish(false);

    // load avro records via morphline and zk into solr
    morphline = parse("test-morphlines" + File.separator + "tutorialReadAvroContainer");
    Record record = new Record();
    byte[] body = Files.toByteArray(file);
    record.put(Fields.ATTACHMENT_BODY, body);
    startSession();
    Notifications.notifyBeginTransaction(morphline);
    assertTrue(morphline.process(record));
    assertEquals(1, collector.getNumStartEvents());
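
    // Commit the batch so the indexed documents become visible to the query below,
    // then compare both the morphline output and the Solr result set against the Avro input.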
    commit();

    // fetch sorted result set from solr
    QueryResponse rsp = cloudClient.query(
        new SolrQuery("*:*").setRows(100000).addSort("id", SolrQuery.ORDER.asc));
    assertEquals(2104, collector.getRecords().size());
    assertEquals(collector.getRecords().size(), rsp.getResults().size());

    Collections.sort(collector.getRecords(), new Comparator<Record>() {
      @Override
      public int compare(Record r1, Record r2) {
        return r1.get("id").toString().compareTo(r2.get("id").toString());
      }
    });

    // fetch test input data and sort like solr result set
    List<GenericData.Record> records = new ArrayList<>();
    FileReader<GenericData.Record> reader =
        new DataFileReader<>(file, new GenericDatumReader<GenericData.Record>());
    while (reader.hasNext()) {
      GenericData.Record expected = reader.next();
      records.add(expected);
    }
    assertEquals(collector.getRecords().size(), records.size());
    Collections.sort(records, new Comparator<GenericData.Record>() {
      @Override
      public int compare(GenericData.Record r1, GenericData.Record r2) {
        return r1.get("id").toString().compareTo(r2.get("id").toString());
      }
    });

    // sanity check: the "id" field is the join key for the comparison below,
    // so the test input must not contain duplicate ids
    Object lastId = null;
    for (int i = 0; i < records.size(); i++) {
      //System.out.println("myrec" + i + ":" + records.get(i));
      Object id = records.get(i).get("id");
      if (id != null && id.equals(lastId)) {
        throw new IllegalStateException("Detected duplicate id. Test input data must not contain duplicate ids!");
      }
      lastId = id;
    }

    for (int i = 0; i < records.size(); i++) {
      //System.out.println("myrsp" + i + ":" + rsp.getResults().get(i));
    }

    Iterator<SolrDocument> rspIter = rsp.getResults().iterator();
    for (int i = 0; i < records.size(); i++) {
      // verify morphline spat out expected data
      Record actual = collector.getRecords().get(i);
      GenericData.Record expected = records.get(i);
      Preconditions.checkNotNull(expected);
      assertTweetEquals(expected, actual, i);

      // verify Solr result set contains expected data
      actual = new Record();
      actual.getFields().putAll(next(rspIter));
      assertTweetEquals(expected, actual, i);
    }

    Notifications.notifyRollbackTransaction(morphline);
    Notifications.notifyShutdown(morphline);
    cloudClient.shutdown();
  }

  private void assertTweetEquals(GenericData.Record expected, Record actual, int i) {
    Preconditions.checkNotNull(expected);
    Preconditions.checkNotNull(actual);
    // System.out.println("\n\nexpected: " + toString(expected));
    // System.out.println("actual: " + actual);
    String[] fieldNames = new String[] {
        "id",
        "in_reply_to_status_id",
        "in_reply_to_user_id",
        "retweet_count",
        "text",
    };
    for (String fieldName : fieldNames) {
      assertEquals(
          i + " fieldName: " + fieldName,
          expected.get(fieldName).toString(),
          actual.getFirstValue(fieldName).toString());
    }
  }

  private String toString(GenericData.Record avroRecord) {
    Record record = new Record();
    for (Field field : avroRecord.getSchema().getFields()) {
      record.put(field.name(), avroRecord.get(field.pos()));
    }
    return record.toString(); // prints sorted by key for human readability
  }

}