/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.morphlines.solr;
import java.io.File;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Iterator;
import java.util.List;
import com.carrotsearch.randomizedtesting.annotations.ThreadLeakFilters;
import com.google.common.base.Preconditions;
import org.apache.avro.file.DataFileReader;
import org.apache.avro.file.FileReader;
import org.apache.avro.generic.GenericData;
import org.apache.avro.generic.GenericDatumReader;
import org.apache.lucene.util.LuceneTestCase.Slow;
import org.apache.solr.client.solrj.SolrQuery;
import org.apache.solr.client.solrj.request.UpdateRequest;
import org.apache.solr.client.solrj.response.QueryResponse;
import org.apache.solr.common.SolrDocument;
import org.apache.solr.util.BadHdfsThreadsFilter;
import org.junit.Test;
import org.kitesdk.morphline.api.Record;
import org.kitesdk.morphline.base.Fields;
import org.kitesdk.morphline.base.Notifications;
@ThreadLeakFilters(defaultFilters = true, filters = {
    BadHdfsThreadsFilter.class // hdfs currently leaks thread(s)
})
@Slow
public class SolrMorphlineZkAvroTest extends AbstractSolrMorphlineZkTestBase {

  /**
   * Loads an Avro container file through the {@code tutorialReadAvroContainer} morphline into
   * a SolrCloud collection, then verifies that both the morphline output and the Solr query
   * result set match the original Avro input records field-by-field.
   */
  @Test
  public void test() throws Exception {
    Path avro = Paths.get(RESOURCES_DIR).resolve("test-documents").resolve("sample-statuses-20120906-141433-medium.avro");

    // load avro records via morphline and zk into solr
    morphline = parse("test-morphlines" + File.separator + "tutorialReadAvroContainer");
    Record record = new Record();
    byte[] body = Files.readAllBytes(avro);
    record.put(Fields.ATTACHMENT_BODY, body);
    startSession();
    Notifications.notifyBeginTransaction(morphline);
    assertTrue(morphline.process(record));
    assertEquals(1, collector.getNumStartEvents());
    Notifications.notifyCommitTransaction(morphline);
    new UpdateRequest().commit(cluster.getSolrClient(), COLLECTION);

    // fetch sorted result set from solr
    QueryResponse rsp = cluster.getSolrClient()
        .query(COLLECTION, new SolrQuery("*:*").setRows(100000).addSort("id", SolrQuery.ORDER.asc));
    assertEquals(2104, collector.getRecords().size());
    assertEquals(collector.getRecords().size(), rsp.getResults().size());
    Collections.sort(collector.getRecords(), (r1, r2) -> r1.get("id").toString().compareTo(r2.get("id").toString()));

    // fetch test input data and sort like solr result set.
    // try-with-resources: the reader was previously leaked; also use proper generics
    // instead of raw DataFileReader/GenericDatumReader instantiation.
    List<GenericData.Record> records = new ArrayList<>();
    try (FileReader<GenericData.Record> reader =
        new DataFileReader<>(avro.toFile(), new GenericDatumReader<GenericData.Record>())) {
      while (reader.hasNext()) {
        records.add(reader.next());
      }
    }
    assertEquals(collector.getRecords().size(), records.size());
    Collections.sort(records, (r1, r2) -> r1.get("id").toString().compareTo(r2.get("id").toString()));

    // sanity check: the sorted test input must not contain duplicate ids, otherwise the
    // positional comparison below would be meaningless. Note: compare the "id" field of
    // each record (the previous code compared whole records, which would miss two records
    // that share an id but differ in other fields).
    Object lastId = null;
    for (GenericData.Record r : records) {
      Object id = r.get("id");
      if (id != null && id.equals(lastId)) {
        throw new IllegalStateException("Detected duplicate id. Test input data must not contain duplicate ids!");
      }
      lastId = id;
    }

    Iterator<SolrDocument> rspIter = rsp.getResults().iterator();
    for (int i = 0; i < records.size(); i++) {
      // verify morphline spat out expected data
      Record actual = collector.getRecords().get(i);
      GenericData.Record expected = records.get(i);
      Preconditions.checkNotNull(expected);
      assertTweetEquals(expected, actual, i);

      // verify Solr result set contains expected data
      actual = new Record();
      actual.getFields().putAll(next(rspIter));
      assertTweetEquals(expected, actual, i);
    }

    Notifications.notifyRollbackTransaction(morphline);
    Notifications.notifyShutdown(morphline);
  }

  /**
   * Asserts that the given record carries the same values as the expected Avro input
   * record for each tweet field under test.
   *
   * @param expected Avro record read from the test input file
   * @param actual   record produced by the morphline, or reconstructed from the Solr response
   * @param i        positional index, included in assertion messages for easier debugging
   */
  private void assertTweetEquals(GenericData.Record expected, Record actual, int i) {
    Preconditions.checkNotNull(expected);
    Preconditions.checkNotNull(actual);
    String[] fieldNames = new String[] {
        "id",
        "in_reply_to_status_id",
        "in_reply_to_user_id",
        "retweet_count",
        "text",
    };
    for (String fieldName : fieldNames) {
      assertEquals(
          i + " fieldName: " + fieldName,
          expected.get(fieldName).toString(),
          actual.getFirstValue(fieldName).toString());
    }
  }
}