package org.codelibs.elasticsearch.minhash; import static org.codelibs.elasticsearch.runner.ElasticsearchClusterRunner.newConfigs; import java.util.Map; import org.codelibs.elasticsearch.runner.ElasticsearchClusterRunner; import org.elasticsearch.action.DocWriteResponse.Result; import org.elasticsearch.action.get.GetResponse; import org.elasticsearch.action.index.IndexResponse; import org.elasticsearch.client.Client; import org.elasticsearch.common.bytes.BytesArray; import org.elasticsearch.common.settings.Settings; import org.elasticsearch.common.settings.Settings.Builder; import org.elasticsearch.common.xcontent.XContentBuilder; import org.elasticsearch.common.xcontent.XContentFactory; import org.elasticsearch.common.xcontent.XContentType; import org.elasticsearch.index.get.GetField; import org.junit.Assert; import com.google.common.collect.Lists; import junit.framework.TestCase; public class MinHashPluginTest extends TestCase { private ElasticsearchClusterRunner runner; private String clusterName; @Override protected void setUp() throws Exception { clusterName = "es-minhash-" + System.currentTimeMillis(); // create runner instance runner = new ElasticsearchClusterRunner(); // create ES nodes runner.onBuild(new ElasticsearchClusterRunner.Builder() { @Override public void build(final int number, final Builder settingsBuilder) { settingsBuilder.put("http.cors.enabled", true); settingsBuilder.put("http.cors.allow-origin", "*"); settingsBuilder.putArray("discovery.zen.ping.unicast.hosts", "localhost:9301-9310"); } }).build(newConfigs().clusterName(clusterName).numOfNode(1).pluginTypes("org.codelibs.elasticsearch.minhash.MinHashPlugin")); // wait for yellow status runner.ensureYellow(); } @Override protected void tearDown() throws Exception { // close runner runner.close(); // delete all files runner.clean(); } public void test_runEs() throws Exception { final String index = "test_index"; final String type = "test_type"; { // create an index final String indexSettings = "{\"index\":{\"analysis\":{\"analyzer\":{" + "\"minhash_analyzer1\":{\"type\":\"custom\",\"tokenizer\":\"standard\",\"filter\":[\"minhash\"]}," + "\"minhash_analyzer2\":{\"type\":\"custom\",\"tokenizer\":\"standard\",\"filter\":[\"my_minhashfilter1\"]}," + "\"minhash_analyzer3\":{\"type\":\"custom\",\"tokenizer\":\"standard\",\"filter\":[\"my_minhashfilter2\"]}" + "},\"filter\":{" + "\"my_minhashfilter1\":{\"type\":\"minhash\",\"seed\":1000}," + "\"my_minhashfilter2\":{\"type\":\"minhash\",\"bit\":2,\"size\":32,\"seed\":1000}" + "}}}}"; runner.createIndex(index, Settings.builder() .loadFromSource(indexSettings, XContentType.JSON).build()); runner.ensureYellow(index); // create a mapping final XContentBuilder mappingBuilder = XContentFactory .jsonBuilder()// .startObject()// .startObject(type)// .startObject("properties")// // id .startObject("id")// .field("type", "string")// .field("index", "not_analyzed")// .endObject()// // msg .startObject("msg")// .field("type", "string")// .field("copy_to", Lists.newArrayList("minhash_value1", "minhash_value2", "minhash_value3"))// .endObject()// // bits .startObject("bits")// .field("type", "string")// .field("store", true)// .endObject()// // minhash .startObject("minhash_value1")// .field("type", "minhash")// .field("minhash_analyzer", "minhash_analyzer1")// .field("copy_bits_to", "bits")// .endObject()// // minhash .startObject("minhash_value2")// .field("type", "minhash")// .field("minhash_analyzer", "minhash_analyzer2")// .endObject()// // minhash .startObject("minhash_value3")// .field("type", "minhash")// .field("minhash_analyzer", "minhash_analyzer3")// .endObject()// .endObject()// .endObject()// .endObject(); runner.createMapping(index, type, mappingBuilder); } if (!runner.indexExists(index)) { fail(); } // create 1000 documents for (int i = 1; i <= 1000; i++) { final IndexResponse indexResponse1 = runner.insert(index, type, String.valueOf(i), "{\"id\":\"" + i + "\",\"msg\":\"test " + i % 100 + "\"}"); assertEquals(Result.CREATED, indexResponse1.getResult()); } runner.refresh(); final Client client = runner.client(); test_get(client, index, type, "1", new byte[] { 82, 56, -67, -10, 55, -89, -85, -73, 90, -35, -93, 74, 77, -121, 60, -55 }, new byte[] { 125, 73, 13, -20, -83, 34, -120, -63, -23, -44, -52, 98, 25, 121, -56, 107 }, new byte[] { 91, -99, 105, 16, -5, -118, -14, -36 }); test_get(client, index, type, "2", new byte[] { 0, 96, 125, -3, -121, -89, -5, 39, -1, -108, 27, -55, 42, -45, 29, 64 }, new byte[] { -15, 40, 77, 111, -91, 21, 10, 3, -31, -41, -84, -79, 57, -35, -117, 123 }, new byte[] { -117, 93, 96, 36, 123, 24, -1, 60 }); test_get(client, index, type, "101", new byte[] { 82, 56, -67, -10, 55, -89, -85, -73, 90, -35, -93, 74, 77, -121, 60, -55 }, new byte[] { 125, 73, 13, -20, -83, 34, -120, -63, -23, -44, -52, 98, 25, 121, -56, 107 }, new byte[] { 91, -99, 105, 16, -5, -118, -14, -36 }); } private void test_get(final Client client, final String index, final String type, final String id, final byte[] hash1, final byte[] hash2, final byte[] hash3) { final GetResponse response = client.prepareGet(index, type, id) .setStoredFields(new String[] { "_source", "minhash_value1", "minhash_value2", "minhash_value3" }).execute() .actionGet(); assertTrue(response.isExists()); final Map<String, Object> source = response.getSourceAsMap(); assertEquals("test " + Integer.parseInt(id) % 100, source.get("msg")); final GetField field1 = response.getField("minhash_value1"); final BytesArray value1 = (BytesArray) field1.getValue(); assertEquals(hash1.length, value1.length()); Assert.assertArrayEquals(hash1, value1.array()); final GetField field2 = response.getField("minhash_value2"); final BytesArray value2 = (BytesArray) field2.getValue(); assertEquals(hash2.length, value2.length()); Assert.assertArrayEquals(hash2, value2.array()); final GetField field3 = response.getField("minhash_value3"); final BytesArray value3 = (BytesArray) field3.getValue(); assertEquals(hash3.length, value3.length()); Assert.assertArrayEquals(hash3, value3.array()); } }