package org.carrot2.elasticsearch;
import org.apache.http.HttpResponse;
import org.apache.http.HttpStatus;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.client.methods.HttpPost;
import org.apache.http.entity.ByteArrayEntity;
import org.apache.http.entity.ContentType;
import org.apache.http.impl.client.CloseableHttpClient;
import org.apache.http.impl.client.HttpClientBuilder;
import org.assertj.core.api.Assertions;
import org.carrot2.core.LanguageCode;
import org.carrot2.elasticsearch.ClusteringAction.RestClusteringAction;
import org.elasticsearch.common.xcontent.XContentType;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
/**
 * REST API tests for {@link ClusteringAction}.
 *
 * <p>Every test talks to the endpoint named by {@link RestClusteringAction#NAME}
 * under {@code restBaseUrl} using a short-lived Apache HTTP client. Request
 * bodies are loaded with {@code jsonResourceAs(resource, xtype)} and sent with
 * the matching {@code Content-Type} header.
 */
public class ClusteringActionRestIT extends SampleIndexTestCase {
    /** Content type chosen at random (from all {@link XContentType} values) for this test instance. */
    private final XContentType xtype = randomFrom(XContentType.values());

    /** HTTP content type derived from {@link #xtype}'s media type. */
    private final ContentType contentType = ContentType.create(xtype.mediaType());

    public void testPostClusterByUrl() throws Exception {
        post("post_cluster_by_url.json");
    }

    public void testPostMultipleFieldMapping() throws Exception {
        post("post_multiple_field_mapping.json");
    }

    public void testPostWithHighlightedFields() throws Exception {
        post("post_with_highlighted_fields.json");
    }

    public void testPostWithFields() throws Exception {
        post("post_with_fields.json");
    }

    public void testPostWithSourceFields() throws Exception {
        post("post_with_source_fields.json");
    }

    @SuppressWarnings("unchecked")
    @Lingo3G
    public void testPostWithClusters() throws Exception {
        Map<?, ?> response = post("post_with_clusters.json");
        // post() has already asserted that "clusters" is a non-empty list.
        List<Map<String, ?>> clusterList = (List<Map<String, ?>>) response.get("clusters");
        dumpClusters(clusterList, 0);
    }

    /**
     * Logs (at debug level) one line per cluster with its label, score, document
     * count and subcluster count, then recurses into the subclusters.
     *
     * @param clusterList clusters to dump; each entry is read through the keys
     *                    {@code score}, {@code label}, {@code documents} and {@code clusters}
     * @param indent      nesting depth; one space of indentation is emitted per level
     */
    @SuppressWarnings("unchecked")
    void dumpClusters(List<Map<String, ?>> clusterList, int indent) {
        // The prefix depends only on the depth, so build it once per call.
        StringBuilder prefixBuilder = new StringBuilder();
        for (int i = 0; i < indent; i++) {
            prefixBuilder.append(" ");
        }
        String prefix = prefixBuilder.toString();

        for (Map<String, ?> cluster : clusterList) {
            float score = ((Number) cluster.get("score")).floatValue();
            String label = (String) cluster.get("label");
            List<?> documents = (List<?>) cluster.get("documents");
            List<Map<String, ?>> subclusters = (List<Map<String, ?>>) cluster.get("clusters");

            logger.debug(prefix + "> " + label + " (score=" + score
                + ", documents=" + (documents == null ? 0 : documents.size())
                + ", subclusters=" + (subclusters == null ? 0 : subclusters.size())
                + ")");

            if (subclusters != null) {
                dumpClusters(subclusters, indent + 1);
            }
        }
    }

    /**
     * Posts the given request resource to the clustering endpoint and asserts
     * that the response contains more than five top-level clusters.
     *
     * @param queryJsonResource name of the request resource passed to {@code jsonResourceAs}
     * @return the response map produced by {@code checkHttpResponseContainsClusters}
     * @throws Exception if the request fails or the response does not validate
     */
    protected Map<?, ?> post(String queryJsonResource) throws Exception {
        Map<?, ?> map = postForClusters(clusteringUrl(), queryJsonResource);
        assertMoreThanFiveClusters(clustersOf(map));
        return map;
    }

    public void testGetClusteringRequest() throws Exception {
        try (CloseableHttpClient httpClient = HttpClientBuilder.create().build()) {
            HttpGet get = new HttpGet(restBaseUrl + "/" + RestClusteringAction.NAME
                + "?pretty=true"
                // search-specific attrs
                + "&q=data+mining"
                + "&_source=url,title,content"
                + "&size=100"
                // clustering-specific attrs
                + "&query_hint=data+mining"
                + "&field_mapping_url=_source.url"
                + "&field_mapping_content=_source.title,_source.content"
                + "&algorithm=stc");
            HttpResponse response = httpClient.execute(get);
            Map<?, ?> map = checkHttpResponseContainsClusters(response);
            assertMoreThanFiveClusters(clustersOf(map));
        }
    }

    public void testRestApiPathParams() throws Exception {
        String url = restBaseUrl
            + "/" + INDEX_NAME
            + "/empty/"
            + RestClusteringAction.NAME + "?pretty=true";
        Map<?, ?> map = postForClusters(url, "post_with_fields.json");
        Assertions.assertThat(clustersOf(map))
            .isNotNull()
            .isEmpty();
    }

    public void testRestApiRuntimeAttributes() throws Exception {
        Map<?, ?> map = postForClusters(clusteringUrl(), "post_runtime_attributes.json");
        List<?> clusterList = clustersOf(map);
        Assertions.assertThat(clusterList)
            .isNotNull();
        Assertions.assertThat(clusterList.size())
            .isBetween(1, /* max. cluster size cap */ 5 + /* other topics */ 1);
    }

    public void testLanguageField() throws Exception {
        Map<?, ?> map = postForClusters(clusteringUrl(), "post_language_field.json");

        // Start from every known language code and strike out each one that
        // shows up as a top-level cluster label.
        Set<String> allLanguages = new HashSet<>();
        for (LanguageCode code : LanguageCode.values()) {
            allLanguages.add(code.toString());
        }

        for (Object o : clustersOf(map)) {
            @SuppressWarnings("unchecked")
            Map<String, Object> cluster = (Map<String, Object>) o;
            allLanguages.remove(cluster.get("label"));
        }

        Assertions.assertThat(allLanguages.size())
            .describedAs("Expected a lot of languages to appear in top groups.")
            .isLessThan(LanguageCode.values().length / 2);
    }

    public void testNonexistentFields() throws Exception {
        Map<?, ?> map = postForClusters(clusteringUrl(), "post_nonexistent_fields.json");
        Assertions.assertThat(clustersOf(map)).isNotNull();
    }

    public void testNonexistentAlgorithmId() throws Exception {
        postExpectingError(
            "post_nonexistent_algorithmId.json",
            HttpStatus.SC_BAD_REQUEST,
            "No such algorithm: _nonexistent_");
    }

    public void testInvalidSearchQuery() throws Exception {
        postExpectingError(
            "post_invalid_query.json",
            HttpStatus.SC_BAD_REQUEST,
            "parsing_exception");
    }

    public void testPropagatingAlgorithmException() throws Exception {
        postExpectingError(
            "post_invalid_attribute_value.json",
            HttpStatus.SC_INTERNAL_SERVER_ERROR,
            "Search results clustering error:");
    }

    /** Returns the URL of the index-independent clustering endpoint with {@code pretty=true}. */
    private String clusteringUrl() {
        return restBaseUrl + "/" + RestClusteringAction.NAME + "?pretty=true";
    }

    /** Returns the value stored under the {@code clusters} key of a response map, cast to a list. */
    private static List<?> clustersOf(Map<?, ?> responseMap) {
        return (List<?>) responseMap.get("clusters");
    }

    /** Asserts that the cluster list is present, non-empty and has more than five entries. */
    private static void assertMoreThanFiveClusters(List<?> clusterList) {
        Assertions.assertThat(clusterList)
            .isNotNull()
            .isNotEmpty();
        Assertions.assertThat(clusterList.size())
            .isGreaterThan(5);
    }

    /** Builds a POST to {@code url} whose body is the given request resource rendered as {@link #xtype}. */
    private HttpPost newPost(String url, String requestResource) throws Exception {
        HttpPost request = new HttpPost(url);
        request.setEntity(new ByteArrayEntity(jsonResourceAs(requestResource, xtype), contentType));
        return request;
    }

    /**
     * Posts the request resource to {@code url} and returns the map produced by
     * {@code checkHttpResponseContainsClusters}. The response is checked while
     * the client is still open; the client is closed before returning.
     */
    private Map<?, ?> postForClusters(String url, String requestResource) throws Exception {
        try (CloseableHttpClient httpClient = HttpClientBuilder.create().build()) {
            HttpResponse response = httpClient.execute(newPost(url, requestResource));
            return checkHttpResponseContainsClusters(response);
        }
    }

    /**
     * Posts the request resource to the clustering endpoint and delegates to
     * {@code expectErrorResponseWithMessage} with the given status and message.
     */
    private void postExpectingError(String requestResource, int expectedStatus, String expectedMessage)
            throws Exception {
        try (CloseableHttpClient httpClient = HttpClientBuilder.create().build()) {
            HttpResponse response = httpClient.execute(newPost(clusteringUrl(), requestResource));
            expectErrorResponseWithMessage(response, expectedStatus, expectedMessage);
        }
    }
}