package org.xbib.elasticsearch.index.mapper.langdetect;
import org.elasticsearch.common.bytes.BytesReference;
import org.elasticsearch.common.compress.CompressedXContent;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.index.mapper.DocumentMapper;
import org.elasticsearch.index.mapper.ParseContext;
import org.junit.Assert;
import org.junit.Test;
import org.xbib.elasticsearch.MapperTestUtils;
import java.io.IOException;
import java.io.InputStreamReader;
import static org.elasticsearch.common.io.Streams.copyToString;
import static org.elasticsearch.common.xcontent.XContentFactory.jsonBuilder;
/**
 * Mapping tests for the langdetect mapper.
 *
 * <p>Each test loads a mapping definition from the classpath, builds a
 * {@link DocumentMapper} from it, parses a one-field JSON document and checks the
 * values indexed under the expected field name. Most tests then serialize the
 * mapper back to its mapping source, rebuild the mapper from that source and
 * parse again, to check that the mapping survives a round trip.
 */
public class LangdetectMappingTest extends Assert {

    /** Index name used for every mapper and every parsed document. */
    private static final String INDEX = "someIndex";

    /** Type name used for every mapper and every parsed document. */
    private static final String TYPE = "someType";

    /** Document id used for every parsed document. */
    private static final String ID = "1";

    /**
     * Parses an English sample with {@code simple-mapping.json} and expects a single
     * {@code someField} value of {@code "en"}, before and after the mapping round trip.
     */
    @Test
    public void testSimpleMappings() throws Exception {
        DocumentMapper docMapper = newMapper(copyToStringFromClasspath("simple-mapping.json"));
        String sampleText = copyToStringFromClasspath("english.txt");
        assertSingleValue("en", parse(docMapper, "someField", sampleText), "someField");
        // re-parse it
        docMapper = rebuild(docMapper);
        assertSingleValue("en", parse(docMapper, "someField", sampleText), "someField");
    }

    /**
     * Parses the content of {@code base64.txt} with {@code base64-mapping.json}, then
     * parses the content of {@code base64-decoded.txt} after the mapping round trip.
     */
    @Test
    public void testBinary() throws Exception {
        DocumentMapper docMapper = newMapper(copyToStringFromClasspath("base64-mapping.json"));
        String sampleBinary = copyToStringFromClasspath("base64.txt");
        String sampleText = copyToStringFromClasspath("base64-decoded.txt");
        ParseContext.Document doc = parse(docMapper, "someField", sampleBinary);
        assertEquals(2, doc.getFields("someField").length);
        assertEquals("en", doc.getFields("someField")[0].stringValue());
        // re-parse it
        docMapper = rebuild(docMapper);
        // These two checks used to pass a third argument, which silently selected
        // assertEquals(double, double, delta) (accepting 0..2 fields) and
        // assertEquals(message, expected, actual). They are now exact two-argument
        // comparisons like everywhere else in this class.
        assertSingleValue("en", parse(docMapper, "someField", sampleText), "someField");
    }

    /**
     * Builds the mapper with custom settings loaded from {@code settings.json} and
     * expects the German sample to be indexed as {@code "Deutsch"}.
     */
    @Test
    public void testCustomMappings() throws Exception {
        // Fall back to the working directory when no path.home property is set.
        String home = System.getProperty("path.home", System.getProperty("user.dir"));
        Settings settings = Settings.builder()
                .put("path.home", home)
                .loadFromStream("settings.json", getClass().getResourceAsStream("settings.json"))
                .build();
        String mapping = copyToStringFromClasspath("mapping.json");
        DocumentMapper docMapper = MapperTestUtils.newDocumentMapperParser(settings, INDEX)
                .parse(TYPE, new CompressedXContent(mapping));
        String sampleText = copyToStringFromClasspath("german.txt");
        assertSingleValue("Deutsch", parse(docMapper, "someField", sampleText), "someField");
    }

    /**
     * Parses {@code base64-2-decoded.txt} into the {@code content} field with
     * {@code base64-2-mapping.json} and expects {@code "en"} under
     * {@code content.language}, before and after the mapping round trip.
     *
     * <p>Note that this test feeds the decoded text file, not a base64 payload.
     */
    @Test
    public void testBinary2() throws Exception {
        DocumentMapper docMapper = newMapper(copyToStringFromClasspath("base64-2-mapping.json"));
        String sampleText = copyToStringFromClasspath("base64-2-decoded.txt");
        assertSingleValue("en", parse(docMapper, "content", sampleText), "content.language");
        // re-parse it
        docMapper = rebuild(docMapper);
        assertSingleValue("en", parse(docMapper, "content", sampleText), "content.language");
    }

    /**
     * Same scenario as {@link #testSimpleMappings()}, but using
     * {@code short-text-mapping.json}.
     */
    @Test
    public void testShortTextProfile() throws Exception {
        DocumentMapper docMapper = newMapper(copyToStringFromClasspath("short-text-mapping.json"));
        String sampleText = copyToStringFromClasspath("english.txt");
        assertSingleValue("en", parse(docMapper, "someField", sampleText), "someField");
        // re-parse it
        docMapper = rebuild(docMapper);
        assertSingleValue("en", parse(docMapper, "someField", sampleText), "someField");
    }

    /**
     * Uses {@code mapping-to-fields.json}; after the mapping round trip the document is
     * additionally expected to carry the sample text under {@code english_field}.
     */
    @Test
    public void testToFields() throws Exception {
        DocumentMapper docMapper = newMapper(copyToStringFromClasspath("mapping-to-fields.json"));
        String sampleText = copyToStringFromClasspath("english.txt");
        assertSingleValue("en", parse(docMapper, "someField", sampleText), "someField");
        // re-parse it
        docMapper = rebuild(docMapper);
        ParseContext.Document doc = parse(docMapper, "someField", sampleText);
        assertSingleValue("en", doc, "someField");
        assertSingleValue("This is a very small example of a text", doc, "english_field");
    }

    /** Builds a mapper for the shared index/type from the given mapping source. */
    private static DocumentMapper newMapper(String mappingSource) throws Exception {
        return MapperTestUtils.newDocumentMapperParser(INDEX)
                .parse(TYPE, new CompressedXContent(mappingSource));
    }

    /** Rebuilds a mapper from the mapping source that the given mapper reports. */
    private static DocumentMapper rebuild(DocumentMapper docMapper) throws Exception {
        return newMapper(docMapper.mappingSource().string());
    }

    /** Parses the JSON document <code>{field: value}</code> and returns its root document. */
    private static ParseContext.Document parse(DocumentMapper docMapper, String field, String value)
            throws Exception {
        BytesReference json = jsonBuilder().startObject().field(field, value).endObject().bytes();
        return docMapper.parse(INDEX, TYPE, ID, json).rootDoc();
    }

    /** Asserts that the document holds exactly one value for {@code field}, equal to {@code expected}. */
    private static void assertSingleValue(String expected, ParseContext.Document doc, String field) {
        assertEquals(1, doc.getFields(field).length);
        assertEquals(expected, doc.getFields(field)[0].stringValue());
    }

    /**
     * Reads a classpath resource, resolved relative to this class, as a UTF-8 string.
     *
     * @param path resource name as accepted by {@link Class#getResource(String)}
     * @return the full resource content
     * @throws IOException if the resource does not exist or cannot be read
     */
    private String copyToStringFromClasspath(String path) throws IOException {
        java.net.URL resource = getClass().getResource(path);
        if (resource == null) {
            // Report the missing fixture by name instead of failing with a bare NPE.
            throw new IOException("classpath resource not found: " + path);
        }
        return copyToString(new InputStreamReader(resource.openStream(), "UTF-8"));
    }
}