package de.danielbasedow.prospecter.core.document; import com.fasterxml.jackson.databind.JsonNode; import com.fasterxml.jackson.databind.ObjectMapper; import com.fasterxml.jackson.databind.node.ArrayNode; import com.fasterxml.jackson.databind.node.JsonNodeType; import com.fasterxml.jackson.databind.node.ObjectNode; import de.danielbasedow.prospecter.core.Token; import de.danielbasedow.prospecter.core.analysis.Analyzer; import de.danielbasedow.prospecter.core.analysis.TokenizerException; import de.danielbasedow.prospecter.core.geo.LatLng; import de.danielbasedow.prospecter.core.index.DateTimeIndex; import de.danielbasedow.prospecter.core.index.FieldIndex; import de.danielbasedow.prospecter.core.index.FullTextIndex; import de.danielbasedow.prospecter.core.schema.Schema; import java.io.IOException; import java.text.DateFormat; import java.text.ParseException; import java.util.*; /** * Build Document instances from JSON representations */ public class DocumentBuilder { private static final ObjectMapper mapper = new ObjectMapper(); protected final Schema schema; /** * @param schema Schema to use */ public DocumentBuilder(Schema schema) { this.schema = schema; } /** * Get Document instance from a JSON representation * * @param json raw JSON * @return Document instance * @throws MalformedDocumentException */ public Document build(String json) throws MalformedDocumentException { Document doc = new Document(); try { ObjectNode root = (ObjectNode) mapper.readTree(json); Iterator<Map.Entry<String, JsonNode>> iterator = root.fields(); while (iterator.hasNext()) { Map.Entry<String, JsonNode> entry = iterator.next(); String fieldName = entry.getKey(); doc.addField(fieldName, handleField(entry.getKey(), entry.getValue())); } } catch (IOException e) { throw new MalformedDocumentException("Error parsing the JSON string", e); } catch (TokenizerException e) { throw new MalformedDocumentException("Error tokenizing field content", e); } return doc; } /** * build method dispatches every encountered field in JSON to this method. * * @param fieldName name of the field * @param node ObjectNode representing the field * @return Field instance * @throws MalformedDocumentException * @throws TokenizerException */ private Field handleField(String fieldName, JsonNode node) throws MalformedDocumentException, TokenizerException { FieldIndex index = schema.getFieldIndex(fieldName); if (index == null) { throw new MalformedDocumentException("The document field '" + fieldName + "' doesn't exist in schema"); } switch (index.getFieldType()) { case FULL_TEXT: return handleFullTextField(fieldName, node); case INTEGER: return handleIntegerField(fieldName, node); case GEO_DISTANCE: return handleGeoDistanceField(fieldName, node); case DATE_TIME: return handleDateTimeField(fieldName, node); case STRING: return handleStringField(fieldName, node); case DOUBLE: return handleDoubleField(fieldName, node); case LONG: return handleLongField(fieldName, node); default: throw new UnsupportedOperationException(); } } private Field handleLongField(String fieldName, JsonNode node) { List<Token> tokens = new ArrayList<Token>(); if (node.getNodeType() == JsonNodeType.ARRAY) { Iterator<JsonNode> iterator = ((ArrayNode) node).elements(); while (iterator.hasNext()) { JsonNode subNode = iterator.next(); if (subNode.getNodeType() == JsonNodeType.NUMBER) { tokens.add(new Token<Long>(subNode.asLong())); } } } else if (node.getNodeType() == JsonNodeType.NUMBER) { tokens.add(new Token<Long>(node.asLong())); } return new Field(fieldName, tokens); } private Field handleDoubleField(String fieldName, JsonNode node) { List<Token> tokens = new ArrayList<Token>(); if (node.getNodeType() == JsonNodeType.ARRAY) { Iterator<JsonNode> iterator = ((ArrayNode) node).elements(); while (iterator.hasNext()) { JsonNode subNode = iterator.next(); if (subNode.getNodeType() == JsonNodeType.NUMBER) { tokens.add(new Token<Double>(subNode.asDouble())); } } } else if (node.getNodeType() == JsonNodeType.NUMBER) { tokens.add(new Token<Double>(node.asDouble())); } return new Field(fieldName, tokens); } private Field handleStringField(String fieldName, JsonNode node) { List<Token> tokens = new ArrayList<Token>(); if (node.getNodeType() == JsonNodeType.ARRAY) { Iterator<JsonNode> iterator = ((ArrayNode) node).elements(); while (iterator.hasNext()) { JsonNode subNode = iterator.next(); if (subNode.getNodeType() == JsonNodeType.STRING) { tokens.add(new Token<String>(subNode.asText())); } } } else if (node.getNodeType() == JsonNodeType.STRING) { tokens.add(new Token<String>(node.asText())); } return new Field(fieldName, tokens); } /** * GeoDistanceIndex backed fields are handled here * * @param fieldName name of the field * @param node JsonNode representing the field content. This can be an array or an object * @return Field instance */ private Field handleGeoDistanceField(String fieldName, JsonNode node) { List<Token> tokens = new ArrayList<Token>(); if (node.getNodeType() == JsonNodeType.ARRAY) { Iterator<JsonNode> iterator = ((ArrayNode) node).elements(); while (iterator.hasNext()) { JsonNode subNode = iterator.next(); if (subNode.getNodeType() == JsonNodeType.OBJECT) { LatLng latLng = new LatLng(subNode.get("lat").asDouble(), subNode.get("lng").asDouble()); Token<LatLng> token = new Token<LatLng>(latLng); tokens.add(token); } } } else if (node.getNodeType() == JsonNodeType.OBJECT) { LatLng latLng = new LatLng(node.get("lat").asDouble(), node.get("lng").asDouble()); Token<LatLng> token = new Token<LatLng>(latLng); tokens.add(token); } return new Field(fieldName, tokens); } /** * IntegerIndex backed fields are handled here * * @param fieldName name of the field * @param node JsonNode representing the field content. This can be an array or a number * @return Field instance */ private Field handleIntegerField(String fieldName, JsonNode node) { List<Token> tokens = new ArrayList<Token>(); if (node.getNodeType() == JsonNodeType.ARRAY) { Iterator<JsonNode> iterator = ((ArrayNode) node).elements(); while (iterator.hasNext()) { JsonNode subNode = iterator.next(); if (subNode.getNodeType() == JsonNodeType.NUMBER) { tokens.add(new Token<Integer>(subNode.asInt())); } } } else if (node.getNodeType() == JsonNodeType.NUMBER) { tokens.add(new Token<Integer>(node.asInt())); } return new Field(fieldName, tokens); } /** * DateTimeIndex backed fields are handled here * * @param fieldName name of the field * @param node JsonNode representing the field content. This can be an array or a formatted date * @return Field instance */ private Field handleDateTimeField(String fieldName, JsonNode node) { List<Token> tokens = new ArrayList<Token>(); DateFormat dateFormat = ((DateTimeIndex) schema.getFieldIndex(fieldName)).getDateFormat(); try { if (node.getNodeType() == JsonNodeType.ARRAY) { Iterator<JsonNode> iterator = ((ArrayNode) node).elements(); while (iterator.hasNext()) { JsonNode subNode = iterator.next(); if (subNode.getNodeType() == JsonNodeType.STRING) { String value = subNode.asText(); tokens.add(new Token<Long>(dateFormat.parse(value).getTime())); } } } else if (node.getNodeType() == JsonNodeType.STRING) { String value = node.asText(); tokens.add(new Token<Long>(dateFormat.parse(value).getTime())); } } catch (ParseException e) { e.printStackTrace(); } return new Field(fieldName, tokens); } /** * FullTextIndex backed fields are handled here * * @param fieldName name of the field * @param node JsonNode representing the field content. This has to be a text node * @return Field instance * @throws TokenizerException */ private Field handleFullTextField(String fieldName, JsonNode node) throws TokenizerException { Analyzer analyzer = ((FullTextIndex) schema.getFieldIndex(fieldName)).getAnalyzer(); List<Token> tokens = analyzer.tokenize(node.asText(), true); return new Field(fieldName, tokens); } }