/** * Copyright 2014 National University of Ireland, Galway. * * This file is part of the SIREn project. Project and contact information: * * https://github.com/rdelbru/SIREn * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.sindice.siren.util; import static org.sindice.siren.analysis.JsonTokenizer.FALSE; import static org.sindice.siren.analysis.JsonTokenizer.LITERAL; import static org.sindice.siren.analysis.JsonTokenizer.NULL; import static org.sindice.siren.analysis.JsonTokenizer.NUMBER; import static org.sindice.siren.analysis.JsonTokenizer.TRUE; import java.util.ArrayList; import java.util.Arrays; import java.util.Random; import java.util.Stack; import org.apache.lucene.util.IntsRef; import org.sindice.siren.analysis.JsonTokenizer; public class JsonGenerator { public final Random rand; public int valueType; // used for generating a random json document private final StringBuilder sb = new StringBuilder(); private final Stack<Integer> states = new Stack<Integer>(); private static final int ARRAY = 0; private static final int OBJECT_ATT = 1; private static final int OBJECT_VAL = 2; public final ArrayList<String> images = new ArrayList<String>(); public final ArrayList<IntsRef> nodes = new ArrayList<IntsRef>(); public final ArrayList<Integer> incr = new ArrayList<Integer>(); public final ArrayList<String> types = new ArrayList<String>(); public final ArrayList<String> datatypes = new ArrayList<String>(); private final IntsRef curNodePath = new IntsRef(1024); public boolean shouldFail = false; private final int MAX_DEPTH = 50; private int nestedObjs = 0; public JsonGenerator(final Random rand) { this.rand = rand; } /** * Create a random Json document with random values */ public String getRandomJson(int nbNodes) { // init sb.setLength(0); sb.append("{"); states.clear(); states.add(OBJECT_ATT); images.clear(); nodes.clear(); incr.clear(); datatypes.clear(); types.clear(); curNodePath.length = 1; curNodePath.offset = 0; Arrays.fill(curNodePath.ints, -1); shouldFail = false; nestedObjs = 0; // <= so that when nbNodes == 1, the json is still valid /* * the generated json might be uncomplete, if states is not empty, and * the maximum number of nodes has been reached. */ for (final int i = 0; i <= nbNodes && !states.empty(); nbNodes++) { sb.append(this.getWhitespace()).append(this.getNextNode()).append(this.getWhitespace()); } shouldFail = shouldFail ? true : !states.empty(); return sb.toString(); } /** * Return the next element of the json document */ private String getNextNode() { final int popState; switch (states.peek()) { case ARRAY: switch (rand.nextInt(9)) { case 0: // String case final String val = "stepha" + this.getWhitespace() + "n" + this.getWhitespace() + "e"; this.addToLastNode(1); nodes.add(IntsRef.deepCopyOf(curNodePath)); images.add(val); types.add(JsonTokenizer.getTokenTypes()[LITERAL]); incr.add(1); datatypes.add(XSDDatatype.XSD_STRING); return "\"" + val + "\"" + this.getWhitespace() + ","; case 1: // DOUBLE case this.addToLastNode(1); nodes.add(IntsRef.deepCopyOf(curNodePath)); images.add("34.560e-9"); types.add(JsonTokenizer.getTokenTypes()[NUMBER]); incr.add(1); datatypes.add(XSDDatatype.XSD_DOUBLE); return "34.560e-9" + this.getWhitespace() + ","; case 2: // LONG case this.addToLastNode(1); nodes.add(IntsRef.deepCopyOf(curNodePath)); images.add("34560e-9"); types.add(JsonTokenizer.getTokenTypes()[NUMBER]); incr.add(1); datatypes.add(XSDDatatype.XSD_LONG); return "34560e-9" + this.getWhitespace() + ","; case 3: // true case this.addToLastNode(1); nodes.add(IntsRef.deepCopyOf(curNodePath)); images.add("true"); types.add(JsonTokenizer.getTokenTypes()[TRUE]); incr.add(1); datatypes.add(XSDDatatype.XSD_BOOLEAN); return "true" + this.getWhitespace() + ","; case 4: // false case this.addToLastNode(1); nodes.add(IntsRef.deepCopyOf(curNodePath)); images.add("false"); types.add(JsonTokenizer.getTokenTypes()[FALSE]); incr.add(1); datatypes.add(XSDDatatype.XSD_BOOLEAN); return "false" + this.getWhitespace() + ","; case 5: // null case this.addToLastNode(1); nodes.add(IntsRef.deepCopyOf(curNodePath)); images.add("null"); types.add(JsonTokenizer.getTokenTypes()[NULL]); incr.add(1); datatypes.add(XSDDatatype.XSD_STRING); return "null" + this.getWhitespace() + ","; case 6: // nested array case if (states.size() <= MAX_DEPTH) { this.addToLastNode(1); this.incrNodeObjectPath(); states.add(ARRAY); return "["; } return ""; case 7: // nested object case if (states.size() <= MAX_DEPTH) { this.addToLastNode(1); this.incrNodeObjectPath(); states.add(OBJECT_ATT); return "{"; } return ""; case 8: // closing array case this.decrNodeObjectPath(); popState = states.pop(); if (popState != ARRAY) { shouldFail = true; } // Remove previous comma, this is not allowed final int comma = sb.lastIndexOf(","); if (comma != -1 && sb.substring(comma + 1).matches("\\s*")) { sb.deleteCharAt(comma); } return "],"; } case OBJECT_ATT: switch (rand.nextInt(3)) { case 0: types.add(JsonTokenizer.getTokenTypes()[LITERAL]); images.add("ste ph ane"); incr.add(1); this.addToLastNode(1); nodes.add(IntsRef.deepCopyOf(curNodePath)); datatypes.add(JSONDatatype.JSON_FIELD); states.push(OBJECT_VAL); return "\"ste ph ane\"" + this.getWhitespace() + ":"; case 1: if (nestedObjs > 0) { this.decrNodeObjectPath(); nestedObjs--; } this.decrNodeObjectPath(); popState = states.pop(); if (popState != OBJECT_ATT) { shouldFail = true; } return states.empty() ? "}" : "},"; case 2: final String field; if (states.isEmpty()) { // datatype object at the root are not possible shouldFail = true; field = ""; } else if (states.peek() == OBJECT_ATT) { // field name this.addToLastNode(1); field = "\"field\":"; types.add(JsonTokenizer.getTokenTypes()[LITERAL]); images.add("field"); incr.add(1); nodes.add(IntsRef.deepCopyOf(curNodePath)); datatypes.add(JSONDatatype.JSON_FIELD); // value this.incrNodeObjectPath(); this.setLastNode(0); } else if (states.peek() == ARRAY) { this.addToLastNode(1); field = ""; } else { // should not happen throw new IllegalStateException(); } types.add(JsonTokenizer.getTokenTypes()[LITERAL]); images.add("Luke Skywalker"); incr.add(1); nodes.add(IntsRef.deepCopyOf(curNodePath)); datatypes.add("jedi"); // close datatype object this.decrNodeObjectPath(); return field + "{" + this.getWhitespace() + "\"" + JsonTokenizer.DATATYPE_LABEL + "\":" + this.getWhitespace() + "\"jedi\"," + "\"" + JsonTokenizer.DATATYPE_VALUES + "\":" + this.getWhitespace() + "\"Luke Skywalker\"" + this.getWhitespace() + "},"; } case OBJECT_VAL: switch (rand.nextInt(8)) { case 0: return this.doValString("stepha" + this.getWhitespace() + "n" + this.getWhitespace() + "e"); case 1: // DOUBLE case images.add("34.560e-9"); types.add(JsonTokenizer.getTokenTypes()[NUMBER]); incr.add(1); this.incrNodeObjectPath(); this.setLastNode(0); nodes.add(IntsRef.deepCopyOf(curNodePath)); this.decrNodeObjectPath(); datatypes.add(XSDDatatype.XSD_DOUBLE); states.pop(); // remove OBJECT_VAL state return "34.560e-9" + this.getWhitespace() + ","; case 2: // LONG case images.add("34560e-9"); types.add(JsonTokenizer.getTokenTypes()[NUMBER]); incr.add(1); this.incrNodeObjectPath(); this.setLastNode(0); nodes.add(IntsRef.deepCopyOf(curNodePath)); this.decrNodeObjectPath(); datatypes.add(XSDDatatype.XSD_LONG); states.pop(); // remove OBJECT_VAL state return "34560e-9" + this.getWhitespace() + ","; case 3: images.add("true"); types.add(JsonTokenizer.getTokenTypes()[TRUE]); incr.add(1); this.incrNodeObjectPath(); this.setLastNode(0); nodes.add(IntsRef.deepCopyOf(curNodePath)); this.decrNodeObjectPath(); datatypes.add(XSDDatatype.XSD_BOOLEAN); states.pop(); // remove OBJECT_VAL state return "true" + this.getWhitespace() + ","; case 4: images.add("false"); types.add(JsonTokenizer.getTokenTypes()[FALSE]); incr.add(1); this.incrNodeObjectPath(); this.setLastNode(0); nodes.add(IntsRef.deepCopyOf(curNodePath)); this.decrNodeObjectPath(); datatypes.add(XSDDatatype.XSD_BOOLEAN); states.pop(); // remove OBJECT_VAL state return "false" + this.getWhitespace() + ","; case 5: images.add("null"); types.add(JsonTokenizer.getTokenTypes()[NULL]); incr.add(1); this.incrNodeObjectPath(); this.setLastNode(0); nodes.add(IntsRef.deepCopyOf(curNodePath)); this.decrNodeObjectPath(); datatypes.add(XSDDatatype.XSD_STRING); states.pop(); // remove OBJECT_VAL state return "null" + this.getWhitespace() + ","; case 6: if (states.size() <= MAX_DEPTH) { states.pop(); // remove OBJECT_VAL state this.incrNodeObjectPath(); states.add(ARRAY); return "["; } return this.doValString(""); case 7: if (states.size() <= MAX_DEPTH) { states.pop(); // remove OBJECT_VAL state // Two incrementations, because the object introduce a "blank" node nestedObjs++; this.incrNodeObjectPath(); this.setLastNode(0); this.incrNodeObjectPath(); states.add(OBJECT_ATT); return "{"; } return this.doValString(""); } default: throw new IllegalStateException("Got unknown lexical state: " + states.peek()); } } /** * Return a sequence of whitespace characters */ private String getWhitespace() { final int nWS = rand.nextInt(5); String ws = ""; for (int i = 0; i < nWS; i++) { switch (rand.nextInt(6)) { case 0: ws += " "; break; case 1: ws += "\t"; break; case 2: ws += "\f"; break; case 3: ws += "\r"; break; case 4: ws += "\n"; break; case 5: ws += "\r\n"; break; default: break; } } return ws; } /** * Add an object/array to the current node path */ private void incrNodeObjectPath() { ArrayUtils.growAndCopy(curNodePath, curNodePath.length + 1); curNodePath.length++; // initialise node this.setLastNode(-1); } /** * Remove an object/array from the node path */ private void decrNodeObjectPath() { curNodePath.length--; } /** Update the path of the current values of the current object node */ private void setLastNode(final int val) { curNodePath.ints[curNodePath.length - 1] = val; } /** Update the path of the current values of the current object node */ private void addToLastNode(final int val) { curNodePath.ints[curNodePath.length - 1] += val; } /** * Add a string value to an object entry */ private String doValString(final String val) { images.add(val); types.add(JsonTokenizer.getTokenTypes()[LITERAL]); incr.add(1); this.incrNodeObjectPath(); this.setLastNode(0); nodes.add(IntsRef.deepCopyOf(curNodePath)); this.decrNodeObjectPath(); datatypes.add(XSDDatatype.XSD_STRING); states.pop(); // remove OBJECT_VAL state return "\"" + val + "\"" + this.getWhitespace() + ","; } /** * Returns a random value type */ public String getRandomValue() { valueType = rand.nextInt(5); switch (valueType) { case FALSE: return "false"; case LITERAL: return "\"stephane\""; case NULL: return "null"; case NUMBER: return "324.90E-02"; case TRUE: return "true"; default: throw new IllegalArgumentException("No value for index=" + valueType); } } }