package io.monokkel.core;
import com.google.common.collect.Lists;
import com.google.common.collect.Sets;
import io.monokkel.core.api.ResponseParser;
import io.monokkel.domain.PageData;
import io.monokkel.exceptions.ParseException;
import org.apache.commons.lang.StringUtils;
import org.json.simple.JSONObject;
import org.json.simple.parser.ContainerFactory;
import org.json.simple.parser.JSONParser;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.util.*;
import java.util.stream.Collectors;
import static io.monokkel.core.utils.ParserUtils.doesTheListHaveHeader;
import static java.lang.String.format;
/**
* Created by Tarjei on 25/06/14.
* <p/>
* This class is a JSON parser that receives a json string and returns extracted content
* based on a simple expression path. The returned extracted content is always wrapped as a json object.
* The paths are passed to the constructor as a map from output key to path, where each path is a list of path elements.
* <p/>
* A few examples (json is unescaped):
* <p/>
* ===== Example 1 =====
* <p/>
* String json = "{
* "data": {
* "field": "This is a field"
* },
* "list":[
* {
* "element": "element"
* }
* ]
* }";
* <p/>
* ArrayList<String> path = new ArrayList<String>();
* path.add("data");
* path.add("field");
* <p/>
* JsonTransformer jsonParser = new JsonTransformer(path,..);
* ParseOutput parseOutput = jsonParser.parse("http://url",json,1L);
* parseOutput.getExtractedContent(); // Returns the string {"field":"This is a field"}
* <p/>
* ===== Example 2 =====
* <p/>
* ArrayList<String> path = new ArrayList<String>();
* path.add("list");
* path.add("0"); // Index of the list
* <p/>
* JsonTransformer jsonParser = new JsonTransformer(path,..);
* ParseOutput parseOutput = jsonParser.parse("http://url",json,1L);
* parseOutput.getExtractedContent(); // Returns the string "{ element: "element" }"
* <p/>
* ===== Example 3 =====
* <p/>
* ArrayList<String> path = new ArrayList<String>();
* path.add("list");
* <p/>
* JsonTransformer jsonParser = new JsonTransformer(path,..);
* ParseOutput parseOutput = jsonParser.parse("http://url",json,1L);
* parseOutput.getExtractedContent(); // Returns the string "{"list":[{ element: "element" }]}"
* <p/>
* The second constructor parameter is a path of the same kind, but it is intended to return urls to the crawler.
*
*/
public class JsonTransformer extends JsonSupport implements ResponseParser {

    /** Paths to extract, keyed by the name under which each extracted value is returned. */
    private final Map<String, List<String>> fieldPathsToRetrieve;

    /**
     * Creates a transformer for a fixed set of paths.
     *
     * @param fieldsPathsToRetrieve map from output key to the path handed to {@code extractContent}
     *                              for that key; must contain at least one entry
     * @param fieldToFiendNextUrl   passed unchanged to the {@code JsonSupport} constructor
     * @throws NullPointerException     if {@code fieldsPathsToRetrieve} is {@code null}
     * @throws IllegalArgumentException if {@code fieldsPathsToRetrieve} is empty
     */
    public JsonTransformer(final Map<String, List<String>> fieldsPathsToRetrieve, final List<String> fieldToFiendNextUrl) {
        super(fieldToFiendNextUrl);
        Objects.requireNonNull(fieldsPathsToRetrieve, "fieldsPathsToRetrieve must not be null");
        if (fieldsPathsToRetrieve.isEmpty()) {
            // The check is about the number of configured paths, not the length of any single path.
            throw new IllegalArgumentException("You need at least one json path to retrieve");
        }
        // Shallow snapshot: later changes to the caller's map must not alter what this
        // transformer extracts. The path lists themselves are still shared with the caller.
        this.fieldPathsToRetrieve = Collections.unmodifiableMap(new LinkedHashMap<>(fieldsPathsToRetrieve));
    }

    /**
     * Builds the result map by running {@code extractContent} once per configured path.
     *
     * @param parse     the parsed json document to extract from
     * @param url       forwarded to {@code extractContent}
     * @param timeStamp not used by this implementation
     * @return a map with the same keys as the configured paths, each mapped to the value
     *         returned by {@code extractContent} for that path
     */
    @Override
    protected Map<String, Object> transformContent(final Map<String, Object> parse, final String url, final Long timeStamp) {
        // NOTE(review): Collectors.toMap rejects null values. If extractContent can return null
        // (e.g. for a path that is missing in the document) this throws NullPointerException —
        // confirm against JsonSupport before relying on it.
        return fieldPathsToRetrieve.entrySet().stream()
                .collect(Collectors.toMap(
                        Map.Entry::getKey,
                        entry -> extractContent(parse, 0, url, entry.getValue(), true)));
    }
}