package io.monokkel.core; import com.google.common.collect.Lists; import com.google.common.collect.Sets; import io.monokkel.core.api.ResponseParser; import io.monokkel.core.utils.ParserUtils; import io.monokkel.domain.PageData; import io.monokkel.exceptions.ParseException; import org.apache.commons.lang.StringUtils; import org.json.simple.JSONObject; import org.json.simple.parser.ContainerFactory; import org.json.simple.parser.JSONParser; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import java.util.LinkedHashMap; import java.util.LinkedList; import java.util.List; import java.util.Map; import static io.monokkel.core.utils.ParserUtils.doesTheListHaveHeader; import static java.lang.String.format; /** * Created by tarjei on 12/10/14. */ public abstract class JsonSupport implements ResponseParser { private Logger log = LoggerFactory.getLogger(getClass()); private final List<String> fieldToFiendNextUrl; protected JsonSupport(final List<String> fieldToFiendNextUrl) { this.fieldToFiendNextUrl = fieldToFiendNextUrl; } @Override @SuppressWarnings("unchecked") public PageData parse(String url, String response, Long timeStamp) throws ParseException { JSONParser jsonParser = new JSONParser(); ContainerFactory containerFactory = createContainerFactory(); try { final Map<String,Object> parse = (Map<String,Object>) jsonParser.parse(response, containerFactory); List<String> urls = retrieveUrls(parse,url); final Map<String, Object> transformed = transformContent(parse,url,timeStamp); return new PageData(Sets.newHashSet(urls), url, timeStamp, response, "", "",transformed); } catch (org.json.simple.parser.ParseException e) { final String message = format("Failed to parse %s", url); log.warn(message, e); throw new ParseException(message, e); } } /** * Modify or transform the input map * * @return a modified or transformed map * @param parse a json map * @param url * @param timeStamp */ protected abstract Map<String, Object> transformContent(final Map<String, Object> parse, final String url, final Long timeStamp); private List<String> retrieveUrls(Map parse, String url) { String nextUrl = (String)extractContent(parse,0,url,this.fieldToFiendNextUrl, true); if(StringUtils.isNotEmpty(nextUrl)) { return Lists.newArrayList(nextUrl); } else { return Lists.newArrayList(); } } /** * Recursive method that takes a generic field input transforms it and returns a the lowest matching field in the * json tree * * @param fieldObject a field object that is either an String, Map or LinkedList * @param fieldIndex the field index to retrieve next * @param url of the origin of the document * @param doNotWrapStringJSONObject Set this to false when you do not want a JSON string retrieved to be extracted * with a object wrapping * @return the field as a JSON String */ @SuppressWarnings("unchecked") protected Object extractContent(final Object fieldObject, final Integer fieldIndex, final String url, final List<String> fieldPathsToRetrieve, boolean doNotWrapStringJSONObject) { if (fieldIndex == fieldPathsToRetrieve.size() && fieldObject instanceof Map) { return JSONObject.toJSONString((Map) fieldObject); } final Boolean isABaseType = ParserUtils.isAValidJsonBaseType(fieldObject); if(fieldIndex == fieldPathsToRetrieve.size() && isABaseType && doNotWrapStringJSONObject){ return fieldObject; } // Wrap it in a JSON object final boolean isLastFieldAndBaseTypeOrList = fieldIndex == fieldPathsToRetrieve.size() && (fieldObject instanceof LinkedList || isABaseType); if (isLastFieldAndBaseTypeOrList) { String currentField = fieldPathsToRetrieve.get(fieldIndex - 1); JSONObject jsonObject = new JSONObject(); jsonObject.put(currentField, fieldObject); return jsonObject.toJSONString(); } if (fieldObject instanceof Map) { final String field = fieldPathsToRetrieve.get(fieldIndex); final Map parsedObject = (Map) fieldObject; final Object next = parsedObject.get(field); return extractContent(next, fieldIndex + 1, url,fieldPathsToRetrieve, doNotWrapStringJSONObject); } if (fieldObject instanceof LinkedList) { final String indexField = fieldPathsToRetrieve.get(fieldIndex); final Integer listIndex = Integer.parseInt(indexField); final LinkedList jsonList = (LinkedList) fieldObject; if (listIndex >= jsonList.size()) { log.warn("The list index {} does not exists. Check the path sent to the parser. Url: {}", indexField, url); // There are no json return ""; } final Object jsonObject = jsonList.get(listIndex); return extractContent(jsonObject, fieldIndex + 1, url,fieldPathsToRetrieve, doNotWrapStringJSONObject); } log.error("No response found when getting field {} from url {}. This is a sign of a malformed parsing or misconfiguration", fieldPathsToRetrieve, url); return ""; } @Override public Boolean shouldParse(final String url, final String response, final List<String> typesFromTheResponseHeader) { final String header = "application/json"; return doesTheListHaveHeader(typesFromTheResponseHeader, header); } private ContainerFactory createContainerFactory() { return new ContainerFactory() { @Override public Map createObjectContainer() { return new LinkedHashMap(); } @Override public List creatArrayContainer() { return new LinkedList(); } }; } }