/* * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information * regarding copyright ownership. The ASF licenses this file * to you under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance * with the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.apache.giraph.rexster.utils; import static org.apache.giraph.rexster.conf.GiraphRexsterConstants.GIRAPH_REXSTER_GRAPH; import static org.apache.giraph.rexster.conf.GiraphRexsterConstants.GIRAPH_REXSTER_GREMLIN_E_SCRIPT; import static org.apache.giraph.rexster.conf.GiraphRexsterConstants.GIRAPH_REXSTER_GREMLIN_V_SCRIPT; import static org.apache.giraph.rexster.conf.GiraphRexsterConstants.GIRAPH_REXSTER_HOSTNAME; import static org.apache.giraph.rexster.conf.GiraphRexsterConstants.GIRAPH_REXSTER_PASSWORD; import static org.apache.giraph.rexster.conf.GiraphRexsterConstants.GIRAPH_REXSTER_PORT; import static org.apache.giraph.rexster.conf.GiraphRexsterConstants.GIRAPH_REXSTER_USERNAME; import static org.apache.giraph.rexster.conf.GiraphRexsterConstants.GIRAPH_REXSTER_USES_SSL; import java.io.BufferedReader; import java.io.IOException; import java.io.InputStream; import java.io.InputStreamReader; import java.net.HttpURLConnection; import java.net.URL; import java.nio.charset.Charset; import java.util.ArrayList; import java.util.List; import org.apache.commons.codec.binary.Base64; import org.apache.giraph.conf.ImmutableClassesGiraphConfiguration; import org.apache.giraph.rexster.io.RexsterInputSplit; import org.apache.hadoop.mapreduce.InputSplit; import org.apache.hadoop.mapreduce.JobContext; import org.apache.log4j.Logger; import org.json.JSONException; import org.json.JSONTokener; /** * Utility functions for the Rexster REST interface */ public abstract class RexsterUtils { /** start object symbol for JSON */ public static final char KEY_VALUE_SEPARATOR = ':'; /** start object symbol for JSON */ public static final char START_OBJECT = '{'; /** end object symbol for JSON */ public static final char END_OBJECT = '}'; /** start array symbol for JSON */ public static final char START_ARRAY = '['; /** end array symbol for JSON */ public static final char END_ARRAY = ']'; /** array elements separator symbol for JSON */ public static final char ARRAY_SEPARATOR = ','; /** Class logger. */ private static final Logger LOG = Logger.getLogger(RexsterUtils.class); /** * The default constructor is set to be private by default so that the * class is not instantiated. */ private RexsterUtils() { /* private constructor */ } /** * Parse all the vertices from the JSON retreived from Rexster. Inspired * by the implementation of the JSONObject class. * * @param br buffer over the HTTP response content * @return JSONTokener tokener over the HTTP JSON. Null in case the results * array is empty. */ public static JSONTokener parseJSONEnvelope(BufferedReader br) throws InterruptedException { JSONTokener tokener = null; try { char c; String key = null; tokener = new JSONTokener(br); /* check that the JSON is well-formed by starting with a '{' */ if (tokener.nextClean() != START_OBJECT) { LOG.error( String.format("A JSONObject text must begin with '%c'", START_OBJECT)); } /* loop on the whole array */ for (;;) { c = tokener.nextClean(); switch (c) { case 0: LOG.error(String.format("A JSONObject text must end with '%c'", END_OBJECT)); break; case END_OBJECT: return tokener; default: tokener.back(); key = tokener.nextValue().toString(); } c = tokener.nextClean(); if (c != KEY_VALUE_SEPARATOR) { LOG.error(String.format("Expected a %c after a key", c)); } if (key != null && !key.equals("results")) { tokener.nextValue(); } else { /* starting array */ c = tokener.nextClean(); if (c != START_ARRAY) { LOG.error("'results' is expected to be an array"); } /* check if the array is emty. If so, return null to signal that no objects are available in the array, otherwise return the tokener. */ c = tokener.nextClean(); if (c == END_ARRAY) { return null; } else { tokener.back(); return tokener; } } switch (tokener.nextClean()) { case ';': case ',': if (tokener.nextClean() == '}') { return tokener; } tokener.back(); break; case '}': return tokener; default: LOG.error("Expected a ',' or '}'"); } } } catch (JSONException e) { LOG.error("Unable to parse the JSON with the vertices.\n" + e.getMessage()); throw new InterruptedException(e.toString()); } } /** * Splitter used by both Vertex and Edge Input Format. * * @param context The job context * @param estimation Number of estimated objects * @return splits to be generated to read the input */ public static List<InputSplit> getSplits(JobContext context, long estimation) throws IOException, InterruptedException { final int chunks = context.getConfiguration().getInt("mapred.map.tasks", 1); final long chunkSize = estimation / chunks; final List<InputSplit> splits = new ArrayList<InputSplit>(); if (LOG.isDebugEnabled()) { LOG.debug(String.format("Estimated objects: %d", estimation)); LOG.debug(String.format("Number of chunks: %d", chunks)); } for (int i = 0; i < chunks; ++i) { final RexsterInputSplit split; final long start; final long end; start = i * chunkSize; end = ((i + 1) == chunks) ? Long.MAX_VALUE : (i * chunkSize) + chunkSize; split = new RexsterInputSplit(start, end); splits.add(split); if (LOG.isDebugEnabled()) { LOG.debug(String.format("Chunk: start %d; end %d;", start, end)); LOG.debug(String.format("Chunk: size %d;", chunkSize)); LOG.debug(split); } } return splits; } /** * Opens an HTTP connection to the specified Rexster server. * * @param conf giraph configuration * @param start start index of the Rexster page split * @param end end index of the Rexster page split * @param urlSuffix stream type (vertices or edges) needed for the * REST Url * @param gremlinScript gremlin script. If set to null, will be ignored. * @return BufferedReader the object used to retrieve the HTTP response * content */ // CHECKSTYLE: stop IllegalCatch protected static BufferedReader openRexsterStream( ImmutableClassesGiraphConfiguration conf, long start, long end, String urlSuffix, String gremlinScript) throws InterruptedException { final String uriScriptFormat = "/graphs/%s/tp/gremlin?script=%s" + "&rexster.offset.start=%s&rexster.offset.end=%s"; final String uriFormat = "/graphs/%s/%s/" + "?rexster.offset.start=%s&rexster.offset.end=%s"; final String endpoint = GIRAPH_REXSTER_HOSTNAME.get(conf); if (endpoint == null) { throw new InterruptedException(GIRAPH_REXSTER_HOSTNAME.getKey() + " is a mandatory "); } final boolean isSsl = GIRAPH_REXSTER_USES_SSL.get(conf); final int port = GIRAPH_REXSTER_PORT.get(conf); final String graph = GIRAPH_REXSTER_GRAPH.get(conf); try { URL url; /*final String url;*/ final String auth; final String username; final String password; final HttpURLConnection connection; final InputStream is; final InputStreamReader isr; if (gremlinScript != null && !gremlinScript.isEmpty()) { url = new URL(isSsl ? "https" : "http", endpoint, port, String.format(uriScriptFormat, graph, gremlinScript, start, end)); } else { url = new URL(isSsl ? "https" : "http", endpoint, port, String.format(uriFormat, graph, urlSuffix, start, end)); } LOG.info(url); username = GIRAPH_REXSTER_USERNAME.get(conf); password = GIRAPH_REXSTER_PASSWORD.get(conf); byte[] authBytes = (username + ":" + password).getBytes( Charset.defaultCharset()); auth = "Basic " + Base64.encodeBase64URLSafeString(authBytes); connection = createConnection(url, auth); connection.setDoOutput(true); is = connection.getInputStream(); isr = new InputStreamReader(is, Charset.defaultCharset()); return new BufferedReader(isr); } catch (Exception e) { throw new RuntimeException(e.getMessage(), e); } } // CHECKSTYLE: resume IllegalCatch /** * Creates a new HTTP connection to the specified server. * * @param url URI to connec to * @param authValue authetication value if available * @return a new HTTP connection */ private static HttpURLConnection createConnection(final URL url, final String authValue) throws Exception { final HttpURLConnection connection = (HttpURLConnection) url.openConnection(); connection.setConnectTimeout(0); connection.setReadTimeout(0); connection.setRequestMethod("GET"); connection.setRequestProperty("Authorization", authValue); connection.setDoOutput(true); return connection; } /** * Specific Rexster utility functions for vertices */ public static class Vertex { /** * Empty private constructor. This class should not be instantiated. */ private Vertex() { /* private constructor */ } /** * Opens an HTTP connection to the specified Rexster server for vertices. * * @param conf giraph configuration * @param start start index of the Rexster page split * @param end end index of the Rexster page split * @return BufferedReader the object used to retrieve the HTTP response */ public static BufferedReader openRexsterStream( ImmutableClassesGiraphConfiguration conf, long start, long end) throws InterruptedException { String gremlinScript = null; gremlinScript = GIRAPH_REXSTER_GREMLIN_V_SCRIPT.get(conf); return RexsterUtils.openRexsterStream(conf, start, end, "vertices", gremlinScript); } } /** * Specific Rexster utility functions for edges */ public static class Edge { /** * Empty private constructor. This class should not be instantiated. */ private Edge() { /* private constructor */ } /** * Opens an HTTP connection to the specified Rexster server for edges. * * @param conf giraph configuration * @param start start index of the Rexster page split * @param end end index of the Rexster page split * @return BufferedReader the object used to retrieve the HTTP response */ public static BufferedReader openRexsterStream( ImmutableClassesGiraphConfiguration conf, long start, long end) throws InterruptedException { String gremlinScript = null; gremlinScript = GIRAPH_REXSTER_GREMLIN_E_SCRIPT.get(conf); return RexsterUtils.openRexsterStream(conf, start, end, "edges", gremlinScript); } } }