/*******************************************************************************
 * Copyright 2013-2016 Thomas Schreiber
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 ******************************************************************************/
package at.alladin.rmbt.statisticServer.opendata;

import java.sql.PreparedStatement;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.util.Arrays;
import java.util.HashSet;
import java.util.Set;
import java.util.logging.Level;
import java.util.logging.Logger;

import org.json.JSONArray;
import org.json.JSONException;
import org.json.JSONObject;
import org.restlet.data.Form;
import org.restlet.data.Status;
import org.restlet.resource.Get;

import at.alladin.rmbt.shared.Classification;
import at.alladin.rmbt.shared.cache.CacheHelper;
import at.alladin.rmbt.statisticServer.ServerResource;

/**
 * Restlet resource implementing the open-data test search endpoint.
 * <p>
 * A GET request's query parameters are validated and translated into a SQL
 * WHERE clause by {@code QueryParser}; matching rows of the {@code test}
 * table are then returned as a JSON document containing a {@code results}
 * array, a {@code next_cursor} value for pagination, and the query duration
 * in {@code duration_ms}.
 * <p>
 * Geo coordinates are only exposed at full precision when the recorded
 * accuracy is below the configured {@code RMBT_GEO_ACCURACY_DETAIL_LIMIT};
 * otherwise they are rounded to roughly 100 m or suppressed entirely
 * (see the CASE expressions in the SQL below).
 */
public class OpenTestSearchResource extends ServerResource {
    // NOTE(review): FieldType is not referenced anywhere in this class —
    // possibly a leftover from before QueryParser took over field typing;
    // verify before removing.
    private enum FieldType {STRING, DATE, LONG, DOUBLE, BOOLEAN, UUID, SORTBY, SORTORDER, IGNORE};

    // NOTE(review): CACHE_EXP and cache are not used in this class's visible
    // code; presumably intended for response caching — confirm before removal.
    private static final int CACHE_EXP = 3600;
    private final CacheHelper cache = CacheHelper.getInstance();

    public final int MAXROWS = 10000; //maximum number of rows allowed, currently approx 1.5s response time at maximum
    public final int DEFAULTROWS = 100; //default number of rows (when max_results is not specified)
    public final int MAXQUERYFIELDS = 50; //to prevent database-server overload

    //all fields that should be displayed in a general request (e.g. all tests for one user)
    private final String[] openDataFieldsSummary = {"open_uuid", "open_test_uuid", "time", "lat", "long", "download_kbit", "upload_kbit", "ping_ms", "signal_strength", "lte_rsrp", "platform", "provider_name", "model", "loc_accuracy"};

    //all fields that are numbers (and are formatted as numbers in json)
    private final HashSet<String> openDataNumberFields = new HashSet<>(Arrays.asList(new String[]{"time", "lat", "long", "zip_code", "download_kbit", "upload_kbit","ping_ms","signal_strength", "lte_rsrp", "test_duration","num_threads","ndt_download_kbit","ndt_upload_kbit","asn","loc_accuracy"}));

    //all fields for which the user can sort the result
    private final HashSet<String> openDataFieldsSortable = new HashSet<>(Arrays.asList(new String[]{"download_kbit","upload_kbit","time","signal_strength","ping_ms"}));

    //additional fields that the user is allowed to request
    private final HashSet<String> allowedAdditionalFields = new HashSet<>(Arrays.asList(new String[] {"download_classification","upload_classification","ping_classification"}));

    /**
     * Entry point for GET requests.
     * <p>
     * Parses and validates the query parameters, extracts the pagination
     * cursor, the row limit and any requested additional fields, and either
     * returns a 400 error listing the invalid parameters or delegates to
     * {@link #getSearchResult}.
     *
     * @param entity unused request entity (Restlet passes it for GET as well)
     * @return the JSON response as a string; on validation failure a JSON
     *         object with an {@code invalid_fields} member and HTTP 400
     * @throws JSONException if assembling the error response fails
     */
    @Get("json")
    public String request(final String entity) throws JSONException {
        addAllowOrigin();
        final JSONObject response = new JSONObject();
        final Form getParameters = getRequest().getResourceRef().getQueryAsForm();
        final QueryParser qp = new QueryParser();
        final Set<String> additionalFields;

        // parseQuery validates all recognized parameters (incl. "cursor" and
        // "max_results") and returns the names of the invalid ones
        final JSONArray invalidElements = qp.parseQuery(getParameters);

        //calculate offset
        long offset = -1;
        if (getParameters.getNames().contains("cursor")) {
            //is always a valid LONG because it is checked with all other
            //parameters above
            offset = Long.parseLong(getParameters.getFirstValue("cursor"));
        }

        //get maximal results-parameter
        long maxrows = DEFAULTROWS;
        if (getParameters.getNames().contains("max_results")) {
            //is always a valid LONG because it is checked with all other
            //parameters above
            maxrows = Long.parseLong(getParameters.getFirstValue("max_results"));
        }

        //parse additional fields
        // Both the plain and the PHP-style array parameter name are accepted;
        // if both are present, the plain name wins.
        if (getParameters.getNames().contains("additional_info") || getParameters.getNames().contains("additional_info[]")) {
            String param = (getParameters.getNames().contains("additional_info"))?"additional_info":"additional_info[]";
            for (String field : getParameters.getValuesArray(param)) {
                if (!allowedAdditionalFields.contains(field)) {
                    // NOTE(review): this reports the parameter name, not the
                    // offending value — the client is not told which field
                    // was rejected; confirm whether that is intentional.
                    invalidElements.put(param);
                }
            }
            additionalFields = new HashSet<>(Arrays.asList(getParameters.getValuesArray(param)));
        } else {
            additionalFields = null;
        }

        //if there have been errors => inform the user
        if (invalidElements.length()>0) {
            setStatus(Status.CLIENT_ERROR_BAD_REQUEST);
            response.put("invalid_fields", invalidElements);
            return response.toString();
        }

        //if there are too many query elements (DoS-Attack?), don't let it
        //get to the database
        if (qp.getWhereParams().keySet().size() > MAXQUERYFIELDS) {
            setStatus(Status.CLIENT_ERROR_BAD_REQUEST);
            response.put("invalid_fields", "field limit exceeded");
            return response.toString();
        }

        return getSearchResult(qp, offset, maxrows, additionalFields);
    }

    /**
     * Gets a JSON-String containing all open-data-values of all rows
     * that matched the given criteria.
     *
     * @param qp the parsed query holding the WHERE clause, its parameter
     *           values and an optional ORDER BY clause
     * @param offset pagination offset (given as "next_cursor" in a previous
     *           response), -1 if none is set; interpreted as a uid bound for
     *           the default order and as a SQL OFFSET otherwise
     * @param maxrows maximum number of rows to return; clamped to
     *           [1, MAXROWS], non-positive values fall back to DEFAULTROWS
     * @param additionalFields additional fields that should be included in
     *           the response, or null for none
     * @return the JSON response as a string; on SQL failure a JSON object
     *         with an {@code error} member and HTTP 404
     */
    private String getSearchResult(QueryParser qp, long offset, long maxrows, Set<String> additionalFields) {
        long startTime = System.currentTimeMillis();

        // Two mutually exclusive pagination mechanisms: a uid cursor when the
        // default ORDER BY t.uid is used, a plain OFFSET otherwise.
        String offsetString = (offset>0)? " AND t.uid<"+offset:""; //if no sorting is used
        String offsetString2 = (offset>0)? " OFFSET "+offset:""; //if sorting is used => may have concurrency issues in the results
        String orderClause = qp.getOrderClause();
        boolean defaultOrder = true;
        if (orderClause == null || orderClause.isEmpty()) {
            orderClause = " ORDER BY t.uid DESC ";
            offsetString2 = "";
        } else {
            defaultOrder = false;
            offsetString = "";
        }

        // clamp the row limit to sane bounds
        if (maxrows>MAXROWS)
            maxrows = MAXROWS;
        if (maxrows <= 0)
            maxrows = DEFAULTROWS;

        //There are many LEFT JOINs in the sql statement that are usual not needed.
        //This has no significant impact on the performance since our DBMS (postgres)
        //is intelligent enough to ignore these during query optimization if they are
        //not needed
        // The six '?' placeholders below all receive the configured accuracy
        // limit (set as parameters 1-6 further down); offset/limit values are
        // concatenated directly but are guaranteed numeric by the caller.
        final String sql = "SELECT" +
            " t.uid as cursor, " + //only for pagination
            " ('P' || t.open_uuid) open_uuid," +
            " ('O' || t.open_test_uuid) open_test_uuid," +
            " to_char(t.time AT TIME ZONE 'UTC', 'YYYY-MM-DD HH24:MI:SS') \"time\"," +
            //" nt.group_name cat_technology," +
            //" nt.name network_type," +
            //csv 6:lat
            " (CASE WHEN (t.geo_accuracy < ?) AND (t.geo_provider != 'manual') AND (t.geo_provider != 'geocoder') THEN" +
            " t.geo_lat" +
            " WHEN (t.geo_accuracy < ?) THEN" +
            " ROUND(t.geo_lat*1111)/1111" + // approx 100m
            " ELSE null" +
            " END) lat," +
            // csv 7:long
            " (CASE WHEN (t.geo_accuracy < ?) AND (t.geo_provider != 'manual') AND (t.geo_provider != 'geocoder') THEN" +
            " t.geo_long" +
            " WHEN (t.geo_accuracy < ?) THEN" +
            " ROUND(t.geo_long*741)/741 " + //approx 100m
            " ELSE null" +
            " END) long," +
            // accuracy of geo location in m
            " (CASE WHEN (t.geo_accuracy < ?) AND (t.geo_provider != 'manual') AND (t.geo_provider != 'geocoder') " +
            " THEN t.geo_accuracy " +
            " WHEN (t.geo_accuracy < 100) AND ((t.geo_provider = 'manual') OR (t.geo_provider = 'geocoder')) THEN 100" + // limit accuracy to 100m
            " WHEN (t.geo_accuracy < ?) THEN t.geo_accuracy" +
            " ELSE null END) loc_accuracy, " +
            //" t.geo_provider loc_src," +
            //" t.zip_code," +
            " t.speed_download download_kbit," +
            " t.speed_upload upload_kbit," +
            " (t.ping_median::float / 1000000) ping_ms," + // stored in ns, exposed in ms
            " t.signal_strength," +
            " t.lte_rsrp," +
            //" ts.name server_name," +
            //" duration test_duration," +
            //" num_threads," +
            " CONCAT(plattform,' ',network_group_name) as platform, " + // NOTE(review): 'plattform' is the actual column name — do not "fix" the spelling here
            " COALESCE(adm.fullname, t.model) model," +
            " COALESCE(prov.shortname, mprov.shortname, msim.shortname,msim.name," +
            " prov.name, mprov.name, network_operator_name, t.public_ip_as_name, network_sim_operator) provider_name " +
            //" client_software_version client_version," +
            //" network_operator network_mcc_mnc," +
            //" network_operator_name network_name," +
            //" network_sim_operator sim_mcc_mnc," +
            //" nat_type \"connection\"," +
            //" public_ip_asn asn," +
            //" client_public_ip_anonymized ip_anonym," +
            //" (ndt.s2cspd*1000)::int ndt_download_kbit," +
            //" (ndt.c2sspd*1000)::int ndt_upload_kbit" +
            " FROM test t" +
            " LEFT JOIN network_type nt ON nt.uid=t.network_type" +
            " LEFT JOIN device_map adm ON adm.codename=t.model" +
            " LEFT JOIN test_server ts ON ts.uid=t.server_id" +
            " LEFT JOIN provider prov ON provider_id = prov.uid " +
            " LEFT JOIN provider mprov ON mobile_provider_id = mprov.uid" +
            " LEFT JOIN mccmnc2name msim ON mobile_sim_id = msim.uid" + //TODO: finalize migration to msim/mnwk
            " WHERE " +
            " (t.deleted = false)" +
            " AND status = 'FINISHED' " + qp.getWhereClause("AND") +
            offsetString +
            orderClause +
            " LIMIT " + maxrows + offsetString2;

        PreparedStatement ps = null;
        ResultSet rs = null;
        final JSONObject response = new JSONObject();
        final JSONArray resultList = new JSONArray();

        try {
            ps = conn.prepareStatement(sql);

            //don't show coordinates when not accurate enough
            double accuracy = Double.parseDouble(settings.getString("RMBT_GEO_ACCURACY_DETAIL_LIMIT"));
            ps.setDouble(1, accuracy);
            ps.setDouble(2, accuracy);
            ps.setDouble(3, accuracy);
            ps.setDouble(4, accuracy);
            ps.setDouble(5, accuracy);
            ps.setDouble(6, accuracy);

            //fill in values for WHERE
            //ps = fillInWhereClause(ps, searchValues, 1);
            // start at index 7: placeholders 1-6 are the accuracy limits above
            qp.fillInWhereClause(ps, 7);
            //Logger.getLogger(OpenTestResource.class.getName()).log(Level.INFO, "prepstmt" + ps);

            // execute() returns false when there is no ResultSet;
            // NOTE(review): returning null here hands a literal null to the
            // HTTP layer — confirm this path is actually unreachable for a
            // SELECT statement.
            if (!ps.execute())
                return null;
            rs = ps.getResultSet();

            long lastUID = 0; //remember last uid for pagination since rs can only be traversed in one direction
            while (rs.next()) {
                final JSONObject jsonItem = new JSONObject();

                // copy the summary columns, preserving null vs. number vs. string
                for (int i=0;i<openDataFieldsSummary.length;i++) {
                    final Object obj = rs.getObject(openDataFieldsSummary[i]);
                    if (obj==null) {
                        jsonItem.put(openDataFieldsSummary[i], JSONObject.NULL);
                    } else if (openDataNumberFields.contains(openDataFieldsSummary[i])) {
                        final String tmp = obj.toString().trim();
                        if (tmp.isEmpty())
                            jsonItem.put(openDataFieldsSummary[i], JSONObject.NULL);
                        else
                            // stringToValue re-parses the text as a JSON number
                            jsonItem.put(openDataFieldsSummary[i], JSONObject.stringToValue(tmp));
                    } else {
                        jsonItem.put(openDataFieldsSummary[i], obj.toString());
                    }
                }

                // add additional fields if requested by user
                if (additionalFields != null) {
                    if (additionalFields.contains("download_classification")) {
                        jsonItem.put("download_classification", Classification.classify(Classification.THRESHOLD_DOWNLOAD,rs.getLong("download_kbit"), 4));
                    }
                    if (additionalFields.contains("upload_classification")) {
                        jsonItem.put("upload_classification", Classification.classify(Classification.THRESHOLD_UPLOAD,rs.getLong("upload_kbit"), 4));
                    }
                    if (additionalFields.contains("ping_classification")) {
                        // NOTE(review): converts ms back to ns for the
                        // threshold; getLong on the float ping_ms column
                        // truncates sub-ms precision — presumably acceptable
                        // for classification, verify against THRESHOLD_PING.
                        jsonItem.put("ping_classification", Classification.classify(Classification.THRESHOLD_PING,rs.getLong("ping_ms") * 1000000, 4));
                    }
                }

                lastUID = rs.getLong("cursor");
                resultList.put(jsonItem);
            }

            //if there are more results than we send, use pagination
            if (resultList.length() == maxrows) {
                //if it is the standard sort order
                if (defaultOrder) {
                    // next page: all rows with uid below the last one we sent
                    response.put("next_cursor", lastUID);
                } else {
                    // custom sort order: fall back to a plain row offset
                    offset = (offset<0) ? 0 : offset;
                    response.put("next_cursor", offset+maxrows);
                }
            } else {
                response.put("next_cursor", JSONObject.NULL);
            }

            response.put("results", resultList);

            //also put in the result, how long the query took to execute
            long elapsedTime = System.currentTimeMillis() - startTime;
            response.put("duration_ms",elapsedTime);
        } catch (final JSONException e) {
            Logger.getLogger(OpenTestSearchResource.class.getName()).log(Level.SEVERE, null, e);
        } catch (SQLException ex) {
            // an SQL error at this point is attributed to bad user input
            try {
                setStatus(Status.CLIENT_ERROR_NOT_FOUND);
                response.put("error","invalid parameters");
            } catch (JSONException ex1) {
                Logger.getLogger(OpenTestSearchResource.class.getName()).log(Level.SEVERE, null, ex1);
            }
            Logger.getLogger(OpenTestSearchResource.class.getName()).log(Level.SEVERE, null, ex);
        } finally {
            // always release the JDBC resources, even on failure
            try {
                if (rs != null)
                    rs.close();
                if (ps != null)
                    ps.close();
            } catch (final SQLException e) {
                Logger.getLogger(OpenTestSearchResource.class.getName()).log(Level.SEVERE, null, e);
            }
        }
        return response.toString();
    }
}