/*
* Copyright 2015 Themistoklis Mavridis <themis.mavridis@issel.ee.auth.gr>.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.thesmartweb.swebrank;
/**
*
* @author Themis Mavridis
*/
import java.io.IOException;
import static java.lang.String.valueOf;
import java.net.MalformedURLException;
import java.net.URL;
import java.util.logging.Level;
import java.util.logging.Logger;
import org.json.simple.parser.*;
import org.json.simple.JSONArray;
import org.json.simple.JSONObject;
import java.util.*;
import java.util.Iterator;
/**
* Class for parsing JSON responses
* @author themis
*/
public class JSONparsing {
/**
 * The URLs of the Google results (sized to 10 by the no-arg constructor).
 * NOTE(review): static mutable state shared by every instance — confirm the
 * class is never used from multiple threads or for interleaved searches.
 */
public static String[] links;
/**
 * The URLs of the Yahoo or Bing results (sized by the int constructor).
 */
public static String[] links_yahoo_bing;
/**
 * Scratch array holding the entries of a single Yahoo/Bing result object.
 * NOTE(review): never allocated anywhere in this file — verify it is
 * initialized elsewhere before YahooJsonParsing runs, otherwise that method
 * throws a NullPointerException.
 */
public static Map.Entry[] entries_yahoo_bing;
/**
 * The amount of semantic triples
 */
public static int triple_cnt;
/**
 * The amount of entities by Yahoo Content Analysis service that contained a word of a query
 */
public static int ent_query_cnt=0;
/**
 * The amount of categories by Yahoo Content Analysis service that contained a word of a query
 */
public static int cat_query_cnt=0;
/**
 * The amount of entities by Dandelion named Entity Extraction API that contained a word of a query
 */
public static int ent_query_cnt_dand=0;
/**
 * The amount of categories by Dandelion named Entity Extraction API that contained a word of a query
 */
public static int cat_query_cnt_dand=0;
/**
 * The amount of entities by Yahoo Content Analysis service that contained the whole query
 */
public static int ent_query_cnt_whole=0;
/**
 * The amount of categories by Yahoo Content Analysis service that contained the whole query
 */
public static int cat_query_cnt_whole=0;
/**
 * The amount of entities by Dandelion named Entity Extraction API that contained the whole query
 */
public static int ent_query_cnt_dand_whole=0;
/**
 * The amount of categories by Dandelion named Entity Extraction API that contained the whole query
 */
public static int cat_query_cnt_dand_whole=0;
JSONparsing(){links=new String[10];}//used for Google, which returns at most 10 results per request
JSONparsing(int results_number){links_yahoo_bing=new String[results_number];}//used for Yahoo/Bing with a caller-chosen result count
/**
 * Parses the JSON response of the Google Search API and extracts the result
 * URLs (Google returns at most 10 results per request).
 *
 * @param input the raw JSON response body
 * @return the shared {@code links} array filled with the result URLs; on a
 *         parse failure the array is returned as-is (possibly partially
 *         filled from this or a previous call)
 */
public String[] GoogleJsonParsing(String input) {
    try {
        JSONParser parser = new JSONParser();
        JSONObject json = (JSONObject) parser.parse(input);
        int i = 0;
        // The "items" entry holds the array of result objects; each result's
        // URL lives under the "link" key.
        for (Object o : json.entrySet()) {
            Map.Entry entry = (Map.Entry) o;
            if (entry.getKey().toString().equalsIgnoreCase("items")) {
                JSONArray jsonarray = (JSONArray) entry.getValue();
                for (Object item : jsonarray) {
                    // Guard against responses with more items than the
                    // fixed-size links array (sized 10 by the constructor)
                    // — the original threw ArrayIndexOutOfBoundsException.
                    if (i >= links.length) {
                        break;
                    }
                    Object link = ((JSONObject) item).get("link");
                    // Skip malformed items with no "link" field instead of
                    // throwing an uncaught NullPointerException.
                    if (link != null) {
                        links[i] = link.toString();
                        i++;
                    }
                }
            }
        }
        return links;
    } catch (ParseException ex) {
        Logger.getLogger(JSONparsing.class.getName()).log(Level.SEVERE, null, ex);
        return links;
    }
}
/**
 * Method to get the links from the Yahoo Search API JSON response.
 * The response is unwrapped level by level purely by entry position
 * (arr[0], arr[1], arr[0]) until the result array is reached, then the
 * "url" field of each result is collected.
 * NOTE(review): the positional indexing assumes a fixed Yahoo response
 * layout — any schema change silently breaks this; confirm against a
 * current response sample.
 * @param input the JSON response
 * @param yahoo_result_number the number of results to get (must not exceed
 *        the length of the shared links_yahoo_bing array)
 * @return an array of the urls of the results (returned as-is on parse failure)
 */
public String[] YahooJsonParsing(String input,int yahoo_result_number){
try {
//Create a parser
JSONParser parser = new JSONParser();
//Create the map
Map json = (Map) parser.parse(input);
// Get a set of the entries
Set set = json.entrySet();
Object[] arr = set.toArray();
Map.Entry entry = (Map.Entry) arr[0];
//****get to second level of yahoo jsonmap
String you = entry.getValue().toString();
json = (Map) parser.parse(you);
set = json.entrySet();
arr = set.toArray();
entry = (Map.Entry) arr[1];
//***get to third level of yahoo jsonmap
you = entry.getValue().toString();
json = (Map) parser.parse(you);
set = json.entrySet();
arr = set.toArray();
entry = (Map.Entry) arr[0];
you = entry.getValue().toString();
JSONArray json_arr = (JSONArray) parser.parse(you);
for (int j = 0; j < yahoo_result_number; j++) {
Map json_new = (Map) json_arr.get(j);
Set set_new = json_new.entrySet();
Object[] arr_new = set_new.toArray();
// Copy the result's entries into the shared scratch array.
// NOTE(review): entries_yahoo_bing is never allocated in this file —
// verify it is initialized (and large enough) before this runs.
for (int k = 0; k < arr_new.length; k++) {
entries_yahoo_bing[k] = (Map.Entry) arr_new[k];
}
//find the entry whose key is "url" in order to get the link
for (int y = 0; y < arr_new.length; y++) {
if (entries_yahoo_bing[y].getKey().toString().equalsIgnoreCase("url")) {
links_yahoo_bing[j] = (String) entries_yahoo_bing[y].getValue().toString();
}
}
}
return links_yahoo_bing;
} catch (ParseException ex) {
Logger.getLogger(JSONparsing.class.getName()).log(Level.SEVERE, null, ex);
return links_yahoo_bing;
}
}
/**
 * Parses the JSON response of the Bing (Azure) Search API and extracts the
 * result URLs.
 *
 * @param input the raw JSON response body
 * @param bing_result_number the results number requested (the shared
 *        {@code links_yahoo_bing} array must already be sized accordingly
 *        via the int constructor)
 * @return the shared {@code links_yahoo_bing} array filled with the result
 *         URLs; returned as-is on a parse failure
 */
public String[] BingAzureJsonParsing(String input, int bing_result_number) {
    try {
        JSONParser parser = new JSONParser();
        JSONObject jsonmap = (JSONObject) parser.parse(input);
        int i = 0;
        // Azure wraps the payload in a top-level "d" object whose "results"
        // array holds one object per hit; the URL lives under the "Url" key.
        for (Object o : jsonmap.entrySet()) {
            Map.Entry entry = (Map.Entry) o;
            if (entry.getKey().toString().equalsIgnoreCase("d")) {
                JSONObject jsonobject = (JSONObject) entry.getValue();
                JSONArray jsonarray = (JSONArray) jsonobject.get("results");
                for (Object hit : jsonarray) {
                    // Guard against the service returning more hits than the
                    // caller-sized array can hold — the original threw
                    // ArrayIndexOutOfBoundsException in that case.
                    if (i >= links_yahoo_bing.length) {
                        break;
                    }
                    links_yahoo_bing[i] = ((JSONObject) hit).get("Url").toString();
                    i++;
                }
            }
        }
        return links_yahoo_bing;
    } catch (ParseException ex) {
        Logger.getLogger(JSONparsing.class.getName()).log(Level.SEVERE, null, ex);
        return links_yahoo_bing;
    }
}
/**
 * Parses a Sindice API JSON response and reports which of 39 known semantic
 * namespaces occur among the predicates of the extracted triples.
 *
 * The response is unwrapped level by level
 * (extractorResults -> metadata -> explicit -> bindings) and every binding
 * entry whose key contains "p" contributes its "value" URI to the match.
 *
 * @param input the JSON response by the Sindice API (may be empty)
 * @return a boolean array of 39 flags, one per namespace, in the same order
 *         the original if-chain tested them; all false when the input is
 *         empty, the status is not "ok", or parsing fails. (Fixed: the
 *         error paths previously returned a 40-element array while the
 *         success path returned 39.)
 */
public boolean[] TripleParse(String input) {
    // One row per recognized namespace (row index == flag index); a
    // predicate URI sets a flag when it contains EVERY marker string of
    // the row (rows 23/24/25/27 need two substrings, matching the original
    // compound conditions).
    final String[][] markers = {
        {"http://purl.org/vocab/bio/0.1/"},
        {"http://purl.org/dc/elements/1.1/"},
        {"http://purl.org/coo/n"},
        {"http://web.resource.org/cc/"},
        {"http://diligentarguont.ontoware.org/2005/10/arguonto"},
        {"http://usefulinc.com/ns/doap"},
        {"http://xmlns.com/foaf/0.1/"},
        {"http://purl.org/goodrelations/"},
        {"http://purl.org/muto/core"},
        {"http://webns.net/mvcb/"},
        {"http://purl.org/ontology/mo/"},
        {"http://purl.org/innovation/ns"},
        {"http://openguid.net/rdf"},
        {"http://www.slamka.cz/ontologies/diagnostika.owl"},
        {"http://purl.org/ontology/po/"},
        {"http://purl.org/net/provenance/ns"},
        {"http://purl.org/rss/1.0/modules/syndication"},
        {"http://rdfs.org/sioc/ns"},
        {"http://madskills.com/public/xml/rss/module/trackback/"},
        {"http://rdfs.org/ns/void"},
        {"http://www.fzi.de/2008/wise/"},
        {"http://xmlns.com/wot/0.1"},
        {"http://www.w3.org/1999/02/22-rdf-syntax-ns"},
        {"http://www.w3.org/", "rdf-schema"},
        {"http://www.w3.org/", "XMLSchema#"},
        {"http://www.w3.org", "owl"},
        {"http://purl.org/dc/terms/"},
        {"http://www.w3.org/", "vcard"},
        {"http://www.geonames.org/ontology"},
        {"http://search.yahoo.com/searchmonkey/commerce/"},
        {"http://search.yahoo.com/searchmonkey/media/"},
        {"http://cb.semsol.org/ns#"},
        {"http://blogs.yandex.ru/schema/foaf/"},
        {"http://www.w3.org/2003/01/geo/wgs84_pos#"},
        {"http://rdfs.org/sioc/ns#"},
        {"http://rdfs.org/sioc/types#"},
        {"http://smw.ontoware.org/2005/smw#"},
        {"http://purl.org/rss/1.0/"},
        {"http://www.w3.org/2004/12/q/contentlabel#"}
    };
    boolean[] namespaces = new boolean[markers.length]; // 39 flags, all false
    try {
        if (input.length() > 0) {
            JSONParser parser = new JSONParser();
            Map json = (Map) parser.parse(input);
            Object[] arr = json.entrySet().toArray();
            // Only proceed when the response reports status "ok".
            Map.Entry status = findEntry(arr, "status");
            Map.Entry results = findEntry(arr, "extractorResults");
            if (status == null || results == null
                    || !status.getValue().toString().equalsIgnoreCase("ok")) {
                return namespaces;
            }
            // Descend extractorResults -> metadata -> explicit -> bindings.
            // (Fixed: when a level key was missing, the original reused a
            // stale index into the wrong level; now we stop cleanly.)
            arr = entriesOf(parser, results.getValue());
            Map.Entry metadata = findEntry(arr, "metadata");
            if (metadata == null) {
                return namespaces;
            }
            arr = entriesOf(parser, metadata.getValue());
            Map.Entry explicitTriples = findEntry(arr, "explicit");
            if (explicitTriples == null) {
                return namespaces;
            }
            arr = entriesOf(parser, explicitTriples.getValue());
            Map.Entry bindings = findEntry(arr, "bindings");
            if (bindings == null) {
                return namespaces;
            }
            JSONArray bindingArray = (JSONArray) bindings.getValue();
            for (int p = 0; p < bindingArray.size(); p++) {
                Object[] bindingEntries = ((Map) bindingArray.get(p)).entrySet().toArray();
                for (int kj = 0; kj < bindingEntries.length; kj++) {
                    Map.Entry entry = (Map.Entry) bindingEntries[kj];
                    // Predicate entries are identified by a key containing
                    // "p", matching the original behaviour.
                    // NOTE(review): this also matches any unrelated key that
                    // merely contains the letter p — confirm intended.
                    if (!entry.getKey().toString().contains("p")) {
                        continue;
                    }
                    String value = ((JSONObject) entry.getValue()).get("value").toString();
                    for (int n = 0; n < markers.length; n++) {
                        if (!namespaces[n]) {
                            namespaces[n] = containsAll(value, markers[n]);
                        }
                    }
                }
            }
        }
        return namespaces;
    } catch (ParseException ex) {
        Logger.getLogger(JSONparsing.class.getName()).log(Level.SEVERE, null, ex);
        return new boolean[markers.length];
    } catch (Exception x) {
        Logger.getLogger(JSONparsing.class.getName()).log(Level.SEVERE, null, x);
        return new boolean[markers.length];
    }
}
/** Returns the entry whose key equals {@code key} (case-insensitive), or null. */
private static Map.Entry findEntry(Object[] arr, String key) {
    for (Object o : arr) {
        Map.Entry e = (Map.Entry) o;
        if (e.getKey().toString().equalsIgnoreCase(key)) {
            return e;
        }
    }
    return null;
}
/** Re-parses a nested JSON value as a map and returns its entries. */
private static Object[] entriesOf(JSONParser parser, Object value) throws ParseException {
    Map child = (Map) parser.parse(value.toString());
    return child.entrySet().toArray();
}
/** True when {@code value} contains every string in {@code needed}. */
private static boolean containsAll(String value, String[] needed) {
    for (String s : needed) {
        if (!value.contains(s)) {
            return false;
        }
    }
    return true;
}
/**
 * Parses the JSON response of the Diffbot API and returns the tag text of
 * the first top-level entry, cleaned and lower-cased.
 *
 * @param input the JSON response of Diffbot
 * @return the cleaned, lower-case tag string, or "fail" when parsing fails
 */
public String DiffbotParsing(String input) {
    try {
        JSONParser parser = new JSONParser();
        Map json = (Map) parser.parse(input);
        // The tags live in the value of the first top-level entry.
        // (Removed an unused duplicate local that held the same value.)
        Map.Entry entry = (Map.Entry) json.entrySet().toArray()[0];
        String you = entry.getValue().toString();
        // Strip unwanted characters and normalize the case.
        DataManipulation tp = new DataManipulation();
        return tp.removeChars(you).toLowerCase();
    } catch (ParseException ex) {
        Logger.getLogger(JSONparsing.class.getName()).log(Level.SEVERE, null, ex);
        return "fail";
    }
}
private List<String> entities;//all semantic entities kept from the last Yahoo Content Analysis response
private List<String> categories;//all semantic categories kept from the last Yahoo Content Analysis response
private double ent_avg_yahoo_score;//sum of above-threshold entity scores divided by the total entity count
private double cat_avg_yahoo_score;//sum of above-threshold category scores divided by the total category count
/**
 * Extracts the semantic entities and categories (and the query-match
 * statistics) from a Yahoo Content Analysis API JSON response.
 *
 * Side effects: re-populates the {@code entities}/{@code categories} lists,
 * the static partial/whole query-match counters
 * ({@code ent_query_cnt}, {@code cat_query_cnt}, {@code ent_query_cnt_whole},
 * {@code cat_query_cnt_whole}) and the average scores.
 *
 * @param input the JSON response by the Yahoo Content Analysis API
 * @param quer the query to count the stats for (words separated by '+')
 * @param StemFlag when true, the query words and the extracted terms are stemmed
 * @param score_threshold minimum score for an entity/category to be kept
 */
public void YahooEntityJsonParsing(String input, String quer, boolean StemFlag, double score_threshold) {
    try {
        double threshold = score_threshold; // score cut-off for entities/categories
        ent_query_cnt = 0;
        cat_query_cnt = 0;
        entities = new ArrayList<>();   // entities kept from this response
        categories = new ArrayList<>(); // categories kept from this response
        ent_avg_yahoo_score = 0.0;
        cat_avg_yahoo_score = 0.0;
        JSONParser parser = new JSONParser();
        Map json = (Map) parser.parse(input);
        Set set = json.entrySet();
        Object[] arr = set.toArray();
        Map.Entry entry = (Map.Entry) arr[0];
        // Get to the second level of the Yahoo json map.
        String you = entry.getValue().toString();
        json = (Map) parser.parse(you);
        set = json.entrySet();
        arr = set.toArray();
        // Find the "results" entry at this level.
        // NOTE(review): if no key contains "results", entry is left pointing
        // at the LAST element and the code below operates on it — confirm
        // responses always carry a results entry.
        searchforresult:
        for (int kj = 0; kj < arr.length; kj++) {
            entry = (Map.Entry) arr[kj];
            if (entry.getKey().toString().contains("results")) {
                break searchforresult;
            }
        }
        // Get to the third level: the results object holds "yctCategories"
        // (categories) and "entities".
        if (entry.getValue() != null) {
            you = entry.getValue().toString();
            json = (Map) parser.parse(you);
            set = json.entrySet();
            arr = set.toArray();
            for (int jk = 0; jk < arr.length; jk++) {
                entry = (Map.Entry) arr[jk];
                // --- categories first ---
                if (entry.getKey().toString().contains("yctCategories")) {
                    you = entry.getValue().toString();
                    json = (Map) parser.parse(you);
                    set = json.entrySet();
                    Object[] arr_cat = set.toArray();
                    for (int ip = 0; ip < arr_cat.length; ip++) {
                        entry = (Map.Entry) arr_cat[ip];
                        if (entry.getKey().toString().contains("yctCategory")) {
                            you = entry.getValue().toString();
                            // The value is either a JSON array (several
                            // categories) or a single JSON object.
                            if (you.startsWith("[")) {
                                JSONArray json_arr = (JSONArray) parser.parse(you);
                                for (int ka = 0; ka < json_arr.size(); ka++) {
                                    json = (Map) json_arr.get(ka);
                                    set = json.entrySet();
                                    arr_cat = set.toArray();
                                    double score = 0.0;
                                    for (int kj = 0; kj < arr_cat.length; kj++) {
                                        entry = (Map.Entry) arr_cat[kj];
                                        if (entry.getKey().toString().contains("score")) {
                                            score = Double.parseDouble(entry.getValue().toString());
                                            if (score > threshold) {
                                                cat_avg_yahoo_score = cat_avg_yahoo_score + score;
                                            }
                                        }
                                        // Relies on "score" preceding "content" in entry order.
                                        // NOTE(review): unlike the object branch below, this
                                        // branch never applies stemming — confirm intended.
                                        if (entry.getKey().toString().contains("content") && score > threshold) {
                                            categories.add(entry.getValue().toString().toLowerCase());
                                        }
                                    }
                                }
                            }
                            if (you.startsWith("{")) {
                                json = (Map) parser.parse(you);
                                set = json.entrySet();
                                arr_cat = set.toArray();
                                double score = 0.0;
                                for (int ka = 0; ka < arr_cat.length; ka++) {
                                    entry = (Map.Entry) arr_cat[ka];
                                    if (entry.getKey().toString().contains("score")) {
                                        score = Double.parseDouble(entry.getValue().toString());
                                        if (score > threshold) {
                                            cat_avg_yahoo_score = cat_avg_yahoo_score + score;
                                        }
                                    }
                                    if (entry.getKey().toString().contains("content") && score > threshold) {
                                        String categoryString = entry.getValue().toString().toLowerCase();
                                        if (StemFlag) {
                                            // Stem each word and rejoin with single spaces.
                                            String[] splitEntity = categoryString.split(" ");
                                            categoryString = "";
                                            StemmerSnow stemmer = new StemmerSnow();
                                            List<String> splitEntityList = stemmer.stem(Arrays.asList(splitEntity));
                                            StringBuilder sb = new StringBuilder();
                                            for (String s : splitEntityList) {
                                                sb.append(s.trim());
                                                sb.append(" ");
                                            }
                                            categoryString = sb.toString().trim();
                                        }
                                        categories.add(categoryString);
                                    }
                                }
                            }
                        }
                    }
                }
                // --- entities now ---
                if (entry.getKey().toString().contains("entities")) {
                    you = entry.getValue().toString();
                    json = (Map) parser.parse(you);
                    set = json.entrySet();
                    Object[] arr_ent = set.toArray();
                    for (int ip = 0; ip < arr_ent.length; ip++) {
                        entry = (Map.Entry) arr_ent[ip];
                        if (entry.getKey().toString().contains("entity")) {
                            you = entry.getValue().toString();
                            // The value is either a JSON array (several
                            // entities) or a single JSON object.
                            if (you.startsWith("[")) {
                                JSONArray json_arr = (JSONArray) parser.parse(you);
                                for (int ka = 0; ka < json_arr.size(); ka++) {
                                    json = (Map) json_arr.get(ka);
                                    set = json.entrySet();
                                    arr_ent = set.toArray();
                                    double score = 0.0;
                                    for (int kj = 0; kj < arr_ent.length; kj++) {
                                        entry = (Map.Entry) arr_ent[kj];
                                        if (entry.getKey().toString().contains("score")) {
                                            score = Double.parseDouble(entry.getValue().toString());
                                            if (score > threshold) {
                                                ent_avg_yahoo_score = ent_avg_yahoo_score + score;
                                            }
                                        }
                                        if (entry.getKey().toString().contains("text") && score > threshold) {
                                            // The "text" value is itself an object whose
                                            // "content" field holds the entity string.
                                            // NOTE(review): arr_ent is reassigned while the
                                            // enclosing kj-loop still uses arr_ent.length —
                                            // kept as-is to preserve behaviour, but fragile.
                                            you = entry.getValue().toString();
                                            json = (Map) parser.parse(you);
                                            set = json.entrySet();
                                            arr_ent = set.toArray();
                                            for (int kai = 0; kai < arr_ent.length; kai++) {
                                                entry = (Map.Entry) arr_ent[kai];
                                                if (entry.getKey().toString().contains("content")) {
                                                    entities.add(entry.getValue().toString().toLowerCase());
                                                }
                                            }
                                        }
                                    }
                                }
                            }
                            if (you.startsWith("{")) {
                                json = (Map) parser.parse(you);
                                set = json.entrySet();
                                arr_ent = set.toArray();
                                double score = 0.0;
                                for (int ka = 0; ka < arr_ent.length; ka++) {
                                    entry = (Map.Entry) arr_ent[ka];
                                    if (entry.getKey().toString().contains("score")) {
                                        score = Double.parseDouble(entry.getValue().toString());
                                        if (score > threshold) {
                                            ent_avg_yahoo_score = ent_avg_yahoo_score + score;
                                        }
                                    }
                                    if (entry.getKey().toString().contains("text") && score > threshold) {
                                        you = entry.getValue().toString();
                                        json = (Map) parser.parse(you);
                                        set = json.entrySet();
                                        arr_ent = set.toArray();
                                        for (int kai = 0; kai < arr_ent.length; kai++) {
                                            entry = (Map.Entry) arr_ent[kai];
                                            if (entry.getKey().toString().contains("content")) {
                                                String entityString = entry.getValue().toString().toLowerCase();
                                                if (StemFlag) {
                                                    // Stem each word and rejoin with single spaces.
                                                    String[] splitEntity = entityString.split(" ");
                                                    entityString = "";
                                                    StemmerSnow stemmer = new StemmerSnow();
                                                    List<String> splitEntityList = stemmer.stem(Arrays.asList(splitEntity));
                                                    StringBuilder sb = new StringBuilder();
                                                    for (String s : splitEntityList) {
                                                        sb.append(s.trim());
                                                        sb.append(" ");
                                                    }
                                                    entityString = sb.toString().trim();
                                                }
                                                entities.add(entityString);
                                            }
                                        }
                                    }
                                }
                            }
                        }
                    }
                }
            }
        }
        // Reset the query-match counters before recounting.
        // (Fixed: the original reset cat_query_cnt_whole twice and never
        // re-reset cat_query_cnt here — a copy-paste slip.)
        ent_query_cnt = 0;
        cat_query_cnt = 0;
        ent_query_cnt_whole = 0;
        cat_query_cnt_whole = 0;
        quer = quer.toLowerCase();
        String[] split = quer.split("\\+"); // query words arrive '+'-separated
        if (StemFlag) {
            // Stem the query words so they compare against the stemmed terms.
            List<String> splitQuery = Arrays.asList(split);
            StemmerSnow stemmer = new StemmerSnow();
            splitQuery = stemmer.stem(splitQuery);
            split = splitQuery.toArray(new String[splitQuery.size()]);
        }
        int ent_count = 0;
        for (String s : entities) {
            ent_count = 0;
            for (String splitStr : split) {
                if (s.contains(splitStr)) {
                    ent_query_cnt++; // partial match: entity contains this query word
                    ent_count++;
                }
            }
            if (ent_count == split.length) {
                ent_query_cnt_whole++; // entity contained every query word
            }
        }
        int cat_count = 0;
        for (String s : categories) {
            cat_count = 0;
            for (String splitStr : split) {
                if (s.contains(splitStr)) {
                    cat_query_cnt++; // partial match: category contains this query word
                    cat_count++;
                }
            }
            if (cat_count == split.length) {
                cat_query_cnt_whole++; // category contained every query word
            }
        }
        // Averages over ALL kept terms (sums only include above-threshold
        // scores). Guard empty lists so the scores stay 0.0 instead of
        // becoming NaN through 0.0/0.0 as in the original.
        if (!entities.isEmpty()) {
            ent_avg_yahoo_score = ent_avg_yahoo_score / (double) entities.size();
        }
        if (!categories.isEmpty()) {
            cat_avg_yahoo_score = cat_avg_yahoo_score / (double) categories.size();
        }
    } catch (ParseException ex) {
        Logger.getLogger(JSONparsing.class.getName()).log(Level.SEVERE, null, ex);
    }
}
/**
 * Method to get the Yahoo entities counter with partial query match
 * @return count of (entity, query-word) partial matches from the last parse
 */
public int GetEntQuerCnt(){
return ent_query_cnt;
}
/**
 * Method to get the Yahoo categories counter with partial query match
 * @return count of (category, query-word) partial matches from the last parse
 */
public int GetCatQuerCnt(){
return cat_query_cnt;
}
/**
 * Method to get the Yahoo entities counter containing the whole query
 * @return count of entities that contained every word of the query
 */
public int GetEntQuerCntWhole(){
return ent_query_cnt_whole;
}
/**
 * Method to get the Yahoo categories counter containing the whole query
 * @return count of categories that contained every word of the query
 */
public int GetCatQuerCntWhole(){
return cat_query_cnt_whole;
}
/**
 * Method to get the entities List from the last Yahoo Content Analysis parse
 * @return entities List (null until YahooEntityJsonParsing has run)
 */
public List<String> GetEntitiesYahoo(){return entities;}
/**
 * Method to get the categories List from the last Yahoo Content Analysis parse
 * @return categories List (null until YahooEntityJsonParsing has run)
 */
public List<String> GetCategoriesYahoo(){return categories;}
/**
 * Method to get the entities average score
 * @return average score of the entities recognized
 */
public double GetEntitiesScoreYahoo(){return ent_avg_yahoo_score;}
/**
 * Method to get the categories average score
 * @return average score of the categories recognized
 */
public double GetCategoriesScoreYahoo(){return cat_avg_yahoo_score;}
/**
 * Fetches the meta info (description + title) for a YouTube video through
 * the YouTube Data API v3 and returns it with stopwords removed.
 *
 * @param ventry the id of the YouTube video
 * @return the stopword-filtered "description title" string (possibly empty
 *         when no snippet is found), or null when the request, the parsing,
 *         or the response layout fails
 */
public String GetYoutubeDetails(String ventry) {
    try {
        // SECURITY(review): hard-coded API key committed to source control —
        // move it to configuration and rotate the key.
        String apikey = "AIzaSyDLm-MfYHcbTHQO1S8ROX2rpvsqd5oYSRI";
        String output = "";
        URL link_ur = new URL("https://www.googleapis.com/youtube/v3/videos?id=" + ventry + "&key=" + apikey + "&part=snippet");
        APIconn apicon = new APIconn();
        String line = apicon.connect(link_ur);
        JSONParser parser = new JSONParser();
        Map json = (Map) parser.parse(line);
        // Locate the "items" array among the top-level entries.
        Map.Entry entry = null;
        boolean flagfound = false;
        Iterator iterator = json.entrySet().iterator();
        while (iterator.hasNext() && !flagfound) {
            entry = (Map.Entry) iterator.next();
            if (entry.getKey().toString().equalsIgnoreCase("items")) {
                flagfound = true;
            }
        }
        // Fixed: the original dereferenced entry unconditionally and threw
        // an uncaught NullPointerException when "items" was absent (e.g. an
        // API error response); treat that as the usual failure path.
        if (!flagfound || entry == null) {
            return null;
        }
        JSONArray jsonarray = (JSONArray) entry.getValue();
        // Find the first item that carries a "snippet" object.
        JSONObject get = null;
        flagfound = false;
        Iterator iteratorarray = jsonarray.iterator();
        while (iteratorarray.hasNext() && !flagfound) {
            JSONObject next = (JSONObject) iteratorarray.next();
            if (next.containsKey("snippet")) {
                get = (JSONObject) next.get("snippet");
                flagfound = true;
            }
        }
        String description = "";
        String title = "";
        if (flagfound) {
            if (get.containsKey("description")) {
                description = get.get("description").toString();
            }
            if (get.containsKey("title")) {
                title = get.get("title").toString();
            }
            output = description + " " + title;
        }
        // Strip stopwords before returning.
        Stopwords stopwords = new Stopwords();
        output = stopwords.stop(output);
        return output;
    } catch (IOException | ArrayIndexOutOfBoundsException | ParseException ex) {
        Logger.getLogger(JSONparsing.class.getName()).log(Level.SEVERE, null, ex);
        return null;
    }
}
//NOTE(review): a javadoc block describing DandelionParsing used to sit here,
//but javadoc attaches to the next declaration, so it documented the field
//below instead of the method; the fields are documented inline instead.
private List<String> entitiesDand = new ArrayList<>();//all entities extracted by the Dandelion API — never cleared, so results accumulate across calls (TODO confirm intended)
private List<String> categoriesDand = new ArrayList<>();//all categories extracted by the Dandelion API — never cleared, so results accumulate across calls (TODO confirm intended)
private double ent_avg_dand_score=0.0;//average confidence of the Dandelion annotations
/**
 * Extracts the entities and categories (and the query-match statistics) from
 * a Dandelion named Entity Extraction API JSON response.
 *
 * Side effects: appends to {@code entitiesDand}/{@code categoriesDand}
 * (NOTE(review): the lists are never cleared, so results accumulate across
 * calls — confirm this is intended), and updates the static Dandelion
 * counters and the average confidence score.
 *
 * @param input the JSON response by the Dandelion API
 * @param query the query to count the stats for (words separated by '+')
 * @param StemFlag when true, the query words and the extracted terms are stemmed
 */
public void DandelionParsing(String input, String query, boolean StemFlag) {
    try {
        ent_avg_dand_score = 0.0;
        // Parse the response once (fixed: the original parsed the input
        // twice and discarded the first result).
        JSONParser parser = new JSONParser();
        Map json = (Map) parser.parse(input);
        // Search the top level for the "annotations" entry.
        Iterator iterator = json.entrySet().iterator();
        Map.Entry entry = null;
        boolean flagfound = false;
        while (iterator.hasNext() && !flagfound) {
            entry = (Map.Entry) iterator.next();
            if (entry.getKey().toString().equalsIgnoreCase("annotations")) {
                flagfound = true;
            }
        }
        if (flagfound) {
            // Walk every annotation object (removed a dead "get" local and a
            // never-true loop-exit flag from the original).
            JSONArray jsonarray = (JSONArray) entry.getValue();
            Iterator iteratorarray = jsonarray.iterator();
            while (iteratorarray.hasNext()) {
                JSONObject next = (JSONObject) iteratorarray.next();
                if (next.containsKey("label")) {
                    String entityString = next.get("label").toString().toLowerCase();
                    if (StemFlag) {
                        // Stem each word and rejoin with single spaces.
                        String[] splitEntity = entityString.split(" ");
                        entityString = "";
                        StemmerSnow stemmer = new StemmerSnow();
                        List<String> splitEntityList = stemmer.stem(Arrays.asList(splitEntity));
                        StringBuilder sb = new StringBuilder();
                        for (String s : splitEntityList) {
                            sb.append(s.trim());
                            sb.append(" ");
                        }
                        entityString = sb.toString().trim();
                    }
                    entitiesDand.add(entityString);
                }
                if (next.containsKey("categories")) {
                    // Distinct local for the category array (the original
                    // reused the outer jsonarray variable).
                    JSONArray categoryArray = (JSONArray) next.get("categories");
                    for (int i = 0; i < categoryArray.size(); i++) {
                        String categoryString = categoryArray.get(i).toString().toLowerCase();
                        if (StemFlag) {
                            // Stem each word and rejoin with single spaces.
                            String[] splitEntity = categoryString.split(" ");
                            categoryString = "";
                            StemmerSnow stemmer = new StemmerSnow();
                            List<String> splitEntityList = stemmer.stem(Arrays.asList(splitEntity));
                            StringBuilder sb = new StringBuilder();
                            for (String s : splitEntityList) {
                                sb.append(s.trim());
                                sb.append(" ");
                            }
                            categoryString = sb.toString().trim();
                        }
                        categoriesDand.add(categoryString);
                    }
                }
                if (next.containsKey("confidence")) {
                    ent_avg_dand_score = ent_avg_dand_score + Double.parseDouble(next.get("confidence").toString());
                }
            }
            // Average over all accumulated entities; guard empty lists so
            // the score stays 0.0 instead of becoming NaN (0.0/0.0).
            if (!entitiesDand.isEmpty()) {
                ent_avg_dand_score = ent_avg_dand_score / (double) entitiesDand.size();
            }
            // Reset and recount the query-match statistics.
            ent_query_cnt_dand = 0;
            cat_query_cnt_dand = 0;
            ent_query_cnt_dand_whole = 0;
            cat_query_cnt_dand_whole = 0;
            query = query.toLowerCase();
            String[] split = query.split("\\+"); // query words arrive '+'-separated
            if (StemFlag) {
                // Stem the query words so they compare against the stemmed terms.
                List<String> splitQuery = Arrays.asList(split);
                StemmerSnow stemmer = new StemmerSnow();
                splitQuery = stemmer.stem(splitQuery);
                split = splitQuery.toArray(new String[splitQuery.size()]);
            }
            int ent_count = 0;
            for (String s : entitiesDand) {
                ent_count = 0;
                for (String splitStr : split) {
                    if (s.contains(splitStr)) {
                        ent_query_cnt_dand++; // partial match: entity contains this query word
                        ent_count++;
                    }
                }
                if (ent_count == split.length) {
                    ent_query_cnt_dand_whole++; // entity contained every query word
                }
            }
            int cat_count = 0;
            for (String s : categoriesDand) {
                cat_count = 0;
                for (String splitStr : split) {
                    if (s.contains(splitStr)) {
                        cat_query_cnt_dand++; // partial match: category contains this query word
                        cat_count++;
                    }
                }
                if (cat_count == split.length) {
                    cat_query_cnt_dand_whole++; // category contained every query word
                }
            }
        }
    } catch (ParseException ex) {
        Logger.getLogger(JSONparsing.class.getName()).log(Level.SEVERE, null, ex);
    }
}
/**
 * Method to return the Dandelion entities counter (partial query match)
 * @return the entities counter (partial query match)
 */
public int GetEntQuerCntDand(){
return ent_query_cnt_dand;
}
/**
 * Method to return the Dandelion categories counter (partial query match)
 * @return the categories counter (partial query match)
 */
public int GetCatQuerCntDand(){
return cat_query_cnt_dand;
}
/**
 * Method to return the entities counter with whole query match
 * @return the entities counter with whole query match
 */
public int GetEntQuerCntDandWhole(){
return ent_query_cnt_dand_whole;
}
/**
 * Method to return the categories counter with whole query match
 * @return the categories counter with whole query match
 */
public int GetCatQuerCntDandWhole(){
return cat_query_cnt_dand_whole;
}
/**
 * Method to return the entities by Dandelion API
 * @return the entities by Dandelion API (accumulated across parses)
 */
public List<String> GetEntitiesDand(){
return entitiesDand;
}
/**
 * Method to return the categories by Dandelion API
 * @return the categories by Dandelion API (accumulated across parses)
 */
public List<String> GetCategoriesDand(){
return categoriesDand;
}
/**
 * Method to return the entities average score by Dandelion API
 * @return the entities average confidence score by Dandelion API
 */
public double GetEntitiesScoreDand(){
return ent_avg_dand_score;
}
}