/*
* Copyright 2015 Themistoklis Mavridis <themis.mavridis@issel.ee.auth.gr>.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.thesmartweb.swebrank;
import java.io.UnsupportedEncodingException;
import java.net.MalformedURLException;
import java.net.URL;
import java.net.URLEncoder;
import java.util.List;
import java.util.logging.Level;
import java.util.logging.Logger;
/**
* Class to deal with the semantic entities and concepts (categories) of Dandelion named Entity Extraction API
* <a href="https://dandelion.eu/products/datatxt/nex/demo/?text=The+Mona+Lisa+is+a+16th+century+oil+painting+created+by+Leonardo.+It%27s+held+at+the+Louvre+in+Paris.&lang=auto&min_confidence=0.6&exec=true#results">check more</a>
* @author Themistoklis Mavridis
*/
public class DandelionEntities {
    private static final Logger LOGGER = Logger.getLogger(DandelionEntities.class.getName());

    // NOTE: the four counters below are public static, so they are shared by every
    // instance of this class and are overwritten by each call to connect().
    public static int ent_query_cnt=0;// the number of entities that contained a term of the query
    public static int cat_query_cnt=0;// the number of categories that contained a term of the query
    public static int ent_query_cnt_whole=0;//the number of entities that contained the query as a whole
    public static int cat_query_cnt_whole=0;//the number of categories that contained the query as a whole
    private List<String> entities;//the list to contain all the semantic entities
    private List<String> categories;//the list to contain all the semantic categories
    private double ent_avg_dand_score=0.0;//the average score of the entities recognized

    /**
     * Method that recognizes the entities through Dandelion named Entity Extraction API of the content of a given URL.
     * <p>
     * All results of a previous call (the four counters, the entities and categories lists and the
     * average score) are cleared before the request is made. If the request cannot be performed
     * (incomplete credentials, malformed URL, a null or "fail" response) the counters stay at 0,
     * the score stays at 0.0 and the lists stay {@code null}, instead of exposing the values
     * computed for an earlier URL.
     *
     * @param urlcheck the url to be annotated
     * @param quer the query term which the url was a result of
     * @param StemFlag a flag to determine if we want to use stemming
     * @param config_path the path to find the keys for dandelion
     * @param confi the min confidence for dandelion api
     */
    public void connect(String urlcheck, String quer, boolean StemFlag, String config_path, double confi) {
        // Reset everything, not only the partial-match counters, so that a failed
        // call never reports stale data from the previously annotated URL.
        ent_query_cnt = 0;
        cat_query_cnt = 0;
        ent_query_cnt_whole = 0;
        cat_query_cnt_whole = 0;
        entities = null;
        categories = null;
        ent_avg_dand_score = 0.0;
        try {
            String[] apiCreds = GetKeys(config_path);
            // The request needs two values: the app id (index 0) and the app key (index 1).
            if (apiCreds.length < 2) {
                LOGGER.log(Level.SEVERE,
                        "Dandelion credentials are incomplete: expected an app id and an app key but found {0} value(s)",
                        apiCreds.length);
                return;
            }
            String fullUrlStr = "https://api.dandelion.eu/datatxt/nex/v1?url="
                    + URLEncoder.encode(urlcheck, "UTF-8")
                    + "&min_confidence=" + Double.toString(confi)
                    + "&include=types%2Ccategories%2Clod"
                    + "&$app_id=" + apiCreds[0]
                    + "&$app_key=" + apiCreds[1];
            URL link_ur = new URL(fullUrlStr);
            //we connect and then check the connection
            APIconn apicon = new APIconn();
            String line = apicon.sslconnect(link_ur);
            // "fail" is the marker this code treats as an unsuccessful connection;
            // a null response is handled the same way instead of raising a NullPointerException.
            if (line == null || line.equalsIgnoreCase("fail")) {
                return;
            }
            JSONparsing jsonParser = new JSONparsing();
            // hand the raw response to the parser, then copy its results into this object
            jsonParser.DandelionParsing(line, quer, StemFlag);
            ent_query_cnt = jsonParser.GetEntQuerCntDand();
            cat_query_cnt = jsonParser.GetCatQuerCntDand();
            ent_query_cnt_whole = jsonParser.GetEntQuerCntDandWhole();
            cat_query_cnt_whole = jsonParser.GetCatQuerCntDandWhole();
            entities = jsonParser.GetEntitiesDand();
            categories = jsonParser.GetCategoriesDand();
            ent_avg_dand_score = jsonParser.GetEntitiesScoreDand();
        } catch (MalformedURLException | UnsupportedEncodingException ex) {
            LOGGER.log(Level.SEVERE, null, ex);
        }
    }

    /**
     * Method to get the entities counter (partial query match)
     * @return entities counter
     */
    public int getEnt(){return ent_query_cnt;}

    /**
     * Method to get the categories counter (partial query match)
     * @return categories counter that have a partial query match
     */
    public int getCat(){return cat_query_cnt;}

    /**
     * Method to get the entities counter (whole query match)
     * @return entities counter that have whole query match
     */
    public int getEntWhole(){return ent_query_cnt_whole;}

    /**
     * Method to get the categories counter (whole query match)
     * @return categories counter that have whole query match
     */
    public int getCatWhole(){return cat_query_cnt_whole;}

    /**
     * Method to get the entities List
     * @return entities List, or {@code null} if the last call to connect did not produce a result
     */
    public List<String> GetEntitiesDand(){return entities;}

    /**
     * Method to get the entities average score
     * @return entities score of entities recognized, 0.0 if the last call to connect did not produce a result
     */
    public double GetEntitiesScoreDand(){return ent_avg_dand_score;}

    /**
     * Method to get the categories List
     * @return categories List, or {@code null} if the last call to connect did not produce a result
     */
    public List<String> GetCategoriesDand(){return categories;}

    /**
     * Method to get the keys of Dandelion API
     * @param config_path the directory to get the keys from
     * @return all the keys of Dandelion API read from the "dandelionkeys" key file; an empty array if none were returned
     */
    public String[] GetKeys(String config_path){
        ReadInput ri = new ReadInput();
        List<String> dandKeysList = ri.GetKeyFile(config_path, "dandelionkeys");
        if (dandKeysList == null) {
            return new String[0];
        }
        return dandKeysList.toArray(new String[dandKeysList.size()]);
    }
}