/*
* Copyright 2015 Themistoklis Mavridis <themis.mavridis@issel.ee.auth.gr>.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.thesmartweb.swebrank;
import java.util.*;
import java.sql.PreparedStatement;
import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.SQLException;
import java.util.logging.Level;
import java.util.logging.Logger;
import org.elasticsearch.ElasticsearchException;
import org.elasticsearch.action.index.IndexRequest;
import org.elasticsearch.action.index.IndexResponse;
import org.elasticsearch.client.Client;
import org.elasticsearch.client.transport.TransportClient;
import org.elasticsearch.common.settings.ImmutableSettings;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.common.transport.InetSocketTransportAddress;
import org.elasticsearch.node.Node;
import static org.elasticsearch.node.NodeBuilder.*;
import org.json.simple.JSONArray;
import org.json.simple.JSONObject;
/**
* Class for analysis of all the queries through Search APIs and capturing of the result statistics
* @author Themistoklis Mavridis
*/
public class Search_analysis {
/**
* Method to perform the queries to the search engines, get the links and get all the webpage and semantic stats for the links
* @param iteration_counter The iteration number of the algorithm (to use it in the id for elasticsearch)
* @param directory_save The directory we are going to several files
* @param domain The domain that we are searching for (to use it in the id for elasticsearch)
* @param enginechoice The search engines that were chosen to be used
* @param quer the query we search for
* @param results_number the results number that we are going to get from every search engine
* @param top_visible the number of results if we use Visibility score
* @param SWebRankSettings the settings for LDA and SwebRank in general (check the ReadInput Class)
* @param alpha alpha value of LDA
* @param mozMetrics the metrics of choice if Moz is going to be used
* @param top_count_moz the amount of results if we use Moz
* @param moz_threshold_option flag to show if we are going to use a threshold in Moz metrics or not
* @param moz_threshold the moz threshold value
* @param ContentSemantics get the choice of Content Semantic Analysis algorithm that we are going to use
* @param SensebotConcepts the amount of concepts to be recognized if Sensebot is used
* @param config_path the configuration path to get all the api keys
* @return a list with the words recognized as important by the content semantic analysis algorithm we have chosen
*/
public List<String> perform(int iteration_counter,String directory_save, String domain, List<Boolean> enginechoice, String quer, int results_number, int top_visible,List<Double> SWebRankSettings,double alpha, List<Boolean> mozMetrics, int top_count_moz, boolean moz_threshold_option,double moz_threshold, List<Boolean> ContentSemantics, int SensebotConcepts, String config_path){
//=======connect to mysql=========
Connection conn = null;
PreparedStatement stmt = null;
try {
ReadInput ri = new ReadInput();
List<String> mysqlAdminSettings= ri.GetKeyFile(config_path, "mysqlAdmin");
String port = mysqlAdminSettings.get(2);
String dbname = mysqlAdminSettings.get(3);
String url = "jdbc:mysql://localhost:"+port+"/"+dbname+"?zeroDateTimeBehavior=convertToNull";
String user = mysqlAdminSettings.get(0);
String password = mysqlAdminSettings.get(1);
System.out.println("Connecting to database...");
conn = DriverManager.getConnection(url,user,password);
LinksParseAnalysis ld=new LinksParseAnalysis();
//we create the array that are going to store the results from each search engine
String[] links_google=new String[results_number];
String[] links_yahoo=new String[results_number];
String[] links_bing=new String[results_number];
//we create the array that is going to store all the results from all the search engines together
String[] links_total=new String[(results_number*3)];
//--------if we have selected to use a Moz metric, then we should set the links_total to be of size of top_count_seomoz*3 since it means that the results_number has been set to its max value (50)
if(mozMetrics.get(0)){
links_total=new String[(top_count_moz)*3];
}
int[] nlinks=new int[2];
if(enginechoice.get(0)){
//get bing results
BingResults br = new BingResults();
links_bing=br.Get(quer, results_number, directory_save,config_path);
}
if(enginechoice.get(1)){
//get google results
GoogleResults gr = new GoogleResults();
links_google=gr.Get(quer,results_number,directory_save,config_path);
}
if(enginechoice.get(2)){
//get yahoo results
YahooResults yr = new YahooResults();
links_yahoo=yr.Get(quer,results_number,directory_save,config_path);
}
HashMap<Integer,List<String>> EntitiesMapDBP = new HashMap<>();
HashMap<Integer,List<String>> CategoriesMapDBP = new HashMap<>();
HashMap<Integer,List<String>> EntitiesMapDand = new HashMap<>();
HashMap<Integer,List<String>> CategoriesMapDand = new HashMap<>();
HashMap<Integer,List<String>> EntitiesMapYahoo = new HashMap<>();
HashMap<Integer,List<String>> CategoriesMapYahoo = new HashMap<>();
HashMap<Integer,String> parseOutputList = new HashMap<>();
for(int i=0;i<results_number*3;i++){
parseOutputList.put(i,"");
}
//*************
boolean false_flag=true;
if(false_flag){
if(mozMetrics.get(0)){
//we check if moz works
Moz moz=new Moz();
boolean checkmoz=moz.check(config_path);
if(checkmoz){
//perform
if(links_yahoo.length>0){
links_yahoo=moz.perform(links_yahoo,top_count_moz,moz_threshold,moz_threshold_option,mozMetrics, config_path);
}
if(links_google.length>0){
links_google=moz.perform(links_google,top_count_moz,moz_threshold,moz_threshold_option,mozMetrics, config_path);
}
if(links_bing.length>0){
links_bing=moz.perform(links_bing,top_count_moz,moz_threshold,moz_threshold_option,mozMetrics, config_path);
}
}
}
//we are creating Sindice class in order to get the number of semantic triples of a webpage
Sindice striple=new Sindice();
//create htmlparser to get the number of links in a webpage
if(mozMetrics.get(0)){
results_number=links_yahoo.length;
}
WebParser htm=new WebParser();
//create an array that contains all the links together
for(int i=0;i<3;i++){
try{
if(i==0){System.arraycopy(links_yahoo, 0, links_total, 0, results_number);}
if(i==1){System.arraycopy(links_google, 0, links_total, links_yahoo.length, results_number);}
if(i==2){System.arraycopy(links_bing, 0, links_total,((links_yahoo.length)+(links_google.length)), results_number);}
} catch (ArrayIndexOutOfBoundsException ex) {
Logger.getLogger(Search_analysis.class.getName()).log(Level.SEVERE, null, ex);
ArrayList<String> finalList = new ArrayList<String>();
return finalList;
}
}
//merged true => visibility score
if(enginechoice.get(3)){
VisibilityScore vb=new VisibilityScore();//we have a merged engine
//erase using vb.perform all the duplicate links
links_total=vb.perform(links_google, links_yahoo, links_bing);
//if we have Moz option set to true we have to get the results rearranged according to the moz metric selected
if(mozMetrics.get(0)){
Moz checkMoz=new Moz();
boolean check_seo=checkMoz.check(config_path);
if (check_seo){
Moz MOZ=new Moz();
links_total=MOZ.perform(links_total,top_count_moz,moz_threshold,moz_threshold_option,mozMetrics, config_path);
}
}
//here we calculate the visibility score
links_total=vb.visibility_score(links_total, links_yahoo, links_bing, links_google, top_visible);
}
String[][] total_catent= new String[links_total.length][2];
for(int r=0;r<total_catent.length;r++){
total_catent[r][0]="";
total_catent[r][1]="";
}
for(int j=0;j<links_total.length;j++){
if(links_total[j]!=null){
String urlString=links_total[j];
if(urlString.length()>199){
urlString=links_total[j].substring(0, 198);
}
int rank=-1;
int engine=-1;//0 for yahoo,1 for google,2 for bing
if(j<results_number){
rank=j;
engine=0;
}
else if(j<results_number*2){
rank=j-results_number;
engine=1;
}
else if(j<results_number*3){
rank=j-results_number*2;
engine=2;
}
try{
//we initialize the row in settings table
conn = DriverManager.getConnection(url,user,password);
stmt = conn.prepareStatement("INSERT INTO SETTINGS (url,query,search_engine,search_engine_rank,domain) VALUES (?,?,?,?,?) ON DUPLICATE KEY UPDATE url=VALUES(url),query=VALUES(query),search_engine=VALUES(search_engine),domain=VALUES(domain)");
stmt.setString(1,urlString);
stmt.setString(2,quer);
stmt.setInt(3,engine);
stmt.setInt(4,rank);
stmt.setString(5,domain);
stmt.executeUpdate();
}
finally{
try {
if (stmt != null) stmt.close();
if (conn != null) conn.close();
} catch (SQLException ex) {
Logger.getLogger(Search_analysis.class.getName()).log(Level.SEVERE, null, ex);
}
}
try{
//we initialize the row in semantic stats table
conn = DriverManager.getConnection(url,user,password);
stmt = conn.prepareStatement("INSERT INTO SEMANTICSTATS (url,query,search_engine,search_engine_rank,domain) VALUES (?,?,?,?,?) ON DUPLICATE KEY UPDATE url=VALUES(url),query=VALUES(query),search_engine=VALUES(search_engine),domain=VALUES(domain)");
stmt.setString(1,urlString);
stmt.setString(2,quer);
stmt.setInt(3,engine);
stmt.setInt(4,rank);
stmt.setString(5,domain);
stmt.executeUpdate();
}
finally{
try {
if (stmt != null) stmt.close();
if (conn != null) conn.close();
} catch (SQLException ex) {
Logger.getLogger(Search_analysis.class.getName()).log(Level.SEVERE, null, ex);
}
}
try{
//we initialize the row in namespaces stats table
conn = DriverManager.getConnection(url,user,password);
stmt = conn.prepareStatement("INSERT INTO NAMESPACESSTATS (url,query,search_engine,search_engine_rank,domain) VALUES (?,?,?,?,?) ON DUPLICATE KEY UPDATE url=VALUES(url),query=VALUES(query),search_engine=VALUES(search_engine),domain=VALUES(domain)");
stmt.setString(1,urlString);
stmt.setString(2,quer);
stmt.setInt(3,engine);
stmt.setInt(4,rank);
stmt.setString(5,domain);
stmt.executeUpdate();
}
finally{
try {
if (stmt != null) stmt.close();
if (conn != null) conn.close();
} catch (SQLException ex) {
Logger.getLogger(Search_analysis.class.getName()).log(Level.SEVERE, null, ex);
}
}
try{
//we put the info inside the settings
conn = DriverManager.getConnection(url,user,password);
StringBuilder settingsStmBuild = new StringBuilder();
settingsStmBuild.append("UPDATE SETTINGS SET ");
settingsStmBuild.append("`nTopics`=? , ");
settingsStmBuild.append("`alpha`=? , ");
settingsStmBuild.append("`beta`=? , ");
settingsStmBuild.append("`niters`=? , ");
settingsStmBuild.append("`prob_threshold`=? , ");
settingsStmBuild.append("`moz`=? , ");
settingsStmBuild.append("`top_count_moz`=? , ");
settingsStmBuild.append("`moz_threshold`=? , ");
settingsStmBuild.append("`moz_threshold_option`=? , ");
settingsStmBuild.append("`top_visible`=? , ");
settingsStmBuild.append("`Domain_Authority`=? , ");
settingsStmBuild.append("`External_MozRank`=? , ");
settingsStmBuild.append("`MozRank`=? , ");
settingsStmBuild.append("`MozTrust`=? , ");
settingsStmBuild.append("`Page_Authority`=? , ");
settingsStmBuild.append("`Subdomain_mozRank`=? , ");
settingsStmBuild.append("`merged`=? , ");
settingsStmBuild.append("`results_number`=? , ");
settingsStmBuild.append("`Diffbotflag`=? , ");
settingsStmBuild.append("`LDAflag`=? , ");
settingsStmBuild.append("`Sensebotflag`=? , ");
settingsStmBuild.append("`TFIDFflag`=? , ");
settingsStmBuild.append("`SensebotConcepts`=? , ");
settingsStmBuild.append("`nTopTopics`=? , ");
settingsStmBuild.append("`combinelimit`=? ,");
settingsStmBuild.append("`newtermstocombine`=? ,");
settingsStmBuild.append("`newqueriesmax`=? ,");
settingsStmBuild.append("`ngdthreshold`=? ,");
settingsStmBuild.append("`entitiesconfi`=? ,");
settingsStmBuild.append("`dbpediasup`=? ");
settingsStmBuild.append("WHERE `url`=? AND `query`=? AND `search_engine`=? AND `domain`=?");
stmt = conn.prepareStatement(settingsStmBuild.toString());
stmt.setInt(1,SWebRankSettings.get(1).intValue());
stmt.setDouble(2,alpha);
stmt.setDouble(3,SWebRankSettings.get(0));
stmt.setInt(4,SWebRankSettings.get(2).intValue());
stmt.setDouble(5,SWebRankSettings.get(3));
stmt.setBoolean(6,mozMetrics.get(0));
stmt.setInt(7,top_count_moz);
stmt.setDouble(8,moz_threshold);
stmt.setBoolean(9,moz_threshold_option);
stmt.setInt(10,top_visible);
stmt.setBoolean(11,mozMetrics.get(1));
stmt.setBoolean(12,mozMetrics.get(2));
stmt.setBoolean(13,mozMetrics.get(3));
stmt.setBoolean(14,mozMetrics.get(4));
stmt.setBoolean(15,mozMetrics.get(5));
stmt.setBoolean(16,mozMetrics.get(6));
stmt.setBoolean(17,enginechoice.get(3));
stmt.setInt(18,results_number);
stmt.setBoolean(19,ContentSemantics.get(0));
stmt.setBoolean(20,ContentSemantics.get(1));
stmt.setBoolean(21,ContentSemantics.get(2));
stmt.setBoolean(22,ContentSemantics.get(3));
stmt.setInt(23,SensebotConcepts);
stmt.setInt(24,SWebRankSettings.get(11).intValue());
stmt.setInt(25,SWebRankSettings.get(7).intValue());
stmt.setInt(26,SWebRankSettings.get(9).intValue());
stmt.setInt(27,SWebRankSettings.get(10).intValue());
stmt.setDouble(28,SWebRankSettings.get(6));
stmt.setDouble(29,SWebRankSettings.get(12));
stmt.setDouble(30,SWebRankSettings.get(13));
stmt.setString(31,urlString);
stmt.setString(32,quer);
stmt.setInt(33,engine);
stmt.setString(34,domain);
stmt.executeUpdate();
}
finally{
try {
if (stmt != null) stmt.close();
if (conn != null) conn.close();
} catch (SQLException ex) {
Logger.getLogger(Search_analysis.class.getName()).log(Level.SEVERE, null, ex);
}
}
if(htm.checkconn(links_total[j])){//if we can connect to the url we continue to update semantics stats and namespaces stats tables
nlinks=htm.getnlinks(links_total[j]);
StringBuilder webstatsStmBuild = new StringBuilder();
try{
conn = DriverManager.getConnection(url,user,password);
webstatsStmBuild.append("UPDATE SEMANTICSTATS SET ");
webstatsStmBuild.append("`number_links`=? , ");
webstatsStmBuild.append("`redirect_links`=? , ");
webstatsStmBuild.append("`internal_links`=? ");
webstatsStmBuild.append("WHERE `url`=? AND `query`=? AND `search_engine`=? AND `domain`=?");
stmt = conn.prepareStatement(webstatsStmBuild.toString());
stmt.setInt(1,nlinks[0]);//total numbers of links
stmt.setInt(2,nlinks[0]-nlinks[1]);
stmt.setInt(3,nlinks[1]);//internal links
stmt.setString(4,urlString);
stmt.setString(5,quer);
stmt.setInt(6,engine);
stmt.setString(7,domain);
stmt.executeUpdate();
}
finally{
try {
if (stmt != null) stmt.close();
if (conn != null) conn.close();
} catch (SQLException ex) {
Logger.getLogger(Search_analysis.class.getName()).log(Level.SEVERE, null, ex);
}
}
try{
conn = DriverManager.getConnection(url,user,password);
System.out.println("I am going to get the stats from Sindice\n");
int ntriples=striple.getsindicestats(links_total[j]);//get the amount of semantic triples using Sindice API
System.out.println("I am going insert the semantic triples number in the DB\n");
stmt = conn.prepareStatement("UPDATE SEMANTICSTATS SET `total_semantic_triples`=? WHERE `url` =? AND `query`=? AND `search_engine`=? AND `domain`=?" );
stmt.setInt(1,ntriples);
stmt.setString(2,urlString);
stmt.setString(3,quer);
stmt.setInt(4,engine);
stmt.setString(5,domain);
stmt.executeUpdate();
System.out.println("I inserted the semantic triples number in the DB\n");
//---namespaces-----
System.out.println("I am going to insert the namespaces in the DB\n");
}
finally{
try {
if (stmt != null) stmt.close();
if (conn != null) conn.close();
} catch (SQLException ex) {
Logger.getLogger(Search_analysis.class.getName()).log(Level.SEVERE, null, ex);
}
}
boolean flagStriple=false;
if(flagStriple){
if(striple.namespaces[0]){
try{
conn = DriverManager.getConnection(url,user,password);
stmt = conn.prepareStatement("UPDATE NAMESPACESSTATS SET `http://purl.org/vocab/bio/0.1/` = ? WHERE `url` = ? AND `query`=? AND `search_engine`=? AND `domain`=?" );
stmt.setBoolean(1,true);
stmt.setString(2,urlString);
stmt.setString(3,quer);
stmt.setInt(4,engine);
stmt.setString(5,domain);
stmt.executeUpdate();
}
finally{
try {
if (stmt != null) stmt.close();
if (conn != null) conn.close();
} catch (SQLException ex) {
Logger.getLogger(Search_analysis.class.getName()).log(Level.SEVERE, null, ex);
}
}
}
if(striple.namespaces[1]){
try{
conn = DriverManager.getConnection(url,user,password);
stmt = conn.prepareStatement("UPDATE NAMESPACESSTATS SET `http://purl.org/dc/elements/1.1/` =? WHERE `url`=? AND `query`=? AND `search_engine`=? AND `domain`=?" );
stmt.setBoolean(1,true);
stmt.setString(2,urlString);
stmt.setString(3,quer);
stmt.setInt(4,engine);
stmt.setString(5,domain);
stmt.executeUpdate();
}
finally{
try {
if (stmt != null) stmt.close();
if (conn != null) conn.close();
} catch (SQLException ex) {
Logger.getLogger(Search_analysis.class.getName()).log(Level.SEVERE, null, ex);
}
}
}
if(striple.namespaces[2]){
try{
conn = DriverManager.getConnection(url,user,password);
stmt = conn.prepareStatement("UPDATE NAMESPACESSTATS SET `http://purl.org/coo/n` = ? WHERE `url`=? AND `query`=? AND `search_engine`=? AND `domain`=?" );
stmt.setBoolean(1,true);
stmt.setString(2,urlString);
stmt.setString(3,quer);
stmt.setInt(4,engine);
stmt.setString(5,domain);
stmt.executeUpdate();
}
finally{
try {
if (stmt != null) stmt.close();
if (conn != null) conn.close();
} catch (SQLException ex) {
Logger.getLogger(Search_analysis.class.getName()).log(Level.SEVERE, null, ex);
}
}
}
if(striple.namespaces[3]){
try{
conn = DriverManager.getConnection(url,user,password);
stmt = conn.prepareStatement("UPDATE NAMESPACESSTATS SET `http://web.resource.org/cc/`=? WHERE `url`=? AND `query`=? AND `search_engine`=? AND `domain`=?" );
stmt.setBoolean(1,true);
stmt.setString(2,urlString);
stmt.setString(3,quer);
stmt.setInt(4,engine);
stmt.setString(5,domain);
stmt.executeUpdate();
}
finally{
try {
if (stmt != null) stmt.close();
if (conn != null) conn.close();
} catch (SQLException ex) {
Logger.getLogger(Search_analysis.class.getName()).log(Level.SEVERE, null, ex);
}
}
}
if(striple.namespaces[4]){
try{
conn = DriverManager.getConnection(url,user,password);
stmt = conn.prepareStatement("UPDATE NAMESPACESSTATS SET `http://diligentarguont.ontoware.org/2005/10/arguonto`=? WHERE `url`=? AND `query`=? AND `search_engine`=? AND `domain`=?" );
stmt.setBoolean(1,true);
stmt.setString(2,urlString);
stmt.setString(3,quer);
stmt.setInt(4,engine);
stmt.setString(5,domain);
stmt.executeUpdate();
}
finally{
try {
if (stmt != null) stmt.close();
if (conn != null) conn.close();
} catch (SQLException ex) {
Logger.getLogger(Search_analysis.class.getName()).log(Level.SEVERE, null, ex);
}
}
}
if(striple.namespaces[5]){
try{
conn = DriverManager.getConnection(url,user,password);
stmt = conn.prepareStatement("UPDATE NAMESPACESSTATS SET `http://usefulinc.com/ns/doap`=? WHERE `url`=? AND `query`=? AND `search_engine`=? AND `domain`=?" );
stmt.setBoolean(1,true);
stmt.setString(2,urlString);
stmt.setString(3,quer);
stmt.setInt(4,engine);
stmt.setString(5,domain);
stmt.executeUpdate();
}
finally{
try {
if (stmt != null) stmt.close();
if (conn != null) conn.close();
} catch (SQLException ex) {
Logger.getLogger(Search_analysis.class.getName()).log(Level.SEVERE, null, ex);
}
}
}
if(striple.namespaces[6]){
try{
conn = DriverManager.getConnection(url,user,password);
stmt = conn.prepareStatement("UPDATE NAMESPACESSTATS SET `http://xmlns.com/foaf/0.1/`=? WHERE `url`=? AND `query`=? AND `search_engine`=? AND `domain`=?" );
stmt.setBoolean(1,true);
stmt.setString(2,urlString);
stmt.setString(3,quer);
stmt.setInt(4,engine);
stmt.setString(5,domain);
stmt.executeUpdate();
}
finally{
try {
if (stmt != null) stmt.close();
if (conn != null) conn.close();
} catch (SQLException ex) {
Logger.getLogger(Search_analysis.class.getName()).log(Level.SEVERE, null, ex);
}
}
}
if(striple.namespaces[7]){
try{
conn = DriverManager.getConnection(url,user,password);
stmt = conn.prepareStatement("UPDATE NAMESPACESSTATS SET `http://purl.org/goodrelations/`=? WHERE `url`=? AND `query`=? AND `search_engine`=? AND `domain`=?" );
stmt.setBoolean(1,true);
stmt.setString(2,urlString);
stmt.setString(3,quer);
stmt.setInt(4,engine);
stmt.setString(5,domain);
stmt.executeUpdate();
}
finally{
try {
if (stmt != null) stmt.close();
if (conn != null) conn.close();
} catch (SQLException ex) {
Logger.getLogger(Search_analysis.class.getName()).log(Level.SEVERE, null, ex);
}
}
}
if(striple.namespaces[8]){
try{
conn = DriverManager.getConnection(url,user,password);
stmt = conn.prepareStatement("UPDATE NAMESPACESSTATS SET `http://purl.org/muto/core`=? WHERE `url`=? AND `query`=? AND `search_engine`=? AND `domain`=?" );
stmt.setBoolean(1,true);
stmt.setString(2,urlString);
stmt.setString(3,quer);
stmt.setInt(4,engine);
stmt.setString(5,domain);
stmt.executeUpdate();
}
finally{
try {
if (stmt != null) stmt.close();
if (conn != null) conn.close();
} catch (SQLException ex) {
Logger.getLogger(Search_analysis.class.getName()).log(Level.SEVERE, null, ex);
}
}
}
if(striple.namespaces[9]){
try{
conn = DriverManager.getConnection(url,user,password);
stmt = conn.prepareStatement("UPDATE NAMESPACESSTATS SET `http://webns.net/mvcb/`=? WHERE `url`=? AND `query`=? AND `search_engine`=? AND `domain`=?" );
stmt.setBoolean(1,true);
stmt.setString(2,urlString);
stmt.setString(3,quer);
stmt.setInt(4,engine);
stmt.setString(5,domain);
stmt.executeUpdate();
}
finally{
try {
if (stmt != null) stmt.close();
if (conn != null) conn.close();
} catch (SQLException ex) {
Logger.getLogger(Search_analysis.class.getName()).log(Level.SEVERE, null, ex);
}
}
}
if(striple.namespaces[10]){
try{
conn = DriverManager.getConnection(url,user,password);
stmt = conn.prepareStatement("UPDATE NAMESPACESSTATS SET `http://purl.org/ontology/mo/`=? WHERE `url`=? AND `query`=? AND `search_engine`=? AND `domain`=?" );
stmt.setBoolean(1,true);
stmt.setString(2,urlString);
stmt.setString(3,quer);
stmt.setInt(4,engine);
stmt.setString(5,domain);
stmt.executeUpdate();
}
finally{
try {
if (stmt != null) stmt.close();
if (conn != null) conn.close();
} catch (SQLException ex) {
Logger.getLogger(Search_analysis.class.getName()).log(Level.SEVERE, null, ex);
}
}
}
if(striple.namespaces[11]){
try{
conn = DriverManager.getConnection(url,user,password);
stmt = conn.prepareStatement("UPDATE NAMESPACESSTATS SET `http://purl.org/innovation/ns`=? WHERE `url`=? AND `query`=? AND `search_engine`=? AND `domain`=?" );
stmt.setBoolean(1,true);
stmt.setString(2,urlString);
stmt.setString(3,quer);
stmt.setInt(4,engine);
stmt.setString(5,domain);
stmt.executeUpdate();
}
finally{
try {
if (stmt != null) stmt.close();
if (conn != null) conn.close();
} catch (SQLException ex) {
Logger.getLogger(Search_analysis.class.getName()).log(Level.SEVERE, null, ex);
}
}
}
if(striple.namespaces[12]){
try{
stmt = conn.prepareStatement("UPDATE NAMESPACESSTATS SET `http://openguid.net/rdf`=? WHERE `url`=? AND `query`=? AND `search_engine`=? AND `domain`=?" );
stmt.setBoolean(1,true);
stmt.setString(2,urlString);
stmt.setString(3,quer);
stmt.setInt(4,engine);
stmt.setString(5,domain);
stmt.executeUpdate();
}
finally{
try {
if (stmt != null) stmt.close();
if (conn != null) conn.close();
} catch (SQLException ex) {
Logger.getLogger(Search_analysis.class.getName()).log(Level.SEVERE, null, ex);
}
}
}
if(striple.namespaces[13]){
try{
conn = DriverManager.getConnection(url,user,password);
stmt = conn.prepareStatement("UPDATE NAMESPACESSTATS SET `http://www.slamka.cz/ontologies/diagnostika.owl`=? WHERE `url`=? AND `query`=? AND `search_engine`=? AND `domain`=?" );
stmt.setBoolean(1,true);
stmt.setString(2,urlString);
stmt.setString(3,quer);
stmt.setInt(4,engine);
stmt.setString(5,domain);
stmt.executeUpdate();
}
finally{
try {
if (stmt != null) stmt.close();
if (conn != null) conn.close();
} catch (SQLException ex) {
Logger.getLogger(Search_analysis.class.getName()).log(Level.SEVERE, null, ex);
}
}
}
if(striple.namespaces[14]){
try{
conn = DriverManager.getConnection(url,user,password);
stmt = conn.prepareStatement("UPDATE NAMESPACESSTATS SET `http://purl.org/ontology/po/`=? WHERE `url`=? AND `query`=? AND `search_engine`=? AND `domain`=?" );
stmt.setBoolean(1,true);
stmt.setString(2,urlString);
stmt.setString(3,quer);
stmt.setInt(4,engine);
stmt.setString(5,domain);
stmt.executeUpdate();
}
finally{
try {
if (stmt != null) stmt.close();
if (conn != null) conn.close();
} catch (SQLException ex) {
Logger.getLogger(Search_analysis.class.getName()).log(Level.SEVERE, null, ex);
}
}
}
if(striple.namespaces[15]){
try{
conn = DriverManager.getConnection(url,user,password);
stmt = conn.prepareStatement("UPDATE NAMESPACESSTATS SET `http://purl.org/net/provenance/ns`=? WHERE `url`=? AND `query`=? AND `search_engine`=? AND `domain`=?" );
stmt.setBoolean(1,true);
stmt.setString(2,urlString);
stmt.setString(3,quer);
stmt.setInt(4,engine);
stmt.setString(5,domain);
stmt.executeUpdate();
}
finally{
try {
if (stmt != null) stmt.close();
if (conn != null) conn.close();
} catch (SQLException ex) {
Logger.getLogger(Search_analysis.class.getName()).log(Level.SEVERE, null, ex);
}
}
}
if(striple.namespaces[16]){
try {
conn = DriverManager.getConnection(url,user,password);
stmt = conn.prepareStatement("UPDATE NAMESPACESSTATS SET `http://purl.org/rss/1.0/modules/syndication`=? WHERE `url`=? AND `query`=? AND `search_engine`=? AND `domain`=?" );
stmt.setBoolean(1,true);
stmt.setString(2,urlString);
stmt.setString(3,quer);
stmt.setInt(4,engine);
stmt.setString(5,domain);
stmt.executeUpdate();
}
finally{
try {
if (stmt != null) stmt.close();
if (conn != null) conn.close();
} catch (SQLException ex) {
Logger.getLogger(Search_analysis.class.getName()).log(Level.SEVERE, null, ex);
}
}
}
if(striple.namespaces[17]){
try {
conn = DriverManager.getConnection(url,user,password);
stmt = conn.prepareStatement("UPDATE NAMESPACESSTATS SET `http://rdfs.org/sioc/ns`=? WHERE `url`=? AND `query`=? AND `search_engine`=? AND `domain`=?" );
stmt.setBoolean(1,true);
stmt.setString(2,urlString);
stmt.setString(3,quer);
stmt.setInt(4,engine);
stmt.setString(5,domain);
stmt.executeUpdate();
}
finally{
try {
if (stmt != null) stmt.close();
if (conn != null) conn.close();
} catch (SQLException ex) {
Logger.getLogger(Search_analysis.class.getName()).log(Level.SEVERE, null, ex);
}
}
}
if(striple.namespaces[18]){
try {
conn = DriverManager.getConnection(url,user,password);
stmt = conn.prepareStatement("UPDATE NAMESPACESSTATS SET `http://madskills.com/public/xml/rss/module/trackback/`=? WHERE `url`=? AND `query`=? AND `search_engine`=? AND `domain`=?" );
stmt.setBoolean(1,true);
stmt.setString(2,urlString);
stmt.setString(3,quer);
stmt.setInt(4,engine);
stmt.setString(5,domain);
stmt.executeUpdate();
}
finally{
try {
if (stmt != null) stmt.close();
if (conn != null) conn.close();
} catch (SQLException ex) {
Logger.getLogger(Search_analysis.class.getName()).log(Level.SEVERE, null, ex);
}
}
}
if(striple.namespaces[19]){
try {
conn = DriverManager.getConnection(url,user,password);
stmt = conn.prepareStatement("UPDATE NAMESPACESSTATS SET `http://rdfs.org/ns/void`=? WHERE `url`=? AND `query`=? AND `search_engine`=? AND `domain`=?" );
stmt.setBoolean(1,true);
stmt.setString(2,urlString);
stmt.setString(3,quer);
stmt.setInt(4,engine);
stmt.setString(5,domain);
stmt.executeUpdate();
}
finally{
try {
if (stmt != null) stmt.close();
if (conn != null) conn.close();
} catch (SQLException ex) {
Logger.getLogger(Search_analysis.class.getName()).log(Level.SEVERE, null, ex);
}
}
}
if(striple.namespaces[20]){
try {
conn = DriverManager.getConnection(url,user,password);
stmt = conn.prepareStatement("UPDATE NAMESPACESSTATS SET `http://www.fzi.de/2008/wise/`=? WHERE `url`=? AND `query`=? AND `search_engine`=? AND `domain`=?" );
stmt.setBoolean(1,true);
stmt.setString(2,urlString);
stmt.setString(3,quer);
stmt.setInt(4,engine);
stmt.setString(5,domain);
stmt.executeUpdate();
}
finally{
try {
if (stmt != null) stmt.close();
if (conn != null) conn.close();
} catch (SQLException ex) {
Logger.getLogger(Search_analysis.class.getName()).log(Level.SEVERE, null, ex);
}
}
}
if(striple.namespaces[21]){
try {
conn = DriverManager.getConnection(url,user,password);
stmt = conn.prepareStatement("UPDATE NAMESPACESSTATS SET `http://xmlns.com/wot/0.1`=? WHERE `url`=? AND `query`=? AND `search_engine`=? AND `domain`=?" );
stmt.setBoolean(1,true);
stmt.setString(2,urlString);
stmt.setString(3,quer);
stmt.setInt(4,engine);
stmt.setString(5,domain);
stmt.executeUpdate();
}
finally{
try {
if (stmt != null) stmt.close();
if (conn != null) conn.close();
} catch (SQLException ex) {
Logger.getLogger(Search_analysis.class.getName()).log(Level.SEVERE, null, ex);
}
}
}
if(striple.namespaces[22]){
try {
conn = DriverManager.getConnection(url,user,password);
stmt = conn.prepareStatement("UPDATE NAMESPACESSTATS SET `http://www.w3.org/1999/02/22-rdf-syntax-ns`=? WHERE `url`=? AND `query`=? AND `search_engine`=? AND `domain`=?" );
stmt.setBoolean(1,true);
stmt.setString(2,urlString);
stmt.setString(3,quer);
stmt.setInt(4,engine);
stmt.setString(5,domain);
stmt.executeUpdate();
}
finally{
try {
if (stmt != null) stmt.close();
if (conn != null) conn.close();
} catch (SQLException ex) {
Logger.getLogger(Search_analysis.class.getName()).log(Level.SEVERE, null, ex);
}
}
}
if(striple.namespaces[23]){
try {
conn = DriverManager.getConnection(url,user,password);
stmt = conn.prepareStatement("UPDATE NAMESPACESSTATS SET `rdf-schema`=? WHERE `url`=? AND `query`=? AND `search_engine`=? AND `domain`=?" );
stmt.setBoolean(1,true);
stmt.setString(2,urlString);
stmt.setString(3,quer);
stmt.setInt(4,engine);
stmt.setString(5,domain);
stmt.executeUpdate();
}
finally{
try {
if (stmt != null) stmt.close();
if (conn != null) conn.close();
} catch (SQLException ex) {
Logger.getLogger(Search_analysis.class.getName()).log(Level.SEVERE, null, ex);
}
}
}
if(striple.namespaces[24]){
try {
conn = DriverManager.getConnection(url,user,password);
stmt = conn.prepareStatement("UPDATE NAMESPACESSTATS SET `XMLschema`=? WHERE `url`=? AND `query`=? AND `search_engine`=? AND `domain`=?" );
stmt.setBoolean(1,true);
stmt.setString(2,urlString);
stmt.setString(3,quer);
stmt.setInt(4,engine);
stmt.setString(5,domain);
stmt.executeUpdate();
}
finally{
try {
if (stmt != null) stmt.close();
if (conn != null) conn.close();
} catch (SQLException ex) {
Logger.getLogger(Search_analysis.class.getName()).log(Level.SEVERE, null, ex);
}
}
}
if(striple.namespaces[25]){
try {
conn = DriverManager.getConnection(url,user,password);
stmt = conn.prepareStatement("UPDATE NAMESPACESSTATS SET `OWL`=? WHERE `url`=? AND `query`=? AND `search_engine`=? AND `domain`=?" );
stmt.setBoolean(1,true);
stmt.setString(2,urlString);
stmt.setString(3,quer);
stmt.setInt(4,engine);
stmt.setString(5,domain);
stmt.executeUpdate();
}
finally{
try {
if (stmt != null) stmt.close();
if (conn != null) conn.close();
} catch (SQLException ex) {
Logger.getLogger(Search_analysis.class.getName()).log(Level.SEVERE, null, ex);
}
}
}
if(striple.namespaces[26]){
try {
conn = DriverManager.getConnection(url,user,password);
stmt = conn.prepareStatement("UPDATE NAMESPACESSTATS SET `http://purl.org/dc/terms/`=? WHERE `url`=? AND `query`=? AND `search_engine`=? AND `domain`=?" );
stmt.setBoolean(1,true);
stmt.setString(2,urlString);
stmt.setString(3,quer);
stmt.setInt(4,engine);
stmt.setString(5,domain);
stmt.executeUpdate();
}
finally{
try {
if (stmt != null) stmt.close();
if (conn != null) conn.close();
} catch (SQLException ex) {
Logger.getLogger(Search_analysis.class.getName()).log(Level.SEVERE, null, ex);
}
}
}
if(striple.namespaces[27]){
try {
conn = DriverManager.getConnection(url,user,password);
stmt = conn.prepareStatement("UPDATE NAMESPACESSTATS SET `VCARD`=? WHERE `url`=? AND `query`=? AND `search_engine`=? AND `domain`=?" );
stmt.setBoolean(1,true);
stmt.setString(2,urlString);
stmt.setString(3,quer);
stmt.setInt(4,engine);
stmt.setString(5,domain);
stmt.executeUpdate();
}
finally{
try {
if (stmt != null) stmt.close();
if (conn != null) conn.close();
} catch (SQLException ex) {
Logger.getLogger(Search_analysis.class.getName()).log(Level.SEVERE, null, ex);
}
}
}
if(striple.namespaces[28]){
try {
conn = DriverManager.getConnection(url,user,password);
stmt = conn.prepareStatement("UPDATE NAMESPACESSTATS SET `http://www.geonames.org/ontology`=? WHERE `url`=? AND `query`=? AND `search_engine`=? AND `domain`=?" );
stmt.setBoolean(1,true);
stmt.setString(2,urlString);
stmt.setString(3,quer);
stmt.setInt(4,engine);
stmt.setString(5,domain);
stmt.executeUpdate();
}
finally{
try {
if (stmt != null) stmt.close();
if (conn != null) conn.close();
} catch (SQLException ex) {
Logger.getLogger(Search_analysis.class.getName()).log(Level.SEVERE, null, ex);
}
}
}
if(striple.namespaces[29]){
try {
conn = DriverManager.getConnection(url,user,password);
stmt = conn.prepareStatement("UPDATE NAMESPACESSTATS SET `http://search.yahoo.com/searchmonkey/commerce/`=? WHERE `url`=? AND `query`=? AND `search_engine`=? AND `domain`=?" );
stmt.setBoolean(1,true);
stmt.setString(2,urlString);
stmt.setString(3,quer);
stmt.setInt(4,engine);
stmt.setString(5,domain);
stmt.executeUpdate();
}
finally{
try {
if (stmt != null) stmt.close();
if (conn != null) conn.close();
} catch (SQLException ex) {
Logger.getLogger(Search_analysis.class.getName()).log(Level.SEVERE, null, ex);
}
}
}
if(striple.namespaces[30]){
try {
conn = DriverManager.getConnection(url,user,password);
stmt = conn.prepareStatement("UPDATE NAMESPACESSTATS SET `http://search.yahoo.com/searchmonkey/media/`=? WHERE `url`=? AND `query`=? AND `search_engine`=? AND `domain`=?" );
stmt.setBoolean(1,true);
stmt.setString(2,urlString);
stmt.setString(3,quer);
stmt.setInt(4,engine);
stmt.setString(5,domain);
stmt.executeUpdate();
}
finally{
try {
if (stmt != null) stmt.close();
if (conn != null) conn.close();
} catch (SQLException ex) {
Logger.getLogger(Search_analysis.class.getName()).log(Level.SEVERE, null, ex);
}
}
}
if(striple.namespaces[31]){
try {
conn = DriverManager.getConnection(url,user,password);
stmt = conn.prepareStatement("UPDATE NAMESPACESSTATS SET `http://cb.semsol.org/ns#`=? WHERE `url`=? AND `query`=? AND `search_engine`=? AND `domain`=?" );
stmt.setBoolean(1,true);
stmt.setString(2,urlString);
stmt.setString(3,quer);
stmt.setInt(4,engine);
stmt.setString(5,domain);
stmt.executeUpdate();
}
finally{
try {
if (stmt != null) stmt.close();
if (conn != null) conn.close();
} catch (SQLException ex) {
Logger.getLogger(Search_analysis.class.getName()).log(Level.SEVERE, null, ex);
}
}
}
if(striple.namespaces[32]){
try {
conn = DriverManager.getConnection(url,user,password);
stmt = conn.prepareStatement("UPDATE NAMESPACESSTATS SET `http://blogs.yandex.ru/schema/foaf/`=? WHERE `url`=? AND `query`=? AND `search_engine`=? AND `domain`=?" );
stmt.setBoolean(1,true);
stmt.setString(2,urlString);
stmt.setString(3,quer);
stmt.setInt(4,engine);
stmt.setString(5,domain);
stmt.executeUpdate();
}
finally{
try {
if (stmt != null) stmt.close();
if (conn != null) conn.close();
} catch (SQLException ex) {
Logger.getLogger(Search_analysis.class.getName()).log(Level.SEVERE, null, ex);
}
}
}
if(striple.namespaces[33]){
try {
conn = DriverManager.getConnection(url,user,password);
stmt = conn.prepareStatement("UPDATE NAMESPACESSTATS SET `http://www.w3.org/2003/01/geo/wgs84_pos#`=? WHERE `url`=? AND `query`=? AND `search_engine`=? AND `domain`=?" );
stmt.setBoolean(1,true);
stmt.setString(2,urlString);
stmt.setString(3,quer);
stmt.setInt(4,engine);
stmt.setString(5,domain);
stmt.executeUpdate();
}
finally{
try {
if (stmt != null) stmt.close();
if (conn != null) conn.close();
} catch (SQLException ex) {
Logger.getLogger(Search_analysis.class.getName()).log(Level.SEVERE, null, ex);
}
}
}
if(striple.namespaces[34]){
try {
conn = DriverManager.getConnection(url,user,password);
stmt = conn.prepareStatement("UPDATE NAMESPACESSTATS SET `http://rdfs.org/sioc/ns#`=? WHERE `url`=? AND `query`=? AND `search_engine`=? AND `domain`=?" );
stmt.setBoolean(1,true);
stmt.setString(2,urlString);
stmt.setString(3,quer);
stmt.setInt(4,engine);
stmt.setString(5,domain);
stmt.executeUpdate();
}
finally{
try {
if (stmt != null) stmt.close();
if (conn != null) conn.close();
} catch (SQLException ex) {
Logger.getLogger(Search_analysis.class.getName()).log(Level.SEVERE, null, ex);
}
}
}
if(striple.namespaces[35]){
try {
conn = DriverManager.getConnection(url,user,password);
stmt = conn.prepareStatement("UPDATE NAMESPACESSTATS SET `http://rdfs.org/sioc/types#`=? WHERE `url`=? AND `query`=? AND `search_engine`=? AND `domain`=?" );
stmt.setBoolean(1,true);
stmt.setString(2,urlString);
stmt.setString(3,quer);
stmt.setInt(4,engine);
stmt.setString(5,domain);
stmt.executeUpdate();
}
finally{
try {
if (stmt != null) stmt.close();
if (conn != null) conn.close();
} catch (SQLException ex) {
Logger.getLogger(Search_analysis.class.getName()).log(Level.SEVERE, null, ex);
}
}
}
if(striple.namespaces[36]){
try {
conn = DriverManager.getConnection(url,user,password);
stmt = conn.prepareStatement("UPDATE NAMESPACESSTATS SET `http://smw.ontoware.org/2005/smw#`=? WHERE `url`=? AND `query`=? AND `search_engine`=? AND `domain`=?" );
stmt.setBoolean(1,true);
stmt.setString(2,urlString);
stmt.setString(3,quer);
stmt.setInt(4,engine);
stmt.setString(5,domain);
stmt.executeUpdate();
}
finally{
try {
if (stmt != null) stmt.close();
if (conn != null) conn.close();
} catch (SQLException ex) {
Logger.getLogger(Search_analysis.class.getName()).log(Level.SEVERE, null, ex);
}
}
}
if(striple.namespaces[37]){
try {
conn = DriverManager.getConnection(url,user,password);
stmt = conn.prepareStatement("UPDATE NAMESPACESSTATS SET `http://purl.org/rss/1.0/`= ? WHERE `url`=? AND `query`=? AND `search_engine`=? AND `domain`=?" );
stmt.setBoolean(1,true);
stmt.setString(2,urlString);
stmt.setString(3,quer);
stmt.setInt(4,engine);
stmt.setString(5,domain);
stmt.executeUpdate();
}
finally{
try {
if (stmt != null) stmt.close();
if (conn != null) conn.close();
} catch (SQLException ex) {
Logger.getLogger(Search_analysis.class.getName()).log(Level.SEVERE, null, ex);
}
}
}
if(striple.namespaces[38]){
try {
conn = DriverManager.getConnection(url,user,password);
stmt = conn.prepareStatement("UPDATE NAMESPACESSTATS SET `http://www.w3.org/2004/12/q/contentlabel#`=? WHERE `url`=? AND `query`=? AND `search_engine`=? AND `domain`=?" );
stmt.setBoolean(1,true);
stmt.setString(2,urlString);
stmt.setString(3,quer);
stmt.setInt(4,engine);
stmt.setString(5,domain);
stmt.executeUpdate();
}
finally{
try {
if (stmt != null) stmt.close();
if (conn != null) conn.close();
} catch (SQLException ex) {
Logger.getLogger(Search_analysis.class.getName()).log(Level.SEVERE, null, ex);
}
}
}
}
System.out.println("I inserted the namespaces in the DB\n");
System.out.println("I will get the semantic entities and categories\n");
//get the semantic entities and categories from Yahoo Content Analysis Service
YahooEntityCategory yec=new YahooEntityCategory();
yec.connect(links_total[j],quer, false,SWebRankSettings.get(12));//without stemming
EntitiesMapYahoo.put(j, yec.GetEntitiesYahoo());
CategoriesMapYahoo.put(j, yec.GetCategoriesYahoo());
double ent_avg_yahoo_score = yec.GetEntitiesYahooScore();
double cat_avg_yahoo_score = yec.GetCategoriesYahooScore();
int cat_cnt=yec.GetCatQuerCnt();
int ent_cnt=yec.GetEntQuerCnt();
int cat_cnt_whole=yec.GetCatQuerCntWhole();
int ent_cnt_whole=yec.GetEntQuerCntWhole();
yec.connect(links_total[j],quer, true,SWebRankSettings.get(12));//with stemming
int cat_cnt_stem=yec.GetCatQuerCnt();
int ent_cnt_stem=yec.GetEntQuerCnt();
int cat_cnt_whole_stem=yec.GetCatQuerCntWhole();
int ent_cnt_whole_stem=yec.GetEntQuerCntWhole();
//get the semantic entities and categories from Dandelion Named entity extraction API
DandelionEntities dec = new DandelionEntities();
dec.connect(links_total[j], quer,false,config_path,SWebRankSettings.get(12));//without stemming
EntitiesMapDand.put(j, dec.GetEntitiesDand());
CategoriesMapDand.put(j, dec.GetCategoriesDand());
double ent_avg_d_score = dec.GetEntitiesScoreDand();
int cat_cnt_dand=dec.getCat();
int ent_cnt_dand=dec.getEnt();
int cat_cnt_dand_whole=dec.getCatWhole();
int ent_cnt_dand_whole=dec.getEntWhole();
dec.connect(links_total[j], quer,true,config_path,SWebRankSettings.get(12));//with stemming
int cat_cnt_dand_stem=dec.getCat();
int ent_cnt_dand_stem=dec.getEnt();
int cat_cnt_dand_whole_stem=dec.getCatWhole();
int ent_cnt_dand_whole_stem=dec.getEntWhole();
//get the semantic entities and categories from dbpedia spotlight
DBpediaSpotlightClient dbpspot = new DBpediaSpotlightClient(SWebRankSettings.get(12),SWebRankSettings.get(13).intValue());
dbpspot.countEntCat(links_total[j], quer,false);//false is not stemming
EntitiesMapDBP.put(j, dbpspot.getEntities());
CategoriesMapDBP.put(j, dbpspot.getCategories());
double ent_avg_dbpspot_score = dbpspot.getEntitiesAvgScore();
double ent_max_dbpspot_score = dbpspot.getEntitiesMaxScore();
double ent_min_dbpspot_score = dbpspot.getEntitiesMinScore();
double ent_median_dbpspot_score = dbpspot.getEntitiesMedianScore();
double ent_std_dbpspot_score = dbpspot.getEntitiesStdScore();
double ent_avg_dbpspot_support = dbpspot.getEntitiesAvgSupport();
double ent_max_dbpspot_support = dbpspot.getEntitiesMaxSupport();
double ent_min_dbpspot_support = dbpspot.getEntitiesMinSupport();
double ent_median_dbpspot_support = dbpspot.getEntitiesMedianSupport();
double ent_std_dbpspot_support = dbpspot.getEntitiesStdSupport();
double ent_avg_dbpspot_dif = dbpspot.getEntitiesAvgDif();
double ent_max_dbpspot_dif = dbpspot.getEntitiesMaxDif();
double ent_min_dbpspot_dif = dbpspot.getEntitiesMinDif();
double ent_median_dbpspot_dif = dbpspot.getEntitiesMedianDif();
double ent_std_dbpspot_dif = dbpspot.getEntitiesStdDif();
double unique_ent_cnt_dbpspot = dbpspot.getUniqueEntCnt();
double unique_ent_scoreSum_dbpspot = dbpspot.getUniqueEntScoreSum();
int cat_cnt_dbpspot = dbpspot.getcountCat();
int ent_cnt_dbpspot = dbpspot.getcountEnt();
int cat_cnt_dbpspot_whole = dbpspot.getcountCatWhole();
int ent_cnt_dbpspot_whole = dbpspot.getcountEntWhole();
double ent_sup_cnt_dbpspot = dbpspot.getcountSupEnt();
double ent_sim_cnt_dbpspot = dbpspot.getcountSimEnt();
double ent_dif_cnt_dbpspot = dbpspot.getcountDifEnt();
double high_precision_content_dbpspot = dbpspot.getHighPrecEntities();
dbpspot.countEntCat(links_total[j], quer,true);//true is for stemming
int cat_cnt_dbpspot_stem = dbpspot.getcountCat();
int ent_cnt_dbpspot_stem = dbpspot.getcountEnt();
int cat_cnt_dbpspot_whole_stem = dbpspot.getcountCatWhole();
int ent_cnt_dbpspot_whole_stem = dbpspot.getcountEntWhole();
double ent_sup_cnt_dbpspot_stem = dbpspot.getcountSupEnt();
double ent_sim_cnt_dbpspot_stem = dbpspot.getcountSimEnt();
double ent_dif_cnt_dbpspot_stem = dbpspot.getcountDifEnt();
System.out.println("I insert the semantic entities and categories stats in the DB\n");
StringBuilder entitiesStatementBuilder = new StringBuilder();
try{
entitiesStatementBuilder.append("UPDATE SEMANTICSTATS SET ");
entitiesStatementBuilder.append("`ent_avg_y_score`=?,");
entitiesStatementBuilder.append("`cat_avg_y_score`=? ");
entitiesStatementBuilder.append("WHERE `url`=? AND `query`=? AND `search_engine`=? AND `domain`=?");
conn = DriverManager.getConnection(url,user,password);
stmt = conn.prepareStatement(entitiesStatementBuilder.toString());
stmt.setDouble(1,ent_avg_yahoo_score);
stmt.setDouble(2,cat_avg_yahoo_score);
stmt.setString(3,urlString);
stmt.setString(4,quer);
if(j<results_number){
stmt.setInt(5,0);//0 for yahoo
}
else if(j<results_number*2){
stmt.setInt(5,1);//1 for google
}
else if(j<results_number*3){
stmt.setInt(5,2);//2 for bing
}
stmt.setString(6,domain);
stmt.executeUpdate();
}
catch (SQLException ex) {
Logger.getLogger(Search_analysis.class.getName()).log(Level.SEVERE, null, ex);
}
finally{
try {
if (stmt != null) stmt.close();
if (conn != null) conn.close();
} catch (SQLException ex) {
Logger.getLogger(Search_analysis.class.getName()).log(Level.SEVERE, null, ex);
}
}
try{
entitiesStatementBuilder = new StringBuilder();
entitiesStatementBuilder.append("UPDATE SEMANTICSTATS SET ");
entitiesStatementBuilder.append("`ent_avg_dand_score`=?,");
entitiesStatementBuilder.append("`ent_avg_dbpspot_score`=? ");
entitiesStatementBuilder.append("WHERE `url`=? AND `query`=? AND `search_engine`=? AND `domain`=?");
conn = DriverManager.getConnection(url,user,password);
stmt = conn.prepareStatement(entitiesStatementBuilder.toString());
stmt.setDouble(1,ent_avg_d_score);
stmt.setDouble(2,ent_avg_dbpspot_score);
stmt.setString(3,urlString);
stmt.setString(4,quer);
if(j<results_number){
stmt.setInt(5,0);//0 for yahoo
}
else if(j<results_number*2){
stmt.setInt(5,1);//1 for google
}
else if(j<results_number*3){
stmt.setInt(5,2);//2 for bing
}
stmt.setString(6,domain);
stmt.executeUpdate();
}
catch (SQLException ex) {
Logger.getLogger(Search_analysis.class.getName()).log(Level.SEVERE, null, ex);
}
finally{
try {
if (stmt != null) stmt.close();
if (conn != null) conn.close();
} catch (SQLException ex) {
Logger.getLogger(Search_analysis.class.getName()).log(Level.SEVERE, null, ex);
}
}
try{
entitiesStatementBuilder = new StringBuilder();
entitiesStatementBuilder.append("UPDATE SEMANTICSTATS SET ");
entitiesStatementBuilder.append("`ent_max_dbpspot_score`=?,");
entitiesStatementBuilder.append("`ent_min_dbpspot_score`=?,");
entitiesStatementBuilder.append("`ent_median_dbpspot_score`=?,");
entitiesStatementBuilder.append("`ent_std_dbpspot_score`=? ");
entitiesStatementBuilder.append("WHERE `url`=? AND `query`=? AND `search_engine`=? AND `domain`=?");
conn = DriverManager.getConnection(url,user,password);
stmt = conn.prepareStatement(entitiesStatementBuilder.toString());
stmt.setDouble(1,ent_max_dbpspot_score);
stmt.setDouble(2,ent_min_dbpspot_score);
stmt.setDouble(3,ent_median_dbpspot_score);
stmt.setDouble(4,ent_std_dbpspot_score);
stmt.setString(5,links_total[j]);
stmt.setString(6,quer);
if(j<results_number){
stmt.setInt(7,0);//0 for yahoo
}
else if(j<results_number*2){
stmt.setInt(7,1);//1 for google
}
else if(j<results_number*3){
stmt.setInt(7,2);//2 for bing
}
stmt.setString(8,domain);
stmt.executeUpdate();
}
catch (SQLException ex) {
Logger.getLogger(Search_analysis.class.getName()).log(Level.SEVERE, null, ex);
}
finally{
try {
if (stmt != null) stmt.close();
if (conn != null) conn.close();
} catch (SQLException ex) {
Logger.getLogger(Search_analysis.class.getName()).log(Level.SEVERE, null, ex);
}
}
try{
entitiesStatementBuilder = new StringBuilder();
entitiesStatementBuilder.append("UPDATE SEMANTICSTATS SET ");
entitiesStatementBuilder.append("`ent_avg_dbpspot_support`=?,");
entitiesStatementBuilder.append("`ent_max_dbpspot_support`=?,");
entitiesStatementBuilder.append("`ent_min_dbpspot_support`=?,");
entitiesStatementBuilder.append("`ent_median_dbpspot_support`=?,");
entitiesStatementBuilder.append("`ent_std_dbpspot_support`=? ");
entitiesStatementBuilder.append("WHERE `url`=? AND `query`=? AND `search_engine`=? AND `domain`=?");
conn = DriverManager.getConnection(url,user,password);
stmt = conn.prepareStatement(entitiesStatementBuilder.toString());
stmt.setDouble(1,ent_avg_dbpspot_support);
stmt.setDouble(2,ent_max_dbpspot_support);
stmt.setDouble(3,ent_min_dbpspot_support);
stmt.setDouble(4,ent_median_dbpspot_support);
stmt.setDouble(5,ent_std_dbpspot_support);
stmt.setString(6,links_total[j]);
stmt.setString(7,quer);
if(j<results_number){
stmt.setInt(8,0);//0 for yahoo
}
else if(j<results_number*2){
stmt.setInt(8,1);//1 for google
}
else if(j<results_number*3){
stmt.setInt(8,2);//2 for bing
}
stmt.setString(9,domain);
System.out.println("avg db support"+ent_avg_dbpspot_support);
stmt.executeUpdate();
}
catch (SQLException ex) {
Logger.getLogger(Search_analysis.class.getName()).log(Level.SEVERE, null, ex);
}
finally{
try {
if (stmt != null) stmt.close();
if (conn != null) conn.close();
} catch (SQLException ex) {
Logger.getLogger(Search_analysis.class.getName()).log(Level.SEVERE, null, ex);
}
}
try{
entitiesStatementBuilder = new StringBuilder();
entitiesStatementBuilder.append("UPDATE SEMANTICSTATS SET ");
entitiesStatementBuilder.append("`ent_avg_dbpspot_dif`=?,");
entitiesStatementBuilder.append("`ent_max_dbpspot_dif`=?,");
entitiesStatementBuilder.append("`ent_min_dbpspot_dif`=?,");
entitiesStatementBuilder.append("`ent_median_dbpspot_dif`=?,");
entitiesStatementBuilder.append("`ent_std_dbpspot_dif`=? ");
entitiesStatementBuilder.append("WHERE `url`=? AND `query`=? AND `search_engine`=? AND `domain`=?");
conn = DriverManager.getConnection(url,user,password);
stmt = conn.prepareStatement(entitiesStatementBuilder.toString());
stmt.setDouble(1,ent_avg_dbpspot_dif);
stmt.setDouble(2,ent_max_dbpspot_dif);
stmt.setDouble(3,ent_min_dbpspot_dif);
stmt.setDouble(4,ent_median_dbpspot_dif);
stmt.setDouble(5,ent_std_dbpspot_dif);
stmt.setString(6,links_total[j]);
stmt.setString(7,quer);
if(j<results_number){
stmt.setInt(8,0);//0 for yahoo
}
else if(j<results_number*2){
stmt.setInt(8,1);//1 for google
}
else if(j<results_number*3){
stmt.setInt(8,2);//2 for bing
}
stmt.setString(9,domain);
stmt.executeUpdate();
}
catch (SQLException ex) {
Logger.getLogger(Search_analysis.class.getName()).log(Level.SEVERE, null, ex);
}
finally{
try {
if (stmt != null) stmt.close();
if (conn != null) conn.close();
} catch (SQLException ex) {
Logger.getLogger(Search_analysis.class.getName()).log(Level.SEVERE, null, ex);
}
}
try {
conn = DriverManager.getConnection(url,user,password);
entitiesStatementBuilder = new StringBuilder();
entitiesStatementBuilder.append("UPDATE SEMANTICSTATS SET ");
entitiesStatementBuilder.append("`ent_sup_cnt_dbpspot`=?,");
entitiesStatementBuilder.append("`ent_dif_cnt_dbpspot`=?,");
entitiesStatementBuilder.append("`ent_sim_cnt_dbpspot`=? ");
entitiesStatementBuilder.append("WHERE `url`=? AND `query`=? AND `search_engine`=? AND `domain`=?");
stmt = conn.prepareStatement(entitiesStatementBuilder.toString());
stmt.setDouble(1,ent_sup_cnt_dbpspot);
stmt.setDouble(2,ent_dif_cnt_dbpspot);
stmt.setDouble(3,ent_sim_cnt_dbpspot);
stmt.setString(4,links_total[j]);
stmt.setString(5,quer);
if(j<results_number){
stmt.setInt(6,0);//0 for yahoo
}
else if(j<results_number*2){
stmt.setInt(6,1);//1 for google
}
else if(j<results_number*3){
stmt.setInt(6,2);//2 for bing
}
stmt.setString(7,domain);
stmt.executeUpdate();
}
finally{
try {
if (stmt != null) stmt.close();
if (conn != null) conn.close();
} catch (SQLException ex) {
Logger.getLogger(Search_analysis.class.getName()).log(Level.SEVERE, null, ex);
}
}
try {
conn = DriverManager.getConnection(url,user,password);
entitiesStatementBuilder = new StringBuilder();
entitiesStatementBuilder.append("UPDATE SEMANTICSTATS SET ");
entitiesStatementBuilder.append("`ent_sup_cnt_dbpspot_stem`=?,");
entitiesStatementBuilder.append("`ent_dif_cnt_dbpspot_stem`=?,");
entitiesStatementBuilder.append("`ent_sim_cnt_dbpspot_stem`=? ");
entitiesStatementBuilder.append("WHERE `url`=? AND `query`=? AND `search_engine`=? AND `domain`=?");
stmt = conn.prepareStatement(entitiesStatementBuilder.toString());
stmt.setDouble(1,ent_sup_cnt_dbpspot_stem);
stmt.setDouble(2,ent_dif_cnt_dbpspot_stem);
stmt.setDouble(3,ent_sim_cnt_dbpspot_stem);
stmt.setString(4,links_total[j]);
stmt.setString(5,quer);
if(j<results_number){
stmt.setInt(6,0);//0 for yahoo
}
else if(j<results_number*2){
stmt.setInt(6,1);//1 for google
}
else if(j<results_number*3){
stmt.setInt(6,2);//2 for bing
}
stmt.setString(7,domain);
stmt.executeUpdate();
}
finally{
try {
if (stmt != null) stmt.close();
if (conn != null) conn.close();
} catch (SQLException ex) {
Logger.getLogger(Search_analysis.class.getName()).log(Level.SEVERE, null, ex);
}
}
try {
conn = DriverManager.getConnection(url,user,password);
entitiesStatementBuilder = new StringBuilder();
entitiesStatementBuilder.append("UPDATE SEMANTICSTATS SET ");
entitiesStatementBuilder.append("`unique_ent_cnt_dbpspot`=?,");
entitiesStatementBuilder.append("`unique_ent_scoreSum_dbpspot`=?,");
entitiesStatementBuilder.append("`high_precision_content_dbpspot`=? ");
entitiesStatementBuilder.append("WHERE `url`=? AND `query`=? AND `search_engine`=? AND `domain`=?");
stmt = conn.prepareStatement(entitiesStatementBuilder.toString());
stmt.setDouble(1,unique_ent_cnt_dbpspot);
stmt.setDouble(2,unique_ent_scoreSum_dbpspot);
stmt.setDouble(3,high_precision_content_dbpspot);
stmt.setString(4,links_total[j]);
stmt.setString(5,quer);
if(j<results_number){
stmt.setInt(6,0);//0 for yahoo
}
else if(j<results_number*2){
stmt.setInt(6,1);//1 for google
}
else if(j<results_number*3){
stmt.setInt(6,2);//2 for bing
}
stmt.setString(7,domain);
stmt.executeUpdate();
}
finally{
try {
if (stmt != null) stmt.close();
if (conn != null) conn.close();
} catch (SQLException ex) {
Logger.getLogger(Search_analysis.class.getName()).log(Level.SEVERE, null, ex);
}
}
try{
entitiesStatementBuilder = new StringBuilder();
entitiesStatementBuilder.append("UPDATE SEMANTICSTATS SET ");
entitiesStatementBuilder.append("`Categories_Contained_Query_Y`=?,");
entitiesStatementBuilder.append("`Entities_Contained_Query_Y`=?,");
entitiesStatementBuilder.append("`Categories_Contained_Query_Y_W`=? ");
entitiesStatementBuilder.append("WHERE `url`=? AND `query`=? AND `search_engine`=? AND `domain`=?");
conn = DriverManager.getConnection(url,user,password);
stmt = conn.prepareStatement(entitiesStatementBuilder.toString());
stmt.setInt(1,cat_cnt);
stmt.setInt(2,ent_cnt);
stmt.setInt(3,cat_cnt_whole);
stmt.setString(4,urlString);
stmt.setString(5,quer);
if(j<results_number){
stmt.setInt(6,0);//0 for yahoo
}
else if(j<results_number*2){
stmt.setInt(6,1);//1 for google
}
else if(j<results_number*3){
stmt.setInt(6,2);//2 for bing
}
stmt.setString(7,domain);
stmt.executeUpdate();
}
finally{
try {
if (stmt != null) stmt.close();
if (conn != null) conn.close();
} catch (SQLException ex) {
Logger.getLogger(Search_analysis.class.getName()).log(Level.SEVERE, null, ex);
}
}
try {
conn = DriverManager.getConnection(url,user,password);
entitiesStatementBuilder = new StringBuilder();
entitiesStatementBuilder.append("UPDATE SEMANTICSTATS SET ");
entitiesStatementBuilder.append("`Entities_Contained_Query_Y_W`=?,");
entitiesStatementBuilder.append("`Categories_Contained_Query_D`=?,");
entitiesStatementBuilder.append("`Entities_Contained_Query_D`=? ");
entitiesStatementBuilder.append("WHERE `url`=? AND `query`=? AND `search_engine`=? AND `domain`=?");
stmt = conn.prepareStatement(entitiesStatementBuilder.toString());
stmt.setInt(1,ent_cnt_whole);
stmt.setInt(2,cat_cnt_dand);
stmt.setInt(3,ent_cnt_dand);
stmt.setString(4,urlString);
stmt.setString(5,quer);
if(j<results_number){
stmt.setInt(6,0);//0 for yahoo
}
else if(j<results_number*2){
stmt.setInt(6,1);//1 for google
}
else if(j<results_number*3){
stmt.setInt(6,2);//2 for bing
}
stmt.setString(7,domain);
stmt.executeUpdate();
}
finally{
try {
if (stmt != null) stmt.close();
if (conn != null) conn.close();
} catch (SQLException ex) {
Logger.getLogger(Search_analysis.class.getName()).log(Level.SEVERE, null, ex);
}
}
try {
conn = DriverManager.getConnection(url,user,password);
entitiesStatementBuilder = new StringBuilder();
entitiesStatementBuilder.append("UPDATE SEMANTICSTATS SET ");
entitiesStatementBuilder.append("`Categories_Contained_Query_D_W`=?,");
entitiesStatementBuilder.append("`Entities_Contained_Query_D_W`=? ");
entitiesStatementBuilder.append("WHERE `url`=? AND `query`=? AND `search_engine`=? AND `domain`=?");
stmt = conn.prepareStatement(entitiesStatementBuilder.toString());
stmt.setInt(1,cat_cnt_dand_whole);
stmt.setInt(2,ent_cnt_dand_whole);
stmt.setString(3,urlString);
stmt.setString(4,quer);
if(j<results_number){
stmt.setInt(5,0);//0 for yahoo
}
else if(j<results_number*2){
stmt.setInt(5,1);//1 for google
}
else if(j<results_number*3){
stmt.setInt(5,2);//2 for bing
}
stmt.setString(6,domain);
stmt.executeUpdate();
}
finally{
try {
if (stmt != null) stmt.close();
if (conn != null) conn.close();
} catch (SQLException ex) {
Logger.getLogger(Search_analysis.class.getName()).log(Level.SEVERE, null, ex);
}
}
try {
conn = DriverManager.getConnection(url,user,password);
entitiesStatementBuilder = new StringBuilder();
entitiesStatementBuilder.append("UPDATE SEMANTICSTATS SET ");
entitiesStatementBuilder.append("`Categories_Contained_Query_DBPspot`=?,");
entitiesStatementBuilder.append("`Entities_Contained_Query_DBPspot`=? ");
entitiesStatementBuilder.append("WHERE `url`=? AND `query`=? AND `search_engine`=? AND `domain`=?");
stmt = conn.prepareStatement(entitiesStatementBuilder.toString());
stmt.setInt(1,cat_cnt_dbpspot);
stmt.setInt(2,ent_cnt_dbpspot);
stmt.setString(3,urlString);
stmt.setString(4,quer);
if(j<results_number){
stmt.setInt(5,0);//0 for yahoo
}
else if(j<results_number*2){
stmt.setInt(5,1);//1 for google
}
else if(j<results_number*3){
stmt.setInt(5,2);//2 for bing
}
stmt.setString(6,domain);
stmt.executeUpdate();
}
finally{
try {
if (stmt != null) stmt.close();
if (conn != null) conn.close();
} catch (SQLException ex) {
Logger.getLogger(Search_analysis.class.getName()).log(Level.SEVERE, null, ex);
}
}
try {
conn = DriverManager.getConnection(url,user,password);
entitiesStatementBuilder = new StringBuilder();
entitiesStatementBuilder.append("UPDATE SEMANTICSTATS SET ");
entitiesStatementBuilder.append("`Categories_Contained_Query_DBPspot_W`=?,");
entitiesStatementBuilder.append("`Entities_Contained_Query_DBPspot_W`=? ");
entitiesStatementBuilder.append("WHERE `url`=? AND `query`=? AND `search_engine`=? AND `domain`=?");
stmt = conn.prepareStatement(entitiesStatementBuilder.toString());
stmt.setInt(1,cat_cnt_dbpspot_whole);
stmt.setInt(2,ent_cnt_dbpspot_whole);
stmt.setString(3,urlString);
stmt.setString(4,quer);
if(j<results_number){
stmt.setInt(5,0);//0 for yahoo
}
else if(j<results_number*2){
stmt.setInt(5,1);//1 for google
}
else if(j<results_number*3){
stmt.setInt(5,2);//2 for bing
}
stmt.setString(6,domain);
stmt.executeUpdate();
}
finally{
try {
if (stmt != null) stmt.close();
if (conn != null) conn.close();
} catch (SQLException ex) {
Logger.getLogger(Search_analysis.class.getName()).log(Level.SEVERE, null, ex);
}
}
try {
conn = DriverManager.getConnection(url,user,password);
entitiesStatementBuilder = new StringBuilder();
entitiesStatementBuilder.append("UPDATE SEMANTICSTATS SET ");
entitiesStatementBuilder.append("`Categories_Contained_Query_Y_Stem`=?,");
entitiesStatementBuilder.append("`Entities_Contained_Query_Y_Stem`=? ");
entitiesStatementBuilder.append("WHERE `url`=? AND `query`=? AND `search_engine`=? AND `domain`=?");
stmt = conn.prepareStatement(entitiesStatementBuilder.toString());
stmt.setInt(1,cat_cnt_stem);
stmt.setInt(2,ent_cnt_stem);
stmt.setString(3,urlString);
stmt.setString(4,quer);
if(j<results_number){
stmt.setInt(5,0);//0 for yahoo
}
else if(j<results_number*2){
stmt.setInt(5,1);//1 for google
}
else if(j<results_number*3){
stmt.setInt(5,2);//2 for bing
}
stmt.setString(6,domain);
stmt.executeUpdate();
}
finally{
try {
if (stmt != null) stmt.close();
if (conn != null) conn.close();
} catch (SQLException ex) {
Logger.getLogger(Search_analysis.class.getName()).log(Level.SEVERE, null, ex);
}
}
try {
conn = DriverManager.getConnection(url,user,password);
entitiesStatementBuilder = new StringBuilder();
entitiesStatementBuilder.append("UPDATE SEMANTICSTATS SET ");
entitiesStatementBuilder.append("`Categories_Contained_Query_Y_W_Stem`=?,");
entitiesStatementBuilder.append("`Entities_Contained_Query_Y_W_Stem`=? ");
entitiesStatementBuilder.append("WHERE `url`=? AND `query`=? AND `search_engine`=? AND `domain`=?");
stmt = conn.prepareStatement(entitiesStatementBuilder.toString());
stmt.setInt(1,cat_cnt_whole_stem);
stmt.setInt(2,ent_cnt_whole_stem);
stmt.setString(3,urlString);
stmt.setString(4,quer);
if(j<results_number){
stmt.setInt(5,0);//0 for yahoo
}
else if(j<results_number*2){
stmt.setInt(5,1);//1 for google
}
else if(j<results_number*3){
stmt.setInt(5,2);//2 for bing
}
stmt.setString(6,domain);
stmt.executeUpdate();
}
finally{
try {
if (stmt != null) stmt.close();
if (conn != null) conn.close();
} catch (SQLException ex) {
Logger.getLogger(Search_analysis.class.getName()).log(Level.SEVERE, null, ex);
}
}
try {
conn = DriverManager.getConnection(url,user,password);
entitiesStatementBuilder = new StringBuilder();
entitiesStatementBuilder.append("UPDATE SEMANTICSTATS SET ");
entitiesStatementBuilder.append("`Categories_Contained_Query_D_Stem`=?,");
entitiesStatementBuilder.append("`Entities_Contained_Query_D_Stem`=? ");
entitiesStatementBuilder.append("WHERE `url`=? AND `query`=? AND `search_engine`=? AND `domain`=?");
stmt = conn.prepareStatement(entitiesStatementBuilder.toString());
stmt.setInt(1,cat_cnt_dand_stem);
stmt.setInt(2,ent_cnt_dand_stem);
stmt.setString(3,urlString);
stmt.setString(4,quer);
if(j<results_number){
stmt.setInt(5,0);//0 for yahoo
}
else if(j<results_number*2){
stmt.setInt(5,1);//1 for google
}
else if(j<results_number*3){
stmt.setInt(5,2);//2 for bing
}
stmt.setString(6,domain);
stmt.executeUpdate();
}
finally{
try {
if (stmt != null) stmt.close();
if (conn != null) conn.close();
} catch (SQLException ex) {
Logger.getLogger(Search_analysis.class.getName()).log(Level.SEVERE, null, ex);
}
}
try {
conn = DriverManager.getConnection(url,user,password);
entitiesStatementBuilder = new StringBuilder();
entitiesStatementBuilder.append("UPDATE SEMANTICSTATS SET ");
entitiesStatementBuilder.append("`Categories_Contained_Query_D_W_Stem`=?,");
entitiesStatementBuilder.append("`Entities_Contained_Query_D_W_Stem`=? ");
entitiesStatementBuilder.append("WHERE `url`=? AND `query`=? AND `search_engine`=? AND `domain`=?");
stmt = conn.prepareStatement(entitiesStatementBuilder.toString());
stmt.setInt(1,cat_cnt_dand_whole_stem);
stmt.setInt(2,ent_cnt_dand_whole_stem);
stmt.setString(3,urlString);
stmt.setString(4,quer);
if(j<results_number){
stmt.setInt(5,0);//0 for yahoo
}
else if(j<results_number*2){
stmt.setInt(5,1);//1 for google
}
else if(j<results_number*3){
stmt.setInt(5,2);//2 for bing
}
stmt.setString(6,domain);
stmt.executeUpdate();
}
finally{
try {
if (stmt != null) stmt.close();
if (conn != null) conn.close();
} catch (SQLException ex) {
Logger.getLogger(Search_analysis.class.getName()).log(Level.SEVERE, null, ex);
}
}
try {
conn = DriverManager.getConnection(url,user,password);
entitiesStatementBuilder = new StringBuilder();
entitiesStatementBuilder.append("UPDATE SEMANTICSTATS SET ");
entitiesStatementBuilder.append("`Categories_Contained_Query_DBPspot_Stem`=?,");
entitiesStatementBuilder.append("`Entities_Contained_Query_DBPspot_Stem`=? ");
entitiesStatementBuilder.append("WHERE `url`=? AND `query`=? AND `search_engine`=? AND `domain`=?");
stmt = conn.prepareStatement(entitiesStatementBuilder.toString());
stmt.setInt(1,cat_cnt_dbpspot_stem);
stmt.setInt(2,ent_cnt_dbpspot_stem);
stmt.setString(3,urlString);
stmt.setString(4,quer);
if(j<results_number){
stmt.setInt(5,0);//0 for yahoo
}
else if(j<results_number*2){
stmt.setInt(5,1);//1 for google
}
else if(j<results_number*3){
stmt.setInt(5,2);//2 for bing
}
stmt.setString(6,domain);
stmt.executeUpdate();
}
finally{
try {
if (stmt != null) stmt.close();
if (conn != null) conn.close();
} catch (SQLException ex) {
Logger.getLogger(Search_analysis.class.getName()).log(Level.SEVERE, null, ex);
}
}
try {
conn = DriverManager.getConnection(url,user,password);
entitiesStatementBuilder = new StringBuilder();
entitiesStatementBuilder.append("UPDATE SEMANTICSTATS SET ");
entitiesStatementBuilder.append("`Categories_Contained_Query_DBPspot_W_Stem`=?,");
entitiesStatementBuilder.append("`Entities_Contained_Query_DBPspot_W_Stem`=? ");
entitiesStatementBuilder.append("WHERE `url`=? AND `query`=? AND `search_engine`=? AND `domain`=?");
stmt = conn.prepareStatement(entitiesStatementBuilder.toString());
stmt.setInt(1,cat_cnt_dbpspot_whole_stem);
stmt.setInt(2,ent_cnt_dbpspot_whole_stem);
stmt.setString(3,urlString);
stmt.setString(4,quer);
if(j<results_number){
stmt.setInt(5,0);//0 for yahoo
}
else if(j<results_number*2){
stmt.setInt(5,1);//1 for google
}
else if(j<results_number*3){
stmt.setInt(5,2);//2 for bing
}
stmt.setString(6,domain);
stmt.executeUpdate();
}
finally{
try {
if (stmt != null) stmt.close();
if (conn != null) conn.close();
} catch (SQLException ex) {
Logger.getLogger(Search_analysis.class.getName()).log(Level.SEVERE, null, ex);
}
}
System.out.println("I inserted the semantic entities and categories stats in the DB\n");
System.out.println("I will get the html stats for the "+j+" link:"+links_total[j]+"\n");
boolean flag_htmlstats=htm.gethtmlstats(links_total[j]);//get the semantic stats from the html code
if(flag_htmlstats){
System.out.println("I got the html stats for the "+j+" link:"+links_total[j]+"\n");
int scripts_cnt = htm.scripts_number;
int nschem=htm.nschem;
int hreln=htm.hreln;
int total_micron=htm.total_micron;
int micron1=htm.micron1;
int micron2=htm.micron2;
int microd=htm.microd;
System.out.println("I will insert webstats in the DB\n");
webstatsStmBuild.setLength(0);
webstatsStmBuild.append("UPDATE SEMANTICSTATS SET ");
webstatsStmBuild.append("`scripts_cnt`=? ");
webstatsStmBuild.append("WHERE `url`=? AND `query`=? AND `search_engine`=? AND `domain`=?");
try{
conn = DriverManager.getConnection(url,user,password);
stmt = conn.prepareStatement(webstatsStmBuild.toString());
stmt.setInt(1,scripts_cnt);
stmt.setString(2,urlString);
stmt.setString(3,quer);
if(j<results_number){
stmt.setInt(4,0);//0 for yahoo
}
else if(j<results_number*2){
stmt.setInt(4,1);//1 for google
}
else if(j<results_number*3){
stmt.setInt(4,2);//2 for bing
}
stmt.setString(5,domain);
stmt.executeUpdate();
}
finally{
try {
if (stmt != null) stmt.close();
if (conn != null) conn.close();
} catch (SQLException ex) {
Logger.getLogger(Search_analysis.class.getName()).log(Level.SEVERE, null, ex);
}
}
try {
conn = DriverManager.getConnection(url,user,password);
System.out.println("I inserted webstats in the DB\n");
System.out.println("I will insert semantic stats in the DB\n");
StringBuilder semanticstatsStmBuild = new StringBuilder();
semanticstatsStmBuild.append("UPDATE SEMANTICSTATS SET ");
semanticstatsStmBuild.append("`schema.org_entities`=? , ");
semanticstatsStmBuild.append("`hreltags`=? ");
semanticstatsStmBuild.append("WHERE `url`=? AND `query`=? AND `search_engine`=? AND `domain`=?");
stmt = conn.prepareStatement(semanticstatsStmBuild.toString());
stmt.setInt(1,nschem);
stmt.setInt(2,hreln);
stmt.setString(3,urlString);
stmt.setString(4,quer);
if(j<results_number){
stmt.setInt(5,0);//0 for yahoo
}
else if(j<results_number*2){
stmt.setInt(5,1);//1 for google
}
else if(j<results_number*3){
stmt.setInt(5,2);//2 for bing
}
stmt.setString(6,domain);
stmt.executeUpdate();
}
finally{
try {
if (stmt != null) stmt.close();
if (conn != null) conn.close();
} catch (SQLException ex) {
Logger.getLogger(Search_analysis.class.getName()).log(Level.SEVERE, null, ex);
}
}
try {
conn = DriverManager.getConnection(url,user,password);
StringBuilder semanticstatsStmBuild = new StringBuilder();
semanticstatsStmBuild.append("UPDATE SEMANTICSTATS SET ");
semanticstatsStmBuild.append("`total_microformats`=? , ");
semanticstatsStmBuild.append("`Microformats-1`=? ");
semanticstatsStmBuild.append("WHERE `url`=? AND `query`=? AND `search_engine`=? AND `domain`=?");
stmt = conn.prepareStatement(semanticstatsStmBuild.toString());
stmt.setInt(1,total_micron);
stmt.setInt(2,micron1);
stmt.setString(3,urlString);
stmt.setString(4,quer);
if(j<results_number){
stmt.setInt(5,0);//0 for yahoo
}
else if(j<results_number*2){
stmt.setInt(5,1);//1 for google
}
else if(j<results_number*3){
stmt.setInt(5,2);//2 for bing
}
stmt.setString(6,domain);
stmt.executeUpdate();
}
finally{
try {
if (stmt != null) stmt.close();
if (conn != null) conn.close();
} catch (SQLException ex) {
Logger.getLogger(Search_analysis.class.getName()).log(Level.SEVERE, null, ex);
}
}
try {
conn = DriverManager.getConnection(url,user,password);
StringBuilder semanticstatsStmBuild = new StringBuilder();
semanticstatsStmBuild.append("UPDATE SEMANTICSTATS SET ");
semanticstatsStmBuild.append("`Microformats-2`=? , ");
semanticstatsStmBuild.append("`Microdata`=? , ");
semanticstatsStmBuild.append("`FOAF_HTML`=? ");
semanticstatsStmBuild.append("WHERE `url`=? AND `query`=? AND `search_engine`=? AND `domain`=?");
stmt = conn.prepareStatement(semanticstatsStmBuild.toString());
stmt.setInt(1,micron2);
stmt.setInt(2,microd);
stmt.setInt(3,htm.foaf);
stmt.setString(4,urlString);
stmt.setString(5,quer);
if(j<results_number){
stmt.setInt(6,0);//0 for yahoo
}
else if(j<results_number*2){
stmt.setInt(6,1);//1 for google
}
else if(j<results_number*3){
stmt.setInt(6,2);//2 for bing
}
stmt.setString(7,domain);
stmt.executeUpdate();
}
finally{
try {
if (stmt != null) stmt.close();
if (conn != null) conn.close();
} catch (SQLException ex) {
Logger.getLogger(Search_analysis.class.getName()).log(Level.SEVERE, null, ex);
}
}
System.out.println("I inserted semantic stats in the DB\n");
}
}
}
}
String[] parse_output;
if(ContentSemantics.get(3)||ContentSemantics.get(1)){
//we perform LDA or TFIDF analysis to the links obtained
if(!enginechoice.get(3)){
if(enginechoice.get(2)){//Yahoo
parse_output=ld.perform(links_yahoo, domain, "yahoo", directory_save, quer, SWebRankSettings.get(1).intValue(), alpha, SWebRankSettings.get(0).doubleValue(), SWebRankSettings.get(2).intValue(), SWebRankSettings.get(3).intValue(),ContentSemantics.get(1),ContentSemantics.get(3), config_path);
int j=0;
for(String s:parse_output){
parseOutputList.put(j,s);
j++;
}
System.gc();
}
if(enginechoice.get(1)){//Google
parse_output=ld.perform(links_google, domain, "google", directory_save, quer, SWebRankSettings.get(1).intValue(), alpha, SWebRankSettings.get(0).doubleValue(), SWebRankSettings.get(2).intValue(), SWebRankSettings.get(3).intValue(),ContentSemantics.get(1),ContentSemantics.get(3), config_path);
int j=results_number;
for(String s:parse_output){
parseOutputList.put(j, s);
j++;
}
System.gc();
}
if(enginechoice.get(0)){//Bing
parse_output=ld.perform(links_bing, domain, "bing", directory_save, quer, SWebRankSettings.get(1).intValue(), alpha, SWebRankSettings.get(0).doubleValue(), SWebRankSettings.get(2).intValue(), SWebRankSettings.get(3).intValue(),ContentSemantics.get(1),ContentSemantics.get(3), config_path);
int j=results_number*2;
for(String s:parse_output){
parseOutputList.put(j, s);
j++;
}
System.gc();
}
}
/*else{
System.gc();//links_total
parse_output=ld.perform(links_total, domain, "merged", directory_save, quer, SWebRankSettings.get(1).intValue(), alpha, SWebRankSettings.get(0).doubleValue(), SWebRankSettings.get(2).intValue(), SWebRankSettings.get(3).intValue(),"Merged",ContentSemantics.get(1),ContentSemantics.get(3), config_path);
Collections.addAll(parseOutputList, parse_output);
System.gc();
}*/
}
}
System.gc();
List<String> wordList=null;
//hashmap for every engine, with topics, words and probability of each word
HashMap<String,HashMap<Integer,HashMap<String,Double>>> enginetopicwordprobmap = new HashMap<>();
List<String> lda_output = new ArrayList<>();
if(ContentSemantics.get(3)){
//get the top content from TFIDF
System.out.println("i ll try to read the keys");
wordList=ld.return_topWordsTFIDF();
System.out.println("i returned the wordlist to search analysis");
}
else if (ContentSemantics.get(0)){//get the wordlist from Diffbot
Diffbot db=new Diffbot();
wordList=db.compute(links_total, directory_save, config_path);
}
else if (ContentSemantics.get(2)){//get the wordllist from Sensebot
Sensebot sb=new Sensebot();
wordList=sb.compute(links_total, directory_save,SensebotConcepts, config_path);
}
else {
//get the top content from LDA
System.out.println("i ll try to read the keys");
LDAtopicsWords rk = new LDAtopicsWords();
enginetopicwordprobmap= rk.readFile(directory_save, SWebRankSettings.get(4),SWebRankSettings.get(3).intValue(), SWebRankSettings.get(1).intValue(), SWebRankSettings.get(11).intValue());
JSONArray ArrayEngineLevel = new JSONArray();
List<String> ids=new ArrayList<>();
//Node node = nodeBuilder().client(true).clusterName("lshrankldacluster").node();
//Client client = node.client();
Settings settings = ImmutableSettings.settingsBuilder()
.put("cluster.name","lshrankldacluster").build();
Client client = new TransportClient(settings)
.addTransportAddress(new
InetSocketTransportAddress("localhost", 9300)
);
//save in elastic search the produced by LDA distributions of words over topics for every engine
for(String engine: enginetopicwordprobmap.keySet()){
HashMap<Integer,HashMap<String,Double>> topicwordprobmap = new HashMap<>();
topicwordprobmap=enginetopicwordprobmap.get(engine);
JSONObject objEngineLevel = new JSONObject();
JSONArray ArrayTopicLevel = new JSONArray();
//for every topic get the words and their probability
for(Integer topicindex:topicwordprobmap.keySet()){
JSONObject objTopicLevel = new JSONObject();
objTopicLevel.put("topic",topicindex);
JSONObject objmap = new JSONObject(topicwordprobmap.get(topicindex));
Set keySet = objmap.keySet();
Iterator iterator = keySet.iterator();
while(iterator.hasNext()){
String word = iterator.next().toString();
if(!lda_output.contains(word)){
lda_output.add(word);
}//get the words in a separate list
}
objTopicLevel.put("wordsmap",objmap);//write the words in elastic search
ArrayTopicLevel.add(objTopicLevel);
}
objEngineLevel.put("engine",engine);
objEngineLevel.put("query",quer);
objEngineLevel.put("domain",domain);
objEngineLevel.put("iteration",iteration_counter);
objEngineLevel.put("TopicsWordMap", ArrayTopicLevel);
ArrayEngineLevel.add(objEngineLevel);
String id = domain+"/"+quer+"/"+engine+"/"+iteration_counter;//create unique id for the elasticsearch document
ids.add(id);//add to the ids list which contains the ids of the current round
List<String> elasticIndexes=ri.GetKeyFile(config_path, "elasticSearchIndexes");
IndexRequest indexReq=new IndexRequest(elasticIndexes.get(2),"content",id);
indexReq.source(objEngineLevel);
IndexResponse indexRes = client.index(indexReq).actionGet();
}
//node.close();
client.close();
ElasticGetWordList elasticGetwordList=new ElasticGetWordList();//get the wordlist from elastic search for the ids from the current round
wordList=elasticGetwordList.get(ids,config_path);
DataManipulation datamanipulation = new DataManipulation();
wordList=datamanipulation.clearListString(wordList);
System.out.println("i returned the wordlist to search analysis");
}
//get some stats regarding the entities, categories and parsed content from each link comparing it to the top words produced by lda
for(int j=0;j<links_total.length;j++){
if(links_total[j]!=null){
String urlString = links_total[j];
if(urlString.length()>199){
urlString=links_total[j].substring(0, 198);
}
int rank=-1;
int engine=-1;//0 for yahoo,1 for google,2 for bing
if(j<results_number){
rank=j;
engine=0;
}
else if(j<results_number*2){
rank=j-results_number;
engine=1;
}
else if(j<results_number*3){
rank=j-results_number*2;
engine=2;
}
LDAsemStats ldaSemStats = new LDAsemStats();//get the stats by comparing the top words produced by LDA and the parsed content
//check the LDAsemStats class for more
StringBuilder webstatsStmBuild = new StringBuilder();
if(!parseOutputList.isEmpty()){
if(!parseOutputList.get(j).equalsIgnoreCase("")&&!parseOutputList.get(j).equalsIgnoreCase("null")&&(parseOutputList.get(j).length()>0)){
ldaSemStats.getTopWordsStats(parseOutputList.get(j), lda_output, false);//without stemming
int top_words_lda = ldaSemStats.getTopStats();
double top_words_lda_per = ldaSemStats.getTopPercentageStats();
webstatsStmBuild.append("UPDATE SEMANTICSTATS SET ");
webstatsStmBuild.append("`top_words_lda`=? , ");
webstatsStmBuild.append("`top_words_lda_per`=? ");
webstatsStmBuild.append("WHERE `url`=? AND `query`=? AND `search_engine`=? AND `domain`=?");
try{
conn = DriverManager.getConnection(url,user,password);
stmt = conn.prepareStatement(webstatsStmBuild.toString());
stmt.setInt(1,top_words_lda);
stmt.setDouble(2,top_words_lda_per);
stmt.setString(3,urlString);
stmt.setString(4,quer);
stmt.setInt(5,engine);
stmt.setString(6,domain);
stmt.executeUpdate();
}
finally{
try {
if (stmt != null) stmt.close();
if (conn != null) conn.close();
} catch (SQLException ex) {
Logger.getLogger(Search_analysis.class.getName()).log(Level.SEVERE, null, ex);
}
}
ldaSemStats.getTopWordsStats(parseOutputList.get(j), lda_output, true);//with stemming
int top_words_lda_stem = ldaSemStats.getTopStats();
double top_words_lda_per_stem = ldaSemStats.getTopPercentageStats();
webstatsStmBuild = new StringBuilder();
webstatsStmBuild.append("UPDATE SEMANTICSTATS SET ");
webstatsStmBuild.append("`top_words_lda_stem`=? , ");
webstatsStmBuild.append("`top_words_lda_per_stem`=? ");
webstatsStmBuild.append("WHERE `url`=? AND `query`=? AND `search_engine`=? AND `domain`=?");
try{
conn = DriverManager.getConnection(url,user,password);
stmt = conn.prepareStatement(webstatsStmBuild.toString());
stmt.setInt(1,top_words_lda_stem);
stmt.setDouble(2,top_words_lda_per_stem);
stmt.setString(3,urlString);
stmt.setString(4,quer);
stmt.setInt(5,engine);
stmt.setString(6,domain);
stmt.executeUpdate();
}
finally{
try {
if (stmt != null) stmt.close();
if (conn != null) conn.close();
} catch (SQLException ex) {
Logger.getLogger(Search_analysis.class.getName()).log(Level.SEVERE, null, ex);
}
}
}
}
if(EntitiesMapDBP.get(j)!=null && CategoriesMapDBP.get(j) !=null){
//we are going to check if semantic entities and categories recognized exist in the lda words recognized as prominent
//we are going to use DBPEDIA spotligh and Dandelion named Entity Extraction API
//and stemming through Snowball Stemmer
ldaSemStats.getEntCatStats(EntitiesMapDBP.get(j), CategoriesMapDBP.get(j), lda_output, false);
int ent_cnt_dbpspot_lda = ldaSemStats.getEntStats();
int cat_cnt_dbpspot_lda = ldaSemStats.getCategoryStats();
webstatsStmBuild = new StringBuilder();
webstatsStmBuild.append("UPDATE SEMANTICSTATS SET ");
webstatsStmBuild.append("`ent_cnt_dbpspot_lda`=? , ");
webstatsStmBuild.append("`cat_cnt_dbpspot_lda`=? ");
webstatsStmBuild.append("WHERE `url`=? AND `query`=? AND `search_engine`=? AND `domain`=?");
try{
conn = DriverManager.getConnection(url,user,password);
stmt = conn.prepareStatement(webstatsStmBuild.toString());
stmt.setInt(1,ent_cnt_dbpspot_lda);
stmt.setInt(2,cat_cnt_dbpspot_lda);
stmt.setString(3,urlString);
stmt.setString(4,quer);
stmt.setInt(5,engine);
stmt.setString(6,domain);
stmt.executeUpdate();
}
finally{
try {
if (stmt != null) stmt.close();
if (conn != null) conn.close();
} catch (SQLException ex) {
Logger.getLogger(Search_analysis.class.getName()).log(Level.SEVERE, null, ex);
}
}
ldaSemStats.getEntCatStats( EntitiesMapDBP.get(j), CategoriesMapDBP.get(j), lda_output, true);
int ent_cnt_dbpspot_lda_stem = ldaSemStats.getEntStats();
int cat_cnt_dbpspot_lda_stem = ldaSemStats.getCategoryStats();
webstatsStmBuild = new StringBuilder();
webstatsStmBuild.append("UPDATE SEMANTICSTATS SET ");
webstatsStmBuild.append("`ent_cnt_dbpspot_lda_stem`=? , ");
webstatsStmBuild.append("`cat_cnt_dbpspot_lda_stem`=? ");
webstatsStmBuild.append("WHERE `url`=? AND `query`=? AND `search_engine`=? AND `domain`=?");
try{
conn = DriverManager.getConnection(url,user,password);
stmt = conn.prepareStatement(webstatsStmBuild.toString());
stmt.setInt(1,ent_cnt_dbpspot_lda_stem);
stmt.setInt(2,cat_cnt_dbpspot_lda_stem);
stmt.setString(3,urlString);
stmt.setString(4,quer);
stmt.setInt(5,engine);
stmt.setString(6,domain);
stmt.executeUpdate();
}
finally{
try {
if (stmt != null) stmt.close();
if (conn != null) conn.close();
} catch (SQLException ex) {
Logger.getLogger(Search_analysis.class.getName()).log(Level.SEVERE, null, ex);
}
}
}
if(EntitiesMapDand.get(j)!=null && CategoriesMapDand.get(j) !=null){
ldaSemStats.getEntCatStats(EntitiesMapDand.get(j), CategoriesMapDand.get(j), lda_output, false);
int ent_cnt_dand_lda = ldaSemStats.getEntStats();
int cat_cnt_dand_lda = ldaSemStats.getCategoryStats();
webstatsStmBuild = new StringBuilder();
webstatsStmBuild.append("UPDATE SEMANTICSTATS SET ");
webstatsStmBuild.append("`ent_cnt_dand_lda`=? , ");
webstatsStmBuild.append("`cat_cnt_dand_lda`=? ");
webstatsStmBuild.append("WHERE `url`=? AND `query`=? AND `search_engine`=? AND `domain`=?");
try{
conn = DriverManager.getConnection(url,user,password);
stmt = conn.prepareStatement(webstatsStmBuild.toString());
stmt.setInt(1,ent_cnt_dand_lda);
stmt.setInt(2,cat_cnt_dand_lda);
stmt.setString(3,urlString);
stmt.setString(4,quer);
stmt.setInt(5,engine);
stmt.setString(6,domain);
stmt.executeUpdate();
}
finally{
try {
if (stmt != null) stmt.close();
if (conn != null) conn.close();
} catch (SQLException ex) {
Logger.getLogger(Search_analysis.class.getName()).log(Level.SEVERE, null, ex);
}
}
ldaSemStats.getEntCatStats(EntitiesMapDand.get(j), CategoriesMapDand.get(j), lda_output, true);
int ent_cnt_dand_lda_stem = ldaSemStats.getEntStats();
int cat_cnt_dand_lda_stem = ldaSemStats.getCategoryStats();
webstatsStmBuild = new StringBuilder();
webstatsStmBuild.append("UPDATE SEMANTICSTATS SET ");
webstatsStmBuild.append("`ent_cnt_dand_lda_stem`=? , ");
webstatsStmBuild.append("`cat_cnt_dand_lda_stem`=? ");
webstatsStmBuild.append("WHERE `url`=? AND `query`=? AND `search_engine`=? AND `domain`=?");
try{
conn = DriverManager.getConnection(url,user,password);
stmt = conn.prepareStatement(webstatsStmBuild.toString());
stmt.setInt(1,ent_cnt_dand_lda_stem);
stmt.setInt(2,cat_cnt_dand_lda_stem);
stmt.setString(3,urlString);
stmt.setString(4,quer);
stmt.setInt(5,engine);
stmt.setString(6,domain);
stmt.executeUpdate();
}
finally{
try {
if (stmt != null) stmt.close();
if (conn != null) conn.close();
} catch (SQLException ex) {
Logger.getLogger(Search_analysis.class.getName()).log(Level.SEVERE, null, ex);
}
}
}
if(EntitiesMapYahoo.get(j)!=null && CategoriesMapYahoo.get(j) !=null){
//we are going to check if semantic entities and categories recognized exist in the lda words recognized as prominent
//we are going to use DBPEDIA spotligh and Dandelion named Entity Extraction API
//and stemming through Snowball Stemmer
ldaSemStats.getEntCatStats(EntitiesMapYahoo.get(j), CategoriesMapYahoo.get(j), lda_output, false);
int ent_cnt_y_lda = ldaSemStats.getEntStats();
int cat_cnt_y_lda = ldaSemStats.getCategoryStats();
webstatsStmBuild = new StringBuilder();
webstatsStmBuild.append("UPDATE SEMANTICSTATS SET ");
webstatsStmBuild.append("`ent_cnt_y_lda`=? , ");
webstatsStmBuild.append("`cat_cnt_y_lda`=? ");
webstatsStmBuild.append("WHERE `url`=? AND `query`=? AND `search_engine`=? AND `domain`=?");
try{
conn = DriverManager.getConnection(url,user,password);
stmt = conn.prepareStatement(webstatsStmBuild.toString());
stmt.setInt(1,ent_cnt_y_lda);
stmt.setInt(2,cat_cnt_y_lda);
stmt.setString(3,urlString);
stmt.setString(4,quer);
stmt.setInt(5,engine);
stmt.setString(6,domain);
stmt.executeUpdate();
}
finally{
try {
if (stmt != null) stmt.close();
if (conn != null) conn.close();
} catch (SQLException ex) {
Logger.getLogger(Search_analysis.class.getName()).log(Level.SEVERE, null, ex);
}
}
ldaSemStats.getEntCatStats( EntitiesMapYahoo.get(j), CategoriesMapYahoo.get(j), lda_output, true);
int ent_cnt_y_lda_stem = ldaSemStats.getEntStats();
int cat_cnt_y_lda_stem = ldaSemStats.getCategoryStats();
webstatsStmBuild = new StringBuilder();
webstatsStmBuild.append("UPDATE SEMANTICSTATS SET ");
webstatsStmBuild.append("`ent_cnt_y_lda_stem`=? , ");
webstatsStmBuild.append("`cat_cnt_y_lda_stem`=? ");
webstatsStmBuild.append("WHERE `url`=? AND `query`=? AND `search_engine`=? AND `domain`=?");
try{
conn = DriverManager.getConnection(url,user,password);
stmt = conn.prepareStatement(webstatsStmBuild.toString());
stmt.setInt(1,ent_cnt_y_lda_stem);
stmt.setInt(2,cat_cnt_y_lda_stem);
stmt.setString(3,urlString);
stmt.setString(4,quer);
stmt.setInt(5,engine);
stmt.setString(6,domain);
stmt.executeUpdate();
}
finally{
try {
if (stmt != null) stmt.close();
if (conn != null) conn.close();
} catch (SQLException ex) {
Logger.getLogger(Search_analysis.class.getName()).log(Level.SEVERE, null, ex);
}
}
}
}
}
return wordList;
}
catch (NullPointerException ex) {
Logger.getLogger(Search_analysis.class.getName()).log(Level.SEVERE, null, ex);
ArrayList<String> finalList = new ArrayList<>();
return finalList;
}
catch (SQLException | ElasticsearchException ex) {
Logger.getLogger(Search_analysis.class.getName()).log(Level.SEVERE, null, ex);
ArrayList<String> finalList = new ArrayList<>();
return finalList;
}
finally{
try {
if (stmt != null) stmt.close();
if (conn != null) conn.close();
} catch (SQLException ex) {
Logger.getLogger(Search_analysis.class.getName()).log(Level.SEVERE, null, ex);
}
}
}
}