/** * Mad-Advertisement * Copyright (C) 2011 Thorsten Marx <thmarx@gmx.net> * * This program is free software: you can redistribute it and/or modify it under * the terms of the GNU General Public License as published by the Free Software * Foundation, either version 3 of the License, or (at your option) any later * version. * * This program is distributed in the hope that it will be useful, but WITHOUT * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more * details. * * You should have received a copy of the GNU General Public License along with * this program. If not, see <http://www.gnu.org/licenses/>. */ package net.mad.ads.server.utils.http; import java.io.IOException; import java.io.StringReader; import java.io.UnsupportedEncodingException; import java.net.MalformedURLException; import java.net.URL; import java.net.URLDecoder; import java.util.ArrayList; import java.util.HashMap; import java.util.List; import java.util.Map; import java.util.Set; import javax.servlet.http.HttpServletRequest; import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.de.GermanAnalyzer; import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; import org.apache.lucene.util.Version; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import net.mad.ads.common.util.Strings; import net.mad.ads.server.utils.helper.EncodeHelper; import net.mad.ads.server.utils.request.RequestHelper; public class KeywordUtils { private static final Logger logger = LoggerFactory.getLogger(KeywordUtils.class); private static final Map<String, String> seParams = new HashMap<String, String>(); static { seParams.put("google", "q"); seParams.put("bing", "q"); seParams.put("yahoo", "p"); seParams.put("localhost", "q"); } public static List<String> getKeywords (HttpServletRequest request) { String query = null; if (request.getParameter(RequestHelper.keywords) != null) { query = request.getParameter(RequestHelper.keywords); } else if (request.getParameter(RequestHelper.referrer) != null) { String referrer = request.getParameter(RequestHelper.referrer); query = getSearchEngineQueryString(request, referrer); } else { query = getSearchEngineQueryString(request); } if (Strings.isEmpty(query)) { return null; } query = decodeUrlString(query); return getTokens(query); } public static String getQueryStringParameter( String queryString, String parameterName) { String[] parametersArray = queryString.split("&"); for (String parameter : parametersArray) { if (parameter.contains(parameterName + "=")) { return parameter.split("=")[1]; } } return null; } public static String decodeUrlString (String urlString) { return EncodeHelper.decodeURIComponent(urlString); } public static List<String> getTokens (String queryString) { try { GermanAnalyzer a = new GermanAnalyzer(Version.LUCENE_33); TokenStream ts = a.tokenStream("", new StringReader(queryString)); List<String> tokens = new ArrayList<String>(); CharTermAttribute termAtt = ts.getAttribute(CharTermAttribute.class); ts.reset(); while (ts.incrementToken()) { String token = termAtt.toString(); tokens.add(token); } ts.end(); ts.close(); return tokens; } catch (IOException e) { logger.error("", e); } return null; } public static String getSearchEngineQueryString (HttpServletRequest request) { String referrer = request.getHeader("Referer"); return getSearchEngineQueryString(request, referrer); } public static String getSearchEngineQueryString ( HttpServletRequest request, String referrer) { String queryString = null; String hostName = null; if (referrer != null) { //Validate that Referer header value is a correct URL, a //MalformedURLException is thrown if not URL refererURL; try { refererURL = new URL(referrer); } catch (MalformedURLException e) { return null; } hostName = refererURL.getHost(); queryString = refererURL.getQuery(); if (Strings.isEmpty(queryString)) { return null; } Set<String> keys = seParams.keySet(); for (String se : keys) { if (hostName.toLowerCase().contains(se)) { queryString = getQueryStringParameter(queryString, seParams.get(se)); } } // if (hostName.toLowerCase().contains("google")) { // // queryString = getQueryStringParameter(queryString, "q"); // // } else if (hostName.toLowerCase().contains("yahoo")) { // // queryString = getQueryStringParameter(queryString, "p"); // // } else if (hostName.toLowerCase().contains("bing")) { // // queryString = getQueryStringParameter(queryString, "q"); // // } else { // // //Search engine not recognized // return null; // } return queryString; } //No Referer header found return null; } }