/* * This file is part of the Wayback archival access software * (http://archive-access.sourceforge.net/projects/wayback/). * * Licensed to the Internet Archive (IA) by one or more individual * contributors. * * The IA licenses this file to You under the Apache License, Version 2.0 * (the "License"); you may not use this file except in compliance with * the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.archive.wayback.requestparser; import java.util.Map; import java.util.regex.Pattern; import java.net.URLDecoder; import java.io.UnsupportedEncodingException; import javax.servlet.http.HttpServletRequest; import org.archive.wayback.core.WaybackRequest; import org.archive.wayback.exception.BadQueryException; import org.archive.wayback.webapp.AccessPoint; /** * RequestParser which attempts to extract data from an HTML form, that is, from * HTTP GET request arguments containing a query, an optional count (results * per page), and an optional current page argument. All other reqeust fields * are expected to be encoded within the query ("q") field. * * @author brad */ public class OpenSearchRequestParser extends WrappedRequestParser { /** * @param wrapped the BaseRequestParser being wrapped */ public OpenSearchRequestParser(BaseRequestParser wrapped) { super(wrapped); } /** * CGI argument name for query arguments */ public final static String SEARCH_QUERY = "q"; /** * CGI argument name for number of results per page, 1 based */ public final static String SEARCH_RESULTS = "count"; /** * CGI argument name for page number of results, 1 based */ public final static String START_PAGE = "start_page"; // private final static String START_INDEX = "start_index"; private final static Pattern WHITESPACE_PATTERN = Pattern.compile("\\s+"); // singles consume the next non-whitespace token following the term // private String[] singleTokens = { "url", "site", "mimetype", "noredirect" }; // lines consume the entire rest of the query private String[] lineTokens = { "terms" }; /* * If the request includes a 'q' (query) argument, treat the request * as an OpenSearch query, and extract all query terms, plus pagination * info from the httpRequest object. */ public WaybackRequest parse(HttpServletRequest httpRequest, AccessPoint wbContext) throws BadQueryException { WaybackRequest wbRequest = null; @SuppressWarnings("unchecked") Map<String,String[]> queryMap = httpRequest.getParameterMap(); String query = AccessPoint.getMapParam(queryMap, SEARCH_QUERY); if (query == null) { return null; } wbRequest = new WaybackRequest(); String base = wbContext.translateRequestPath(httpRequest); if (base.startsWith(REPLAY_BASE)) { wbRequest.setReplayRequest(); } else if(base.startsWith(QUERY_BASE)){ wbRequest.setCaptureQueryRequest(); } else if(base.startsWith(XQUERY_BASE)){ wbRequest.setCaptureQueryRequest(); wbRequest.setXMLMode(true); } else { return null; } String numResults = AccessPoint.getMapParam(queryMap, SEARCH_RESULTS); String startPage = AccessPoint.getMapParam(queryMap, START_PAGE); if (numResults != null) { int nr = Integer.parseInt(numResults); wbRequest.setResultsPerPage(nr); } else { wbRequest.setResultsPerPage(getMaxRecords()); } if (startPage != null) { int sp = Integer.parseInt(startPage); wbRequest.setPageNum(sp); } else { wbRequest.setPageNum(1); } // first try the entire line_tokens: for (int i = 0; i < lineTokens.length; i++) { String token = lineTokens[i] + ":"; int index = query.indexOf(token); if (index > -1) { // found it, take value as the remainder of the query String value = query.substring(index + token.length()); // TODO: trim trailing whitespace? wbRequest.put(lineTokens[i], value); query = query.substring(0, index); } } // now split whatever is left on whitespace: String[] parts = WHITESPACE_PATTERN.split(query); for (int i = 0; i < parts.length; i++) { String token = parts[i]; int colonIndex = token.indexOf(":"); if (colonIndex == -1) { throw new BadQueryException("Bad search token(" + token + ")"); } try { String key = URLDecoder.decode(token.substring(0, colonIndex), "UTF-8"); String value = URLDecoder.decode( token.substring(colonIndex + 1), "UTF-8"); // TODO: make sure key is in singleTokens? // let's just let em all thru for now: wbRequest.put(key, value); } catch (UnsupportedEncodingException e) { throw new BadQueryException("Unsupported encoding: UTF-8"); } } if (wbRequest.getStartTimestamp() == null) { wbRequest.setStartTimestamp(getEarliestTimestamp()); } if (wbRequest.getEndTimestamp() == null) { wbRequest.setEndTimestamp(getLatestTimestamp()); } return wbRequest; } }