/*
 *  This file is part of the Wayback archival access software
 *   (http://archive-access.sourceforge.net/projects/wayback/).
 *
 *  Licensed to the Internet Archive (IA) by one or more individual
 *  contributors.
 *
 *  The IA licenses this file to You under the Apache License, Version 2.0
 *  (the "License"); you may not use this file except in compliance with
 *  the License.  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 *  Unless required by applicable law or agreed to in writing, software
 *  distributed under the License is distributed on an "AS IS" BASIS,
 *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 *  See the License for the specific language governing permissions and
 *  limitations under the License.
 */
package org.archive.wayback.resourcestore.locationdb;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileFilter;
import java.io.IOException;
import java.io.InputStreamReader;
import java.util.Arrays;
import java.util.Iterator;
import java.util.logging.Logger;

import org.apache.commons.httpclient.HttpClient;
import org.apache.commons.httpclient.HttpStatus;
import org.apache.commons.httpclient.NameValuePair;
import org.apache.commons.httpclient.methods.GetMethod;
import org.apache.commons.httpclient.methods.PostMethod;
import org.apache.commons.httpclient.util.ParameterFormatter;
import org.archive.util.iterator.CloseableIterator;
import org.archive.wayback.util.ByteOp;
import org.archive.wayback.util.WrappedCloseableIterator;

/**
 * HTTP client implementation of {@link ResourceFileLocationDB}: every
 * operation is forwarded as a GET or POST request to the location-DB URL
 * given at construction time. Argument and operation names come from
 * {@link ResourceFileLocationDBServlet}.
 *
 * <p>Also provides a command line entry point, see {@link #main(String[])}.
 *
 * @author brad
 * @version $Date$, $Revision$
 */
public class RemoteResourceFileLocationDB implements ResourceFileLocationDB {
	private static final Logger LOGGER =
		Logger.getLogger(RemoteResourceFileLocationDB.class.getName());

	private final static String ARC_SUFFIX = ".arc";
	private final static String ARC_GZ_SUFFIX = ".arc.gz";
	private final static String WARC_SUFFIX = ".warc";
	private final static String WARC_GZ_SUFFIX = ".warc.gz";
	private final static String OK_RESPONSE_PREFIX = "OK ";

	private final HttpClient client;
	private final String serverUrl;

	/**
	 * @param serverUrl URL of the remote location DB that all requests are
	 *                  sent to
	 */
	public RemoteResourceFileLocationDB(final String serverUrl) {
		this.serverUrl = serverUrl;
		this.client = new HttpClient();
	}

	/**
	 * @return long value representing the current end "mark" of the db log
	 * @throws IOException if the request fails, or the server response does
	 *                     not contain a parseable number
	 */
	public long getCurrentMark() throws IOException {
		NameValuePair[] args = {
				new NameValuePair(
						ResourceFileLocationDBServlet.OPERATION_ARGUMENT,
						ResourceFileLocationDBServlet.GETMARK_OPERATION),
		};
		String mark = doGetMethod(args);
		if (mark == null) {
			throw new IOException("No mark returned by " + serverUrl);
		}
		try {
			// trim() tolerates a trailing line terminator in the payload
			return Long.parseLong(mark.trim());
		} catch (NumberFormatException e) {
			// report a malformed response through the declared exception
			// type rather than leaking an unchecked exception to callers
			IOException ioe = new IOException("Bad mark response: " + mark);
			ioe.initCause(e);
			throw ioe;
		}
	}

	/**
	 * @param start mark at which the range begins
	 * @param end mark at which the range ends
	 * @return Iterator of file names between marks start and end; empty if
	 *         the server reported nothing for the range
	 * @throws IOException if the request fails
	 */
	public CloseableIterator<String> getNamesBetweenMarks(long start, long end)
			throws IOException {
		NameValuePair[] args = {
				new NameValuePair(
						ResourceFileLocationDBServlet.OPERATION_ARGUMENT,
						ResourceFileLocationDBServlet.GETRANGE_OPERATION),
				new NameValuePair(
						ResourceFileLocationDBServlet.START_ARGUMENT,
						String.valueOf(start)),
				new NameValuePair(
						ResourceFileLocationDBServlet.END_ARGUMENT,
						String.valueOf(end))
		};
		String names = doGetMethod(args);
		// doGetMethod() returns null for a NO_LOCATION response; an empty
		// payload would otherwise split into a single bogus "" entry.
		String[] lines = (names == null || names.length() == 0)
				? new String[0]
				: names.split("\n");
		return new WrappedCloseableIterator<String>(
				Arrays.asList(lines).iterator());
	}

	/**
	 * return an array of String URLs for all known locations of the file
	 * in the DB.
	 * @param name name of the file to look up
	 * @return String[] of URLs to arcName, or null if the server reports no
	 *         location for the name
	 * @throws IOException if the request fails
	 */
	public String[] nameToUrls(final String name) throws IOException {
		NameValuePair[] args = {
				new NameValuePair(
						ResourceFileLocationDBServlet.OPERATION_ARGUMENT,
						ResourceFileLocationDBServlet.LOOKUP_OPERATION),
				new NameValuePair(
						ResourceFileLocationDBServlet.NAME_ARGUMENT,
						name)
		};
		String locations = doGetMethod(args);
		if (locations != null) {
			return locations.split("\n");
		}
		return null;
	}

	/**
	 * add an Url location for an arcName, unless it already exists
	 * @param name name of the file
	 * @param url location to record for the file
	 * @throws IOException if the request fails or is rejected by the server
	 */
	public void addNameUrl(final String name, final String url)
			throws IOException {
		doPostMethod(ResourceFileLocationDBServlet.ADD_OPERATION, name, url);
	}

	/**
	 * remove a single url location for a name, if it exists
	 * @param name name of the file
	 * @param url location to remove for the file
	 * @throws IOException if the request fails or is rejected by the server
	 */
	public void removeNameUrl(final String name, final String url)
			throws IOException {
		doPostMethod(ResourceFileLocationDBServlet.REMOVE_OPERATION, name, url);
	}

	/**
	 * Issue a GET to the server URL with the given arguments as the query
	 * string, and return the payload of an OK response.
	 *
	 * @param data query arguments, appended in order
	 * @return the response body following the OK prefix, or null if the
	 *         response starts with
	 *         {@link ResourceFileLocationDBServlet#NO_LOCATION_PREFIX}
	 * @throws IOException on a non-200 status, a missing response body, or a
	 *                     response that starts with neither prefix
	 */
	private String doGetMethod(NameValuePair[] data) throws IOException {
		ParameterFormatter formatter = new ParameterFormatter();
		formatter.setAlwaysUseQuotes(false);
		StringBuilder finalUrl = new StringBuilder(serverUrl);
		for (int i = 0; i < data.length; i++) {
			// exactly one '?' introduces the query, '&' separates the rest
			finalUrl.append(i == 0 ? '?' : '&');
			finalUrl.append(formatter.format(data[i]));
		}

		GetMethod method = new GetMethod(finalUrl.toString());
		try {
			int statusCode = client.executeMethod(method);
			if (statusCode != HttpStatus.SC_OK) {
				throw new IOException("Method failed: "
						+ method.getStatusLine());
			}
			String responseString = method.getResponseBodyAsString();
			if (responseString == null) {
				throw new IOException("Empty response from " + serverUrl);
			}
			if (!responseString.startsWith(OK_RESPONSE_PREFIX)) {
				if (responseString.startsWith(
						ResourceFileLocationDBServlet.NO_LOCATION_PREFIX)) {
					return null;
				}
				throw new IOException(responseString);
			}
			// NOTE(review): skips one character beyond the "OK " prefix,
			// presumably a separator emitted by the servlet -- confirm
			// against ResourceFileLocationDBServlet.
			return responseString.substring(OK_RESPONSE_PREFIX.length() + 1);
		} finally {
			// hand the connection back to the connection manager
			method.releaseConnection();
		}
	}

	/**
	 * Issue a POST to the server URL carrying an operation, a file name and
	 * a URL.
	 *
	 * @param operation value for the operation argument
	 * @param arcName value for the name argument
	 * @param arcUrl value for the url argument
	 * @throws IOException on a non-200 status, a missing response body, or a
	 *                     response that does not start with the OK prefix
	 */
	private void doPostMethod(final String operation, final String arcName,
			final String arcUrl) throws IOException {
		PostMethod method = new PostMethod(serverUrl);
		NameValuePair[] data = {
				new NameValuePair(
						ResourceFileLocationDBServlet.OPERATION_ARGUMENT,
						operation),
				new NameValuePair(
						ResourceFileLocationDBServlet.NAME_ARGUMENT,
						arcName),
				new NameValuePair(
						ResourceFileLocationDBServlet.URL_ARGUMENT,
						arcUrl)
		};
		method.setRequestBody(data);
		try {
			int statusCode = client.executeMethod(method);
			if (statusCode != HttpStatus.SC_OK) {
				throw new IOException("Method failed: "
						+ method.getStatusLine());
			}
			String responseString = method.getResponseBodyAsString();
			if (responseString == null) {
				throw new IOException("Empty response from " + serverUrl);
			}
			if (!responseString.startsWith(OK_RESPONSE_PREFIX)) {
				throw new IOException(responseString);
			}
		} finally {
			// hand the connection back to the connection manager
			method.releaseConnection();
		}
	}

	/* (non-Javadoc)
	 * @see org.archive.wayback.resourcestore.locationdb.ResourceFileLocationDB#shutdown()
	 */
	public void shutdown() throws IOException {
		// NO-OP
	}

	/**
	 * Print the usage text, prefixed by message, to STDERR and terminate the
	 * JVM with exit status 2. Never returns.
	 *
	 * @param message problem description shown before the usage text
	 */
	private static void USAGE(String message) {
		System.err.print("USAGE: " + message + "\n" +
				"\t[lookup|add|remove|sync] ...\n" +
				"\n" +
				"\t lookup LOCATION-DB-URL ARC\n" +
				"\t\temit all known URLs for arc ARC\n" +
				"\n" +
				"\t add LOCATION-DB-URL ARC URL\n" +
				"\t\tinform locationDB that ARC is located at URL\n" +
				"\n" +
				"\t remove LOCATION-DB-URL ARC URL\n" +
				"\t\tremove reference to ARC at URL in locationDB\n" +
				"\n" +
				"\t sync LOCATION-DB-URL DIR DIR-URL\n" +
				"\t\tscan directory DIR, and submit all ARC files therein\n" +
				"\t\tto locationDB at url DIR-URL/ARC\n" +
				"\n" +
				"\t get-mark LOCATION-DB-URL\n" +
				"\t\temit an identifier for the current marker in the \n" +
				"\t\tlocationDB log. These identifiers can be used with the\n" +
				"\t\tmark-range operation.\n" +
				"\n" +
				"\t mark-range LOCATION-DB-URL START END\n" +
				"\t\temit to STDOUT one line with the name of all ARC files\n" +
				"\t\tadded to the locationDB between marks START and END\n" +
				"\n" +
				"\t add-stream LOCATION-DB-URL\n" +
				"\t\tread lines from STDIN formatted like:\n" +
				"\t\t\tNAME<SPACE>URL\n" +
				"\t\tand for each line, inform locationDB that file NAME is\n" +
				"\t\tlocated at URL\n"
				);
		System.exit(2);
	}

	/**
	 * Command line client. args[0] is the operation, args[1] the location DB
	 * URL; remaining arguments depend on the operation (see the usage text
	 * printed by USAGE). Exits with status 2 on usage errors and 1 on
	 * request failures.
	 *
	 * @param args command line arguments
	 */
	public static void main(String[] args) {
		if (args.length < 2) {
			USAGE("");
			System.exit(1);
		}
		String operation = args[0];
		String dbUrl = args[1];
		if (!dbUrl.startsWith("http://")) {
			USAGE("URL argument 1 must begin with http://");
		}

		RemoteResourceFileLocationDB locationClient =
			new RemoteResourceFileLocationDB(dbUrl);

		if (operation.equalsIgnoreCase("add-stream")) {
			BufferedReader r = new BufferedReader(
					new InputStreamReader(System.in, ByteOp.UTF8));
			String line;
			try {
				while ((line = r.readLine()) != null) {
					String parts[] = line.split(" ");
					if (parts.length != 2) {
						System.err.println("Bad input(" + line + ")");
						System.exit(2);
					}
					locationClient.addNameUrl(parts[0], parts[1]);
					System.out.println("Added\t" + parts[0] + "\t" + parts[1]);
				}
			} catch (IOException e) {
				e.printStackTrace();
				System.exit(1);
			}

		} else if (operation.equalsIgnoreCase("get-mark")) {
			// get-mark takes no argument beyond the DB URL, so it has to be
			// dispatched before the third-argument check below.
			if (args.length != 2) {
				USAGE("get-mark LOCATION-URL");
			}
			try {
				long mark = locationClient.getCurrentMark();
				System.out.println(mark);
			} catch (IOException e) {
				System.err.println(e.getMessage());
				System.exit(1);
			}

		} else {
			// every remaining operation needs at least one more argument
			if (args.length < 3) {
				USAGE("");
				System.exit(1);
			}
			String name = args[2];

			if (operation.equalsIgnoreCase("lookup")) {
				try {
					String[] locations = locationClient.nameToUrls(name);
					if (locations == null) {
						System.err.println("No locations for " + name);
						System.exit(1);
					}
					for (int i = 0; i < locations.length; i++) {
						System.out.println(locations[i]);
					}
				} catch (IOException e) {
					System.err.println(e.getMessage());
					System.exit(1);
				}

			} else if (operation.equalsIgnoreCase("mark-range")) {
				if (args.length != 4) {
					USAGE("mark-range LOCATION-URL START END");
				}
				long start;
				long end;
				try {
					// START and END are the 3rd and 4th arguments
					start = Long.parseLong(args[2]);
					end = Long.parseLong(args[3]);
				} catch (NumberFormatException e) {
					USAGE("mark-range START and END must be integers");
					return; // not reached: USAGE() exits
				}
				try {
					Iterator<String> it =
						locationClient.getNamesBetweenMarks(start, end);
					while (it.hasNext()) {
						System.out.println(it.next());
					}
				} catch (IOException e) {
					System.err.println(e.getMessage());
					System.exit(1);
				}

			} else if (operation.equalsIgnoreCase("add")) {
				if (args.length != 4) {
					USAGE("add LOCATION-URL ARC ARC-URL");
				}
				String url = args[3];
				if (!url.startsWith("http://")) {
					USAGE("ARC-URL argument 4 must begin with http://");
				}
				try {
					locationClient.addNameUrl(name, url);
					System.out.println("OK");
				} catch (IOException e) {
					System.err.println(e.getMessage());
					System.exit(1);
				}

			} else if (operation.equalsIgnoreCase("remove")) {
				if (args.length != 4) {
					USAGE("remove LOCATION-URL FILE-NAME FILE-URL");
				}
				String url = args[3];
				if (!url.startsWith("http://")) {
					USAGE("URL argument 4 must begin with http://");
				}
				try {
					locationClient.removeNameUrl(name, url);
					System.out.println("OK");
				} catch (IOException e) {
					System.err.println(e.getMessage());
					System.exit(1);
				}

			} else if (operation.equalsIgnoreCase("sync")) {
				if (args.length != 4) {
					USAGE("sync LOCATION-URL DIR DIR-URL");
				}
				File dir = new File(name);
				String dirUrl = args[3];
				if (!dirUrl.startsWith("http://")) {
					USAGE("DIR-URL argument 4 must begin with http://");
				}
				// files are registered at DIR-URL/NAME; add the separator
				// when the caller did not supply a trailing slash
				String urlPrefix = dirUrl.endsWith("/") ? dirUrl : dirUrl + "/";
				try {
					if (!dir.isDirectory()) {
						USAGE("DIR " + name + " is not a directory");
					}
					FileFilter filter = new FileFilter() {
						public boolean accept(File daFile) {
							String fileName = daFile.getName();
							return daFile.isFile() &&
								(fileName.endsWith(ARC_SUFFIX) ||
								fileName.endsWith(ARC_GZ_SUFFIX) ||
								fileName.endsWith(WARC_SUFFIX) ||
								fileName.endsWith(WARC_GZ_SUFFIX));
						}
					};

					File[] files = dir.listFiles(filter);
					if (files == null) {
						throw new IOException("Directory "
								+ dir.getAbsolutePath()
								+ " is not a directory or had an IO error");
					}
					for (int i = 0; i < files.length; i++) {
						String fileName = files[i].getName();
						String fileUrl = urlPrefix + fileName;
						LOGGER.info("Adding location " + fileUrl +
								" for file " + fileName);
						locationClient.addNameUrl(fileName, fileUrl);
					}
				} catch (IOException e) {
					System.err.println(e.getMessage());
					System.exit(1);
				}

			} else {
				USAGE(" unknown operation " + operation);
			}
		}
	}
}