/*
* To change this template, choose Tools | Templates
* and open the template in the editor.
*/
package edu.isi.pegasus.common.util;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
* A common PegasusURL class for use by the planner and other components.
*
* The class parses the PegasusURL into 3 main components
* - protocol
* - hostname
* - path
*
* The class is consistent with the PegasusURL parsing scheme used in pegasus-transfer.
*
* @author Karan Vahi
* @author Mats Rynge
*/
public class PegasusURL {

    /**
     * The scheme prefix (scheme name plus trailing colon) for file URLs.
     */
    public static final String FILE_URL_SCHEME = "file:";

    /**
     * The scheme prefix (scheme name plus trailing colon) for symlink URLs.
     */
    public static final String SYMLINK_URL_SCHEME = "symlink:";

    /**
     * The scheme prefix (scheme name plus trailing colon) for gsiftp URLs.
     */
    public static final String GSIFTP_URL_SCHEME = "gsiftp:";

    /**
     * The default protocol if none is determined from the PegasusURL or path.
     */
    public static final String DEFAULT_PROTOCOL = "file";

    /**
     * The regular expression used to split a PegasusURL into 3 components:
     * group 1 is the protocol, group 2 the host (optionally with user and
     * port) and group 3 the path. The path may not contain whitespace.
     */
    private static final String mRegexExpression = "([\\w]+)://([\\w\\.\\-:@#]*)(/?[\\S]*)";

    /**
     * The compiled form of the regular expression, built once when the class
     * is loaded and shared by all instances.
     */
    private static final Pattern mPattern = Pattern.compile( mRegexExpression );

    /**
     * The protocol referred to by the PegasusURL.
     */
    private String mProtocol;

    /**
     * The hostname referred to by the PegasusURL.
     * Can include the port also.
     */
    private String mHost;

    /**
     * The path referred to by the PegasusURL.
     */
    private String mPath;

    /**
     * The full URL, including any default protocol that was prepended
     * while parsing.
     */
    private String mURL;

    /**
     * The default constructor. Creates an object whose components are all
     * empty strings until {@link #parse(String)} is called.
     */
    public PegasusURL(){
        reset();
    }

    /**
     * The overloaded constructor.
     *
     * @param url the url to be parsed internally
     *
     * @throws NullPointerException if the url is null
     * @throws IllegalArgumentException if the url cannot be parsed
     */
    public PegasusURL( String url ){
        this();
        this.parse( url );
    }

    /**
     * Parses the url and populates the internal member variables that can
     * be accessed via the appropriate accessor methods.
     *
     * A value that starts with a slash, or that contains no colon at all,
     * is treated as a bare path and gets the default protocol prepended
     * before matching. Any other value must already be of the form
     * <code>protocol://host/path</code>.
     *
     * If parsing fails, all components of this object are left empty.
     *
     * @param url the url or absolute file path to parse
     *
     * @throws NullPointerException if the url is null
     * @throws IllegalArgumentException if the url cannot be parsed
     */
    public void parse( String url ){
        //reset first, so that a failed parse never leaves stale or partial state
        reset();

        if( url == null ){
            throw new NullPointerException( "Unable to parse a null URL" );
        }

        //special case for file url's given as a bare path. a leading slash
        //can never be part of a protocol, so an absolute path is a file path
        //even when one of its components contains a colon
        String candidate = url;
        if( url.startsWith( "/" ) || url.indexOf( ':' ) == -1 ){
            candidate = PegasusURL.DEFAULT_PROTOCOL + "://" + url;
        }

        Matcher m = mPattern.matcher( candidate );
        if( !m.matches() ){
            throw new IllegalArgumentException( "Unable to parse URL " + candidate );
        }

        //only commit to the members once the match is known to be good
        mURL      = candidate;
        mProtocol = m.group( 1 );
        mHost     = m.group( 2 );
        mPath     = m.group( 3 );
    }

    /**
     * Returns the protocol associated with the PegasusURL.
     *
     * @return the protocol else empty
     */
    public String getProtocol(){
        return mProtocol;
    }

    /**
     * Returns the host associated with the PegasusURL.
     *
     * @return the host else empty
     */
    public String getHost(){
        return mHost;
    }

    /**
     * Returns the path associated with the PegasusURL.
     *
     * @return the path else empty
     */
    public String getPath(){
        return mPath;
    }

    /**
     * Returns the url prefix associated with the PegasusURL. The PegasusURL prefix is the part
     * of the PegasusURL composed of protocol and the hostname.
     *
     * For example PegasusURL prefix for
     * <pre>
     * gsiftp://dataserver.phys.uwm.edu/~/griphyn_test/ligodemo_output
     * </pre>
     *
     * is gsiftp://dataserver.phys.uwm.edu
     *
     * @return the protocol, followed by "://" and the host
     */
    public String getURLPrefix(){
        return this.getProtocol() + "://" + this.getHost();
    }

    /**
     * Returns the full URL denoted by this object.
     *
     * @return the url as matched while parsing, else empty
     */
    public String getURL(){
        return mURL;
    }

    /**
     * Resets the internal member variables to empty strings.
     */
    public void reset() {
        mProtocol = "";
        mHost     = "";
        mPath     = "";
        mURL      = "";
    }

    /**
     * The contents represented as a string.
     *
     * @return the url, protocol, host, path and url prefix as a
     *         " , " separated list of "name -> value" pairs
     */
    @Override
    public String toString(){
        StringBuilder sb = new StringBuilder();
        sb.append( "url -> " ).append( this.getURL() ).append( " , " ).
           append( "protocol -> " ).append( this.getProtocol() ).append( " , " ).
           append( "host -> " ).append( this.getHost() ).append( " , " ).
           append( "path -> " ).append( this.getPath() ).append( " , " ).
           append( "url-prefix -> ").append( this.getURLPrefix() );
        return sb.toString();
    }

    /**
     * Test program. Prints each sample url followed by its parsed form.
     *
     * @param args ignored
     */
    public static void main( String[] args ){
        String[] samples = {
            //protocol -> gsiftp , host -> sukhna.isi.edu , path -> /tmp/test.file , url-prefix -> gsiftp://sukhna.isi.edu
            "gsiftp://sukhna.isi.edu/tmp/test.file",
            //protocol -> file , host -> , path -> /tmp/test/k , url-prefix -> file://
            "file:///tmp/test/k",
            //protocol -> gsiftp , host -> dataserver.phys.uwm.edu , path -> /~/griphyn_test/ligodemo_output/ , url-prefix -> gsiftp://dataserver.phys.uwm.edu
            "gsiftp://dataserver.phys.uwm.edu/~/griphyn_test/ligodemo_output/",
            //protocol -> file , host -> , path -> /tmp/path/to/input/file , url-prefix -> file://
            "/tmp/path/to/input/file",
            "http://isis.isi.edu/",
            "http://isis.isi.edu/filename",
            "http://isis.isi.edu/directory/filename"
        };

        for( String url : samples ){
            System.out.println( url );
            System.out.println( new PegasusURL( url ) );
        }
    }
}