/*
org.org.lib.repository.crawler is a java library/OSGI Bundle
Providing Crawling capabilities for Maven 2 HTTP exposed repositories
Copyright (C) 2007 Pierre-Antoine Grégoire
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
package org.org.repository.crawler.maven2.model.protocolplugins;
import java.io.BufferedReader;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.net.InetSocketAddress;
import java.net.MalformedURLException;
import java.net.Proxy;
import java.net.URL;
import java.util.Queue;
import java.util.concurrent.LinkedBlockingQueue;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.org.repository.crawler.RepositoryCrawlingException;
import org.org.repository.crawler.items.IHttpCrawledRepositorySetup;
import org.org.repository.crawler.items.IPatternSet;
import org.org.repository.crawler.items.mutable.PatternSet;
import org.org.repository.crawler.mapping.Entry;
import org.org.repository.crawler.mapping.Entry.MavenType;
import org.org.repository.crawler.mapping.Entry.RawType;
import org.org.repository.crawler.maven2.RepositoryCrawlerService;
import org.org.repository.crawler.maven2.model.ArtifactVersion;
/**
* @author pagregoire
*/
public class HttpRepositoryBrowserPlugin implements IRepositoryBrowserPlugin<IHttpCrawledRepositorySetup> {
public final static IPatternSet TOMCAT6_PATTERNSET;
public final static IPatternSet APACHE2_PATTERNSET;
public final static IPatternSet ARTIFACTORY_PATTERNSET;
public final static IPatternSet S3BROWSE_PATTERNSET;
static {
PatternSet tmpTomcat6PatternSet = new PatternSet();
tmpTomcat6PatternSet.setLabel("Tomcat 6 parsing patterns");
tmpTomcat6PatternSet.setEntryPattern(".*<a href=\".+\"><.*>(.+)<.*></a>.*");
tmpTomcat6PatternSet.setParentDirectoryPattern(".*<a href=\".+\"><.*>Up To.*<.*></a>.*");
tmpTomcat6PatternSet.setFileEntryPattern(".*<a href=\".*\"><.*>(.+)<.*></a>.*");
tmpTomcat6PatternSet.setDirectoryEntryPattern(".*<a href=\".*\"><.*>(.+)/<.*></a>.*");
PatternSet tmpApache2PatternSet = new PatternSet();
tmpApache2PatternSet.setLabel("Apache 2 parsing patterns");
tmpApache2PatternSet.setEntryPattern(".*<a href=\".+\">(.+)</a>.*");
tmpApache2PatternSet.setParentDirectoryPattern(".*<a href=\".+\">Parent Directory</a>.*");
tmpApache2PatternSet.setFileEntryPattern(".*<a href=\".*\">(.+)</a>.*");
tmpApache2PatternSet.setDirectoryEntryPattern(".*<a href=\".*\">(.+)/</a>.*");
PatternSet tmpArtifactoryPatternSet = new PatternSet();
tmpArtifactoryPatternSet.setLabel("Artifactory parsing patterns");
tmpArtifactoryPatternSet.setEntryPattern("[^>]*<a href=\"[^#?]+\">(.+)</a>[^&]*");
tmpArtifactoryPatternSet.setParentDirectoryPattern(".*<a href=\"[^#]+\">..</a>.*");
tmpArtifactoryPatternSet.setFileEntryPattern(".*<a href=\"[^#]*\">(.+)</a>.*");
tmpArtifactoryPatternSet.setDirectoryEntryPattern(".*<a href=\"[^#]*/\">(.+)</a>.*");
PatternSet tmpS3BrowsePatternSet = new PatternSet();
tmpS3BrowsePatternSet.setLabel("S3 Browse parsing patterns");
tmpS3BrowsePatternSet.setEntryPattern("[^>]*<(p|td)><a href=\"[^#?]+\">(.+)</a>.*");
tmpS3BrowsePatternSet.setParentDirectoryPattern("[^>]*<p><a href=\"[^#]+\">ROOT</a>.*");
tmpS3BrowsePatternSet.setFileEntryPattern("[^>]*<td><a href=\"[^#]*\">(.+)</a>.*");
tmpS3BrowsePatternSet.setDirectoryEntryPattern("[^>]*<td><a href=\"[^#]*/\">(.+)</a>.*");
TOMCAT6_PATTERNSET = tmpTomcat6PatternSet.getImmutable();
APACHE2_PATTERNSET = tmpApache2PatternSet.getImmutable();
ARTIFACTORY_PATTERNSET = tmpArtifactoryPatternSet.getImmutable();
S3BROWSE_PATTERNSET = tmpS3BrowsePatternSet.getImmutable();
}
private IPatternSet patternSet = APACHE2_PATTERNSET;
private Pattern parentPatternCompiled = Pattern.compile(patternSet.getParentDirectoryPattern(), Pattern.CASE_INSENSITIVE);
private Pattern entryPatternCompiled = Pattern.compile(patternSet.getEntryPattern(), Pattern.CASE_INSENSITIVE);
private Pattern directoryPatternCompiled = Pattern.compile(patternSet.getDirectoryEntryPattern(), Pattern.CASE_INSENSITIVE);
private Pattern filePatternCompiled = Pattern.compile(patternSet.getFileEntryPattern(), Pattern.CASE_INSENSITIVE);
public static final int DIRECTORY_PATTERN_GROUP_INDEX = 1;
public static final int FILE_PATTERN_GROUP_INDEX = 1;
private Proxy proxy;
public IPatternSet getPatternSet() {
return patternSet;
}
public Pattern getParentPatternCompiled() {
return parentPatternCompiled;
}
public Pattern getEntryPatternCompiled() {
return entryPatternCompiled;
}
public Pattern getDirectoryPatternCompiled() {
return directoryPatternCompiled;
}
public Pattern getFilePatternCompiled() {
return filePatternCompiled;
}
public Proxy getProxy() {
return proxy;
}
/**
* init this class with the repository info
*
* @param httpBrowsedRepository
*/
public void init(IHttpCrawledRepositorySetup repositorySetup) {
if (repositorySetup.getProxyHost() != null) {
proxy = new Proxy(Proxy.Type.HTTP, new InetSocketAddress(repositorySetup.getProxyHost(), repositorySetup.getProxyPort()));
} else {
proxy = Proxy.NO_PROXY;
}
setPatternSet(repositorySetup.getPatternSet());
}
/**
* Checks the repository's validity
*
* @param repositorySetup
* @return
*/
public void checkRepositorySetup(IHttpCrawledRepositorySetup repositorySetup) {
if (repositorySetup.getBaseUrl() == null) {
throw new RepositoryCrawlingException("An url should be defined for the repository.");
}
}
/**
* @param requestedUrl
* @return
* @throws IOException
*/
public Queue<Entry> getEntryList(String requestedUrl) throws IOException {
Queue<Entry> entryList = new LinkedBlockingQueue<Entry>();
BufferedReader response = null;
StringBuilder buffer = new StringBuilder();
try {
response = open(requestedUrl);
String line = null;
while ((line = response.readLine()) != null) {
buffer.append(line + "\n");
Entry entry = new Entry();
entry.setValue(line);
Matcher entryMatcher = entryPatternCompiled.matcher(entry.getValue());
Matcher parentMatcher = parentPatternCompiled.matcher(entry.getValue());
boolean isEntry = entryMatcher.matches();
boolean isParent = parentMatcher.matches();
if (isEntry && !isParent) {
Matcher directoryMatcher = directoryPatternCompiled.matcher(entry.getValue());
Matcher fileMatcher = filePatternCompiled.matcher(entry.getValue());
if (directoryMatcher.matches()) {
entry.setRawType(RawType.DIRECTORY);
}
if (entry.isRawType(RawType.DIRECTORY)) {
entry.setResolvedName(directoryMatcher.group(DIRECTORY_PATTERN_GROUP_INDEX));
if (RepositoryCrawlerService.ARTIFACT_VERSION_FOLDER_PATTERN.matcher(entry.getResolvedName()).matches()) {
entry.setMavenType(MavenType.ARTIFACT_VERSION_FOLDER);
}
} else {
fileMatcher.matches();
entry.setResolvedName(fileMatcher.group(FILE_PATTERN_GROUP_INDEX));
if (fileMatcher.matches()) {
entry.setRawType(RawType.FILE);
}
if (entry.isRawType(RawType.FILE)) {
if (entry.getResolvedName().startsWith("maven-metadata")) {
entry.setMavenType(MavenType.METADATA_FILE);
}
}
}
entryList.add(entry);
}
}
} catch (FileNotFoundException e) {
if (response != null) {
close(response);
}
}
return entryList;
}
private BufferedReader open(String url) throws IOException {
if (!url.endsWith("/")) {
url = url + "/";
}
InputStream inputStream = null;
if (proxy == null) {
inputStream = (new URL(url)).openConnection().getInputStream();
} else {
inputStream = (new URL(url)).openConnection(proxy).getInputStream();
}
return new BufferedReader(new InputStreamReader(inputStream));
}
private void close(BufferedReader in) {
if (in != null) {
try {
in.close();
} catch (IOException e1) {
// ignore.
}
}
}
/**
* @param repositorySetup
* @param upperGroupName
* @param folderName
* @return
*/
public String buildUrl(IHttpCrawledRepositorySetup repositorySetup, String upperGroupName, String folderName) {
StringBuffer buffer = new StringBuffer(repositorySetup.getBaseUrl());
buffer.append((repositorySetup.getBaseUrl().endsWith("/") ? "" : "/"));
// if (!upperGroupName.equals("")) {
// buffer.append(upperGroupName.replace('.', '/'));
// }
if (!upperGroupName.equals("")) {
buffer.append(upperGroupName);
}
if (!folderName.equals("")) {
buffer.append("/" + (folderName.endsWith("/") ? folderName : folderName + "/"));
}
return buffer.toString();
}
/**
* @param versionFolder
* @param version
* @return
*/
public String cleanFolderName(String folderName) {
return folderName;
}
private void setPatternSet(IPatternSet patternSet) {
this.patternSet = patternSet;
entryPatternCompiled = Pattern.compile(patternSet.getEntryPattern());
directoryPatternCompiled = Pattern.compile(patternSet.getDirectoryEntryPattern());
filePatternCompiled = Pattern.compile(patternSet.getFileEntryPattern());
parentPatternCompiled = Pattern.compile(patternSet.getParentDirectoryPattern());
}
/**
* @see org.org.repository.crawler.maven2.RepositoryCrawlerService#setUrlToArtifactVersion(org.org.maven2.crawler.items.ArtifactVersion)
*/
public void setUrlForArtifactVersion(ArtifactVersion artifactVersion, String requestedUrl, String version) throws MalformedURLException {
artifactVersion.setUrl(new URL((requestedUrl.endsWith("/") ? requestedUrl : (requestedUrl + "/")) + version + "/" + artifactVersion.getId()));
}
}