/******************************************************************************* * Copyright (c) 2004 - 2007 * Thomas Hallgren, Kenneth Olwing, Mitch Sonies * Pontus Rydin, Nils Unden, Peer Torngren * The code, documentation and other materials contained herein have been * licensed under the Eclipse Public License - v 1.0 by the individual * copyright holders listed above, as Initial Contributors under such license. * The text of such license is available at www.eclipse.org. *******************************************************************************/ package org.eclipse.buckminster.core.reader; import java.io.File; import java.io.FileNotFoundException; import java.io.IOException; import java.net.MalformedURLException; import java.net.URI; import java.net.URISyntaxException; import java.net.URL; import java.util.ArrayList; import java.util.Date; import java.util.List; import java.util.Map; import java.util.Scanner; import java.util.regex.MatchResult; import java.util.regex.Matcher; import java.util.regex.Pattern; import javax.xml.parsers.DocumentBuilder; import javax.xml.parsers.DocumentBuilderFactory; import javax.xml.parsers.ParserConfigurationException; import org.eclipse.buckminster.core.CorePlugin; import org.eclipse.buckminster.core.RMContext; import org.eclipse.buckminster.core.cspec.model.ComponentRequest; import org.eclipse.buckminster.core.ctype.IComponentType; import org.eclipse.buckminster.core.helpers.AccessibleByteArrayOutputStream; import org.eclipse.buckminster.core.helpers.FileUtils; import org.eclipse.buckminster.core.metadata.model.Resolution; import org.eclipse.buckminster.core.query.builder.ComponentQueryBuilder; import org.eclipse.buckminster.core.resolver.NodeQuery; import org.eclipse.buckminster.core.resolver.ResolutionContext; import org.eclipse.buckminster.core.rmap.model.Provider; import org.eclipse.buckminster.core.rmap.model.ProviderScore; import org.eclipse.buckminster.core.version.ProviderMatch; import org.eclipse.buckminster.core.version.VersionMatch; import org.eclipse.buckminster.download.DownloadManager; import org.eclipse.buckminster.runtime.BuckminsterException; import org.eclipse.buckminster.runtime.MonitorUtils; import org.eclipse.buckminster.runtime.Trivial; import org.eclipse.buckminster.runtime.URLUtils; import org.eclipse.core.resources.IResource; import org.eclipse.core.resources.IResourceVisitor; import org.eclipse.core.resources.IWorkspaceRoot; import org.eclipse.core.resources.ResourcesPlugin; import org.eclipse.core.runtime.CoreException; import org.eclipse.core.runtime.IPath; import org.eclipse.core.runtime.IProgressMonitor; import org.eclipse.core.runtime.Path; import org.eclipse.ecf.core.security.IConnectContext; import org.w3c.dom.Document; import org.w3c.dom.Element; import org.w3c.dom.Node; import org.xml.sax.ErrorHandler; import org.xml.sax.InputSource; import org.xml.sax.SAXException; import org.xml.sax.SAXParseException; /** * @author Thomas Hallgren */ public class URLCatalogReaderType extends CatalogReaderType { private class LastModficationTimeFinder implements IResourceVisitor { long timestamp = -1; @Override public boolean visit(IResource resource) throws CoreException { if (resource.isDerived() || resource.isHidden()) return false; long modstamp = resource.getLocalTimeStamp(); if (modstamp > timestamp) timestamp = modstamp; return true; } Date getTimestamp() { return timestamp == -1 ? null : new Date(timestamp); } } private static final DocumentBuilderFactory documentBuilderFactory = DocumentBuilderFactory.newInstance(); /** * Pattern that scans for href's that are relative and don't start with ? */ private static final Pattern htmlPattern = Pattern.compile("<A\\s+HREF=\"([^?][^:\"]+)\"\\s*>[^<]+</A>", //$NON-NLS-1$ Pattern.CASE_INSENSITIVE); /** * Scan a listing obtained using FTP. The file name comes after a timestamp * that ends with <hh:mm> or <year> and might contain a link, i.e. xxx -> * yyy. */ private static final Pattern ftpPattern = Pattern.compile( "[a-z]+\\s+[0-9]+\\s+(?:(?:[0-9]+:[0-9]+)|(?:[0-9]{4}))\\s+(.+?)(?:([\\r|\\n])|(\\s+->\\s+))", //$NON-NLS-1$ Pattern.CASE_INSENSITIVE); /** * Check if pattern matches an index.html or other index.xxx. We transform * such URL's to denote folders instead. */ private static final Pattern indexPath = Pattern.compile("^(.*/)?index\\.[a-z][a-z0-9]+$"); //$NON-NLS-1$ private static final ThreadLocal<ProviderMatch> currentProviderMatch = new InheritableThreadLocal<ProviderMatch>(); static { documentBuilderFactory.setIgnoringComments(true); documentBuilderFactory.setValidating(false); documentBuilderFactory.setNamespaceAware(false); } public static URL[] extractHTMLLinks(URL urlToHTML, IConnectContext cctx, IProgressMonitor monitor) throws CoreException { ArrayList<URL> links = new ArrayList<URL>(); try { AccessibleByteArrayOutputStream buffer = new AccessibleByteArrayOutputStream(0x2000, 0x200000); DownloadManager.readInto(urlToHTML, cctx, buffer, monitor); try { final DocumentBuilder builder = documentBuilderFactory.newDocumentBuilder(); // Use a very silent error handler // builder.setErrorHandler(new ErrorHandler() { @Override public void error(SAXParseException ex) throws SAXException { throw ex; } @Override public void fatalError(SAXParseException ex) throws SAXException { throw ex; } @Override public void warning(SAXParseException ex) throws SAXException { } }); InputSource source = new InputSource(buffer.getInputStream()); source.setSystemId(urlToHTML.toString()); Document document = builder.parse(source); collectLinks(document.getDocumentElement(), urlToHTML, links); } catch (SAXException e) { // HTML was not well formed. Use a scanner instead // Scanner scanner = new Scanner(buffer.getInputStream()); URL parent = URLUtils.appendTrailingSlash(urlToHTML); while (scanner.findWithinHorizon(htmlPattern, 0) != null) { MatchResult mr = scanner.match(); addLink(links, parent, mr.group(1)); } scanner.close(); } } catch (IllegalStateException e) { CorePlugin.getLogger().warning(e, e.getMessage()); return Trivial.EMPTY_URL_ARRAY; } catch (FileNotFoundException e) { return Trivial.EMPTY_URL_ARRAY; } catch (IOException e) { CorePlugin.getLogger().warning(e, e.getMessage()); return Trivial.EMPTY_URL_ARRAY; } catch (ParserConfigurationException e) { CorePlugin.getLogger().warning(e, e.getMessage()); return Trivial.EMPTY_URL_ARRAY; } return links.toArray(new URL[links.size()]); } public static ProviderMatch getCurrentProviderMatch() { return currentProviderMatch.get(); } public static IComponentReader getReader(URL catalog, IProgressMonitor monitor) throws CoreException { return getDirectReader(catalog, URL_CATALOG, monitor); } public static URL[] list(URL url, IConnectContext cctx, IProgressMonitor monitor) throws CoreException { File dir = FileUtils.getFile(url); if (dir != null) { File[] list = dir.listFiles(); if (list == null) return Trivial.EMPTY_URL_ARRAY; int top = list.length; if (top == 0) return Trivial.EMPTY_URL_ARRAY; URL[] result = new URL[top]; while (--top >= 0) { File file = list[top]; URI uri = URLUtils.normalizeToURI(file.toString(), file.isDirectory()); try { result[top] = uri.toURL(); } catch (MalformedURLException e) { throw BuckminsterException.wrap(e); } } MonitorUtils.complete(monitor); return result; } String proto = url.getProtocol(); if (proto.equalsIgnoreCase("ftp") || proto.equalsIgnoreCase("sftp")) //$NON-NLS-1$ //$NON-NLS-2$ { final ArrayList<URL> result = new ArrayList<URL>(); Scanner scanner = null; try { scanner = new Scanner(DownloadManager.read(url, cctx)); url = URLUtils.appendTrailingSlash(url); while (scanner.findWithinHorizon(ftpPattern, 0) != null) { MatchResult mr = scanner.match(); result.add(new URL(url, mr.group(1))); } return result.toArray(new URL[result.size()]); } catch (CoreException e) { CorePlugin.getLogger().warning(e, e.getMessage()); return Trivial.EMPTY_URL_ARRAY; } catch (FileNotFoundException e) { return Trivial.EMPTY_URL_ARRAY; } catch (IOException e) { CorePlugin.getLogger().warning(e, e.getMessage()); return Trivial.EMPTY_URL_ARRAY; } finally { if (scanner != null) scanner.close(); } } return extractHTMLLinks(url, cctx, monitor); } static IComponentReader getDirectReader(URL url, String readerType, IProgressMonitor monitor) throws CoreException { String urlString = url.toString(); ComponentRequest rq = new ComponentRequest(urlString, null, null); ComponentQueryBuilder queryBld = new ComponentQueryBuilder(); queryBld.setRootRequest(rq); queryBld.setPlatformAgnostic(true); ResolutionContext context = new ResolutionContext(queryBld.createComponentQuery()); NodeQuery nq = new NodeQuery(context, rq, null); IComponentType ctype = CorePlugin.getDefault().getComponentType(IComponentType.UNKNOWN); Provider provider = Provider.immutableProvider(readerType, ctype.getId(), urlString); ProviderMatch pm = new ProviderMatch(provider, ctype, VersionMatch.DEFAULT, ProviderScore.GOOD, nq); return pm.getReader(monitor); } private static void addLink(List<URL> links, URL parent, String link) throws MalformedURLException { Matcher m = indexPath.matcher(link.toString()); if (m.matches()) { link = m.group(1); if (link == null) return; } if (link.equals("../")) //$NON-NLS-1$ return; links.add(new URL(parent, link)); } private static void collectLinks(Element element, URL parent, ArrayList<URL> links) { if (element.getNodeName().equals("a")) //$NON-NLS-1$ { try { addLink(links, parent, element.getAttribute("href")); //$NON-NLS-1$ } catch (MalformedURLException e) { // Invalid href. Just skip it. } } else { for (Node child = element.getFirstChild(); child != null; child = child.getNextSibling()) { if (child.getNodeType() == Node.ELEMENT_NODE) collectLinks((Element) child, parent, links); } } } @Override public URL convertToURL(String repositoryLocator, VersionMatch versionSelector) throws CoreException { try { return URLUtils.normalizeToURL(repositoryLocator); } catch (MalformedURLException e) { throw BuckminsterException.wrap(e); } } @Override public URI getArtifactURL(Resolution resolution, RMContext context) throws CoreException { try { return new URI(resolution.getRepository()); } catch (URISyntaxException e) { return null; } } @Override public Date getLastModification(File workingCopy, IProgressMonitor monitor) throws CoreException { IWorkspaceRoot wsRoot = ResourcesPlugin.getWorkspace().getRoot(); IPath workingCopyPath = Path.fromOSString(workingCopy.getAbsolutePath()); IResource resource = wsRoot.getContainerForLocation(workingCopyPath); if (resource == null) { resource = wsRoot.getFileForLocation(workingCopyPath); if (resource == null) return null; } LastModficationTimeFinder timeFinder = new LastModficationTimeFinder(); resource.accept(timeFinder); return timeFinder.getTimestamp(); } public IReaderType getLocalReaderType() { return this; } @Override public IComponentReader getReader(ProviderMatch providerMatch, IProgressMonitor monitor) throws CoreException { MonitorUtils.complete(monitor); return new URLCatalogReader(this, providerMatch); } @Override public String getRemotePath(String repositoryLocation) throws CoreException { return getURI(repositoryLocation).getPath(); } public URI getURI(Provider provider, Map<String, ? extends Object> properties) throws CoreException { return getURI(provider.getURI(properties)); } public URI getURI(ProviderMatch providerMatch) throws CoreException { return getURI(providerMatch.getRepositoryURI()); } public URI getURI(String repository) throws CoreException { return URLUtils.normalizeToURI(repository, true); } }