package org.jggug.hudson.plugins.gcrawler.crawlers; import static org.jggug.hudson.plugins.gcrawler.util.HttpUtils.getFile; import java.io.ByteArrayInputStream; import java.util.ArrayList; import java.util.Arrays; import java.util.HashMap; import java.util.List; import java.util.Map; import java.util.regex.Matcher; import java.util.regex.Pattern; import javax.xml.parsers.DocumentBuilder; import javax.xml.parsers.DocumentBuilderFactory; import javax.xml.xpath.XPath; import javax.xml.xpath.XPathConstants; import javax.xml.xpath.XPathFactory; import org.jggug.hudson.plugins.gcrawler.CrawlContext; import org.jggug.hudson.plugins.gcrawler.GrailsProjectInfo; import org.jggug.hudson.plugins.gcrawler.scm.RepositoryException; import org.jggug.hudson.plugins.gcrawler.scm.SubversionRepository; import org.jggug.hudson.plugins.gcrawler.scm.TrunkNotFoundException; import org.jggug.hudson.plugins.gcrawler.util.JobTemplate; import org.w3c.dom.Element; import org.w3c.dom.Node; import org.w3c.dom.NodeList; public class GrailsPluginsCrawler extends CrawlerBase { private static final String REPO_URL = "http://plugins.grails.org/"; private static final Pattern LINK_PATTERN = Pattern.compile("<li><a href=\"(.*)/\">"); private static final List<String> IGNORE_NAMES = Arrays.asList(".plugin-meta", "trunk"); private static final JobTemplate JOB_DESCRIPTION = JobTemplate.createTemplate("grails_plugins_description.txt"); private static final String PLUGIN_METADATA_URL = "http://plugins.grails.org/.plugin-meta/plugins-list.xml"; public GrailsPluginsCrawler(CrawlContext context) { super(context); } public List<GrailsProjectInfo> crawl() throws Exception { List<String> pluginNames; pluginNames = parseHTML(getFile(REPO_URL).getText()); GrailsCrawlerTaskService service = new GrailsCrawlerTaskService(); // TODO add context mapPluginInfo(getFile(PLUGIN_METADATA_URL).getText()); for (String name : pluginNames) { try { SubversionRepository repository = new SubversionRepository( String.format("http://svn.codehaus.org/grails-plugins/%s/", name)); GrailsProjectCrawlerTask crawlerTask = new GrailsPluginsCrawlerTask(name, context, JOB_DESCRIPTION, repository); service.submit(crawlerTask); } catch (TrunkNotFoundException e) { } catch (RepositoryException e) { logger.warn(e); } } return service.getResults(); } protected List<String> parseHTML(String html) { List<String> result = new ArrayList<String>(); Matcher m = LINK_PATTERN.matcher(html); while (m.find()) { String name = m.group(1); if (!IGNORE_NAMES.contains(name)) { result.add(m.group(1)); } } return result; } private static final Pattern REPO_URL_NAME_PATTERN = Pattern.compile("http://plugins\\.grails\\.org/(.*?)/.*"); protected Map<String, GrailsPluginInfo> mapPluginInfo(String xml) throws Exception { Map<String, GrailsPluginInfo> result = new HashMap<String, GrailsPluginInfo>(); DocumentBuilder builder = DocumentBuilderFactory.newInstance().newDocumentBuilder(); ByteArrayInputStream in = new ByteArrayInputStream(xml.getBytes("UTF-8")); Element element = builder.parse(in).getDocumentElement(); XPath xpath = XPathFactory.newInstance().newXPath(); NodeList releases = (NodeList) xpath.evaluate( "//release[../@latest-release=@version]", element, XPathConstants.NODESET); for (int i=0,n=releases.getLength(); i<n; i++) { GrailsPluginInfo info = new GrailsPluginInfo(); Node release = releases.item(i); String file = xpath.evaluate("file", release); Matcher m = REPO_URL_NAME_PATTERN.matcher(file); if (m.matches()) { result.put(m.group(1), info); } else { System.out.println("Erorr! " + file); } info.setName(xpath.evaluate("../@name", release)); info.setTitle(xpath.evaluate("title", release)); info.setAuthor(xpath.evaluate("author", release)); info.setDocumentation(xpath.evaluate("documentation", release)); info.setDescription(xpath.evaluate("description", release)); } return result; } }