/** * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. * The ASF licenses this file to You under the Apache License, Version 2.0 * (the "License"); you may not use this file except in compliance with * the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.jboss.elasticsearch.river.remote.sitemap; import java.net.MalformedURLException; import java.net.URL; import java.util.Collection; import java.util.Date; import java.util.LinkedHashMap; import java.util.Map; /** * @author http://code.google.com/p/crawler-commons */ public class SiteMap extends AbstractSiteMap { /** * The base URL for the Sitemap is where the Sitemap was found If found at http://foo.org/abc/sitemap.xml then baseUrl * is http://foo.org/abc/ Sitemaps can only contain URLs that are under the base URL. */ private String baseUrl; /** URLs found in this Sitemap */ private Map<String, SiteMapURL> urlList; public SiteMap() { super(); urlList = new LinkedHashMap<String, SiteMapURL>(); setProcessed(false); } public SiteMap(URL url) { this(); setUrl(url); } public SiteMap(String url) { this(); setUrl(url); } public SiteMap(URL url, Date lastModified) { this(url); setLastModified(lastModified); } public SiteMap(String url, String lastModified) { this(url); setLastModified(lastModified); } /** * @return the Collection of SitemapUrls in this Sitemap. */ public Collection<SiteMapURL> getSiteMapUrls() { return urlList.values(); } /** * @param url - the URL of the Sitemap */ private void setUrl(URL url) { this.url = url; setBaseUrl(url); } /** * @param url - the URL of the Sitemap */ private void setUrl(String url) { try { this.url = new URL(url); setBaseUrl(this.url); } catch (MalformedURLException e) { this.url = null; } } public String toString() { String s = "url=\"" + url + "\",lastMod="; s += (getLastModified() == null) ? "null" : SiteMap.getFullDateFormat().format(getLastModified()); s += ",type=" + getType() + ",processed=" + isProcessed() + ",urlListSize=" + urlList.size(); return s; } /** * This is private because only once we know the Sitemap's URL can we determine the base URL. * * @param sitemapUrl */ private void setBaseUrl(URL sitemapUrl) { baseUrl = sitemapUrl.toString().toLowerCase(); // Remove everything back to last slash. // So http://foo.org/abc/sitemap.xml becomes http://foo.org/abc/ baseUrl = baseUrl.replaceFirst("/[^/]*$", "/"); } /** * @return the baseUrl for this Sitemap. */ public String getBaseUrl() { return baseUrl; } /** * @param url The SitemapUrl to be added to the Sitemap. */ public void addSiteMapUrl(SiteMapURL url) { urlList.put(url.getUrl().toString(), url); } public boolean isIndex() { return false; } }