/**
* EasySOA Registry
* Copyright 2011 Open Wide
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*
* Contact : easysoa-dev@googlegroups.com
*/
package org.easysoa.registry.dbb.strategies;
import java.net.URL;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.easysoa.registry.dbb.BrowsingContext;
import org.easysoa.registry.dbb.ResourceDownloadService;
import org.easysoa.registry.dbb.ServiceFinderStrategy;
import org.easysoa.registry.dbb.HttpDownloader;
import org.easysoa.registry.dbb.HttpDownloaderService;
import org.easysoa.registry.dbb.HttpDownloaderServiceImpl;
import org.htmlcleaner.HtmlCleaner;
import org.htmlcleaner.TagNode;
import org.nuxeo.runtime.api.Framework;
/**
*
* Abstract strategy which provides a way to retrieve the application name.
*
* @author mkalam-alami
*
*/
public abstract class DefaultAbstractStrategy implements ServiceFinderStrategy {

    private static final Log log = LogFactory.getLog(DefaultAbstractStrategy.class);

    /** Shared HTML parser used to turn downloaded pages into a tag tree. */
    private static final HtmlCleaner cleaner = new HtmlCleaner();

    /**
     * Guesses an application name for the page being browsed, by reading the
     * title of the site's root page, else the title of the browsed page itself.
     *
     * @param browsingContext context whose {@code getURL()} gives the browsed page URL
     * @return The application name, or null if no title could be extracted
     *         from either page.
     * @throws Exception if the site root URL cannot be built, or if downloading
     *         or parsing a page fails
     */
    protected static String guessApplicationName(BrowsingContext browsingContext) throws Exception {
        URL pageUrl = browsingContext.getURL();
        String applicationName = extractApplicationNameFromUrl(buildSiteRootUrl(pageUrl));
        if (applicationName == null) {
            // No usable title on the site root: fall back on the browsed page
            applicationName = extractApplicationNameFromUrl(pageUrl);
        }
        return applicationName;
    }

    /**
     * Builds the URL of the site root (protocol, host and port, empty path)
     * for the given URL.
     *
     * @param url any URL of the site
     * @return the site root URL
     * @throws java.net.MalformedURLException if the root URL cannot be built
     */
    private static URL buildSiteRootUrl(URL url) throws java.net.MalformedURLException {
        int port = url.getPort();
        if (port == -1) {
            // No explicit port: use the protocol's own default (80 for http,
            // 443 for https) rather than always assuming 80, which produced
            // "https://host:80" for https pages.
            port = url.getDefaultPort();
        }
        // If the protocol defines no default port, port is still -1 here and
        // the constructor then simply leaves the port out of the URL.
        return new URL(url.getProtocol(), url.getHost(), port, "");
    }

    /**
     * Downloads the resource at the given URL and extracts an application name
     * from its HTML title.
     *
     * @param url the page to download
     * @return the trimmed title text, or null if no file was obtained, the page
     *         has no title tag, or the HTML parser overflowed the stack
     * @throws Exception if the download service lookup, the download or the
     *         parsing fails
     */
    private static String extractApplicationNameFromUrl(URL url) throws Exception {
        ResourceDownloadService resourceDownloadService = Framework.getService(ResourceDownloadService.class);
        java.io.File downloadedFile = resourceDownloadService.get(url).getFile();
        if (downloadedFile == null) {
            // ex. if the url returns something else than 200 (ex. 403)
            return null; // else cleaner.clean(downloadedFile) throws NullPointerException
        }
        try {
            TagNode cleanHtml = cleaner.clean(downloadedFile);
            return extractApplicationName(cleanHtml);
        }
        catch (StackOverflowError e) {
            // Deliberately best-effort: a page the parser cannot handle just
            // means no application name, not a failure of the whole strategy.
            log.warn("HtmlCleaner stack overflow while parsing " + url + ", cannot fetch app name");
            return null;
        }
    }

    /**
     * Returns the trimmed text of the first {@code title} element found in the
     * given HTML tree (searched recursively).
     *
     * @param html root node of the parsed HTML
     * @return the trimmed title text, or null if there is no title element
     */
    private static String extractApplicationName(TagNode html) {
        TagNode[] titles = html.getElementsByName("title", true);
        if (titles.length > 0) {
            return titles[0].getText().toString().trim();
        }
        else {
            return null;
        }
    }

}