/*
 * Copyright (c) 2010-2011 Lockheed Martin Corporation
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.eurekastreams.server.action.execution.stream;

import java.io.Serializable;
import java.util.Date;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import org.apache.commons.logging.Log;
import org.eurekastreams.commons.actions.ExecutionStrategy;
import org.eurekastreams.commons.actions.context.PrincipalActionContext;
import org.eurekastreams.commons.exceptions.ExecutionException;
import org.eurekastreams.commons.logging.LogFactory;
import org.eurekastreams.server.domain.stream.LinkInformation;
import org.eurekastreams.server.persistence.mappers.FindLinkInformationByUrl;
import org.eurekastreams.server.persistence.mappers.InsertMapper;
import org.eurekastreams.server.persistence.mappers.requests.PersistenceRequest;
import org.eurekastreams.server.persistence.mappers.requests.UniqueStringRequest;
import org.eurekastreams.server.service.actions.strategies.links.ConnectionFacade;
import org.eurekastreams.server.service.actions.strategies.links.HtmlLinkParser;

/**
 * Retrieve the {@link LinkInformation} for a given url.
 *
 * The url is first looked up through the link mapper; only when no record is returned is a new
 * {@link LinkInformation} built, handed to the first parsing strategy whose regex matches the url's host, and
 * inserted.
 */
public class GetParsedLinkInformationExecution implements ExecutionStrategy<PrincipalActionContext>
{
    /**
     * Matches a url that already starts with a lower-case protocol followed by "://".
     */
    private static final Pattern PROTOCOL_PREFIXED_URL = Pattern.compile("^([a-z]+://.+)");

    /**
     * Matches one or more consecutive whitespace characters.
     */
    private static final Pattern WHITESPACE_RUN = Pattern.compile("\\s+");

    /**
     * Separator between the protocol and the authority part of a url.
     */
    private static final String PROTOCOL_SEPARATOR = "://";

    /**
     * Logger.
     */
    private final Log log = LogFactory.make();

    /**
     * List of parsing strategies.
     */
    List<HtmlLinkParser> parsingStrategies = null;

    /**
     * The HTTP connection facade.
     */
    private final ConnectionFacade connection;

    /**
     * Link mapper.
     */
    private final FindLinkInformationByUrl mapper;

    /**
     * Insert mapper.
     */
    private final InsertMapper<LinkInformation> insertMapper;

    /**
     * Constructor.
     *
     * @param inConnection
     *            the file downloader.
     * @param inMapper
     *            the link URL mapper.
     * @param inInsertMapper
     *            the insert mapper.
     * @param inParsingStrategies
     *            the parsing strategies.
     */
    public GetParsedLinkInformationExecution(final ConnectionFacade inConnection,
            final FindLinkInformationByUrl inMapper, final InsertMapper<LinkInformation> inInsertMapper,
            final List<HtmlLinkParser> inParsingStrategies)
    {
        connection = inConnection;
        mapper = inMapper;
        insertMapper = inInsertMapper;
        parsingStrategies = inParsingStrategies;
    }

    /**
     * {@inheritDoc}
     *
     * Retrieve the {@link LinkInformation} associated with the provided url.
     *
     * Any exception raised while resolving, looking up or persisting the link is logged and not propagated.
     *
     * @param inActionContext
     *            the action context; its params are cast to the url {@link String} and its principal supplies the
     *            account id passed to the connection facade.
     * @return the {@link LinkInformation} returned by the mapper or newly created here; null if a failure occurred
     *         before either was available. A newly created instance is still returned when persisting it fails.
     * @throws ExecutionException
     *             declared by the interface; not thrown directly by this implementation.
     */
    @Override
    public Serializable execute(final PrincipalActionContext inActionContext) throws ExecutionException
    {
        String url = (String) inActionContext.getParams();
        LinkInformation theLink = null;

        try
        {
            // First see if the user supplied a url with a protocol. If they didn't
            // prepend http:// onto it.
            if (!PROTOCOL_PREFIXED_URL.matcher(url).matches())
            {
                url = "http://" + url;
            }

            url = connection.getFinalUrl(url, inActionContext.getPrincipal().getAccountId());

            theLink = mapper.execute(new UniqueStringRequest(url));

            if (null == theLink)
            {
                // Assigned before any further work so a later failure still returns what was built so far.
                theLink = new LinkInformation();
                theLink.setUrl(url);
                theLink.setSource(extractSource(url));

                populateFromResource(url, theLink, inActionContext);

                theLink.setCreated(new Date());
                insertMapper.execute(new PersistenceRequest<LinkInformation>(theLink));
                insertMapper.flush();
            }
        }
        catch (Exception ex)
        {
            // Best effort: the caller is not told about the failure, but it must not vanish without a trace.
            log.warn("Failed to retrieve link information for url: " + url, ex);
        }

        return theLink;
    }

    /**
     * Return the protocol + authority portion of a url, i.e. everything up to (not including) the first slash after
     * the "://" separator, or the whole url when there is no such slash.
     *
     * @param url
     *            the url to inspect.
     * @return the source portion of the url.
     */
    private static String extractSource(final String url)
    {
        int postAuthorityIndex = url.indexOf("/", url.indexOf(PROTOCOL_SEPARATOR) + PROTOCOL_SEPARATOR.length());
        return postAuthorityIndex == -1 ? url : url.substring(0, postAuthorityIndex);
    }

    /**
     * Download the resource at the url and let the first parsing strategy whose regex is found in the url's host
     * fill in the link. Failures are logged and otherwise ignored so the caller can still persist the basic link.
     *
     * @param url
     *            the url of the resource to download.
     * @param theLink
     *            the link information to populate.
     * @param inActionContext
     *            the action context supplying the principal's account id.
     */
    private void populateFromResource(final String url, final LinkInformation theLink,
            final PrincipalActionContext inActionContext)
    {
        // Attempt to retrieve the contents of the resource.
        log.debug("Downloading resource: " + url);
        try
        {
            String htmlString = connection.downloadFile(url, inActionContext.getPrincipal().getAccountId());

            // Collapse each run of whitespace to a single space. This must be a regex replacement:
            // String.replace would only substitute the literal text "\s+".
            htmlString = WHITESPACE_RUN.matcher(htmlString).replaceAll(" ");

            String host = connection.getHost(url);

            for (HtmlLinkParser strategy : parsingStrategies)
            {
                Matcher match = Pattern.compile(strategy.getRegex()).matcher(host);
                if (match.find())
                {
                    log.debug("Found: " + strategy.getRegex());
                    strategy.parseLinkInformation(htmlString, theLink, inActionContext.getPrincipal()
                            .getAccountId());
                    // Only the first matching strategy is applied.
                    break;
                }
                else
                {
                    log.debug("Didn't find: " + strategy.getRegex());
                }
            }
        }
        catch (Exception e)
        {
            log.info("Failed to download resource and extract link information from it.", e);
        }
    }
}