// Copyright 2009 Google Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package com.google.enterprise.connector.pusher;
import com.google.common.base.Preconditions;
import com.google.common.base.Strings;
import com.google.enterprise.connector.manager.Context;
import com.google.enterprise.connector.servlet.ServletUtil;
import com.google.enterprise.connector.spi.Document;
import com.google.enterprise.connector.spi.RepositoryDocumentException;
import com.google.enterprise.connector.spi.RepositoryException;
import com.google.enterprise.connector.spi.SpiConstants;
import com.google.enterprise.connector.spi.SpiConstants.DocumentType;
import com.google.enterprise.connector.spi.SpiConstants.FeedType;
import java.io.UnsupportedEncodingException;
import java.net.MalformedURLException;
import java.net.URL;
import java.net.URLEncoder;
/**
 * Produces the URLs under which a {@link Document} is fed. A URL is
 * either taken directly from a document property or, if that property
 * is absent, assembled from the document's docid (and optional fragment)
 * according to the applicable {@link FeedType}.
 */
public class UrlConstructor {
  private final String dataSource;
  private final FeedType defaultFeedType;
  private final String contentUrlPrefix;

  /**
   * Creates a UrlConstructor. The content URL prefix is read from the
   * {@link Context} singleton at construction time.
   *
   * @param dataSource the dataSource for documents
   * @param defaultFeedType the FeedType to use if a document does not
   *        specify an explicit FeedType
   */
  public UrlConstructor(String dataSource, FeedType defaultFeedType) {
    this.dataSource = dataSource;
    this.defaultFeedType = defaultFeedType;
    this.contentUrlPrefix = Context.getInstance().getContentUrlPrefix();
  }

  /**
   * Returns the record URL for a document: the value of its search URL
   * property if present, otherwise a URL built from its docid and fragment
   * properties using the default feed type.
   *
   * @param document the document whose record URL is wanted
   * @param documentType the type of the document; a supplied search URL
   *        is only validated for types other than {@code DocumentType.ACL}
   * @return the record URL, never {@code null}
   * @throws RepositoryDocumentException if the supplied search URL is
   *         malformed, or if the document has neither a search URL nor
   *         a docid
   * @throws IllegalArgumentException if a non-ACL document with a docid
   *         supplies a fragment and the default feed type is
   *         {@code FeedType.CONTENT}
   */
  public String getRecordUrl(Document document, DocumentType documentType)
      throws RepositoryException, RepositoryDocumentException {
    String recordUrl = getOrConstructUrl(
        document,
        SpiConstants.PROPNAME_SEARCHURL,
        SpiConstants.PROPNAME_DOCID,
        SpiConstants.PROPNAME_FRAGMENT,
        defaultFeedType,
        documentType);
    if (recordUrl != null) {
      return recordUrl;
    }
    throw new RepositoryDocumentException(
        "Document has neither property " + SpiConstants.PROPNAME_DOCID
        + " nor property " + SpiConstants.PROPNAME_SEARCHURL);
  }

  /**
   * Returns the URL of the ACL document that the given document inherits
   * from. The URL is looked up or built as a {@code DocumentType.ACL} URL,
   * so a supplied inherit-from URL is not syntax-checked.
   *
   * @param document the document whose inherit-from URL is wanted
   * @return inheritFrom URL, or {@code null} if the document has neither
   *         an inherit-from URL nor an inherit-from docid
   */
  public String getInheritFromUrl(Document document)
      throws RepositoryException, RepositoryDocumentException {
    FeedType inheritFromFeedType = getInheritFromFeedType(document);
    return getOrConstructUrl(
        document,
        SpiConstants.PROPNAME_ACLINHERITFROM,
        SpiConstants.PROPNAME_ACLINHERITFROM_DOCID,
        SpiConstants.PROPNAME_ACLINHERITFROM_FRAGMENT,
        inheritFromFeedType,
        DocumentType.ACL);
  }

  /**
   * Picks the FeedType for the inherited ACL document. Candidates are
   * consulted in this order, and the first one present wins:
   * 1. SpiConstants.PROPNAME_ACLINHERITFROM_FEEDTYPE
   * 2. SpiConstants.PROPNAME_FEEDTYPE
   * 3. the default FeedType given to the constructor
   *
   * @return the FeedType for the inherited ACL document
   */
  private FeedType getInheritFromFeedType(Document document)
      throws RepositoryException, RepositoryDocumentException {
    String feedTypeName = DocUtils.getOptionalString(document,
        SpiConstants.PROPNAME_ACLINHERITFROM_FEEDTYPE);
    if (feedTypeName == null) {
      feedTypeName = DocUtils.getOptionalString(document,
          SpiConstants.PROPNAME_FEEDTYPE);
    }
    if (feedTypeName == null) {
      return defaultFeedType;
    }
    // NOTE(review): what findFeedType returns for an unrecognized name is
    // not visible here; confirm it cannot hand back null to constructUrl.
    return FeedType.findFeedType(feedTypeName);
  }

  /**
   * Returns the value of {@code urlProperty} if the document has one,
   * otherwise fabricates a URL from the docid and fragment properties.
   *
   * @param document the document to read properties from
   * @param urlProperty name of the property holding an explicit URL
   * @param docidProperty name of the property holding the docid
   * @param fragmentProperty name of the property holding the fragment
   * @param feedType the feed type that selects the fabricated URL's form
   * @param documentType the document type; explicit URLs are validated
   *        only for types other than {@code DocumentType.ACL}
   * @return the URL, or {@code null} if the document has neither an
   *         explicit URL nor a docid
   * @throws RepositoryDocumentException if a validated explicit URL is
   *         malformed
   * @throws IllegalArgumentException if a fragment is supplied for a
   *         non-ACL document on a {@code FeedType.CONTENT} feed
   */
  private String getOrConstructUrl(Document document, String urlProperty,
      String docidProperty, String fragmentProperty, FeedType feedType,
      DocumentType documentType)
      throws RepositoryException, RepositoryDocumentException {
    String suppliedUrl = DocUtils.getOptionalString(document, urlProperty);
    if (suppliedUrl != null) {
      if (documentType != DocumentType.ACL) {
        validateUrl(suppliedUrl, urlProperty);
      }
      return suppliedUrl;
    }

    // No explicit URL; fall back to the docid and fragment properties.
    String docId = DocUtils.getOptionalString(document, docidProperty);
    String fragment = DocUtils.getOptionalString(document, fragmentProperty);
    if (docId == null) {
      return null;
    }

    // Avoid issue 214 (b/6514016).
    // Do not append a fragment to a non-ACL googleconnector URL.
    boolean hasFragment = !Strings.isNullOrEmpty(fragment);
    if (hasFragment && feedType == FeedType.CONTENT
        && documentType != DocumentType.ACL) {
      throw new IllegalArgumentException(
          "URL fragments are not permitted on googleconnector URLs");
    }

    // Fabricate a URL from the docid and feedType.
    return constructUrl(docId, fragment, feedType);
  }

  /**
   * Builds the URL form that matches the feed type: a Google connector
   * URL for CONTENT, a Content URL for CONTENTURL, and the bare docid
   * (the fragment is ignored) for WEB.
   *
   * @param docid the document id
   * @param fragment the optional fragment; may be null or empty
   * @param feedType the feed type selecting the URL form
   * @return the constructed URL
   */
  private String constructUrl(String docid, String fragment,
      FeedType feedType) {
    String constructed;
    switch (feedType) {
      case CONTENT:
        constructed = constructGoogleConnectorUrl(docid, fragment);
        break;
      case CONTENTURL:
        constructed = constructContentUrl(docid, fragment);
        break;
      case WEB:
        constructed = docid;
        break;
      default:
        throw new AssertionError(feedType);
    }
    return constructed;
  }

  /**
   * Builds a Google connector URL by concatenating the connector
   * protocol, the data source, ".localhost", the docid path and the
   * docid itself, followed by the pseudo-fragment if there is one.
   *
   * @param docid the document id, appended as-is
   * @param fragment the optional fragment; may be null or empty
   * @return the connector url
   */
  private String constructGoogleConnectorUrl(String docid, String fragment) {
    StringBuilder connectorUrl = new StringBuilder(ServletUtil.PROTOCOL)
        .append(dataSource)
        .append(".localhost")
        .append(ServletUtil.DOCID)
        .append(docid);
    appendPseudoFragment(connectorUrl, fragment);
    return connectorUrl.toString();
  }

  /**
   * Builds a Content URL: the content URL prefix followed by query
   * parameters for the connector name and docid, and then the
   * pseudo-fragment if there is one.
   *
   * @param docid the document id
   * @param fragment the optional fragment; may be null or empty
   * @return the contentUrl
   * @throws IllegalStateException if the content URL prefix is null or
   *         empty
   */
  private String constructContentUrl(String docid, String fragment) {
    Preconditions.checkState(!Strings.isNullOrEmpty(contentUrlPrefix),
        "contentUrlPrefix must not be null or empty");
    StringBuilder contentUrl = new StringBuilder(contentUrlPrefix);
    ServletUtil.appendQueryParam(contentUrl,
        ServletUtil.XMLTAG_CONNECTOR_NAME, dataSource);
    ServletUtil.appendQueryParam(contentUrl,
        ServletUtil.QUERY_PARAM_DOCID, docid);
    appendPseudoFragment(contentUrl, fragment);
    return contentUrl.toString();
  }

  /**
   * Checks that a supplied URL can at least be parsed as a URL.
   *
   * @param url the URL to validate
   * @param description description of the URL, used in the error message
   * @throws RepositoryDocumentException if the URL is invalid
   */
  private static void validateUrl(String url, String description)
      throws RepositoryDocumentException {
    // The GSA supports SMB URLs, but Java does not, so those are parsed
    // with an explicit stream handler.
    boolean isSmbUrl = (url != null) && url.startsWith("smb:");
    try {
      if (isSmbUrl) {
        new URL(null, url, SmbURLStreamHandler.getInstance());
      } else {
        new URL(url);
      }
    } catch (MalformedURLException e) {
      throw new RepositoryDocumentException(
          "Supplied " + description + " URL " + url + " is malformed.", e);
    }
  }

  /**
   * Appends a fragment to a URL, not as a true fragment, but as an
   * additional, URL-encoded query parameter. The separator is '?' if the
   * URL has no query string yet and '&' otherwise. A null or empty
   * fragment leaves the URL untouched.
   *
   * @param url a StringBuilder with the URL under construction
   * @param fragment the fragment to append to the URL.
   */
  // TODO(bmj): GSA 7.0 strips fragments off of URLs in the feed, so
  // append the fragment as another query parameter until that is fixed.
  // Then delete this and use ServletUtil.appendFragment() instead.
  private static void appendPseudoFragment(StringBuilder url, String fragment) {
    if (Strings.isNullOrEmpty(fragment)) {
      return;
    }
    try {
      char separator = (url.indexOf("?") < 0) ? '?' : '&';
      url.append(separator).append(URLEncoder.encode(fragment, "UTF-8"));
    } catch (UnsupportedEncodingException e) {
      // Can't happen with UTF-8.
      throw new AssertionError(e);
    }
  }
}