// Copyright 2006 Google Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//      http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package com.google.enterprise.connector.pusher;

import com.google.common.base.Charsets;
import com.google.common.base.Strings;
import com.google.enterprise.connector.servlet.ServletUtil;
import com.google.enterprise.connector.spi.SpiConstants.ContentEncoding;
import com.google.enterprise.connector.util.Clock;
import com.google.enterprise.connector.util.SslUtil;
import com.google.enterprise.connector.util.SystemClock;

import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.OutputStream;
import java.net.HttpURLConnection;
import java.net.MalformedURLException;
import java.net.URL;
import java.security.GeneralSecurityException;
import java.util.logging.Level;
import java.util.logging.Logger;

import javax.net.ssl.HttpsURLConnection;

/**
 * Opens a connection to a url and sends data to it.
 *
 * <p>Besides posting feeds to the GSA's {@code /xmlfeed} servlet, this
 * class also queries {@code /getdtd} (to discover whether the feed DTD
 * declares an {@code acl} element) and {@code /getbacklogcount} (to
 * throttle feeding when the GSA falls behind).
 */
public class GsaFeedConnection implements FeedConnection {
  private static final Logger LOGGER =
      Logger.getLogger(GsaFeedConnection.class.getName());

  /**
   * The GSA's response when it successfully receives a feed.
   */
  public static final String SUCCESS_RESPONSE = "Success";

  /**
   * The GSA's response when the client is not authorized to send feeds.
   */
  public static final String UNAUTHORIZED_RESPONSE =
      "Error - Unauthorized Request";

  /**
   * The GSA's response when it runs out of disk space.
   */
  public static final String DISKFULL_RESPONSE =
      "Feed not accepted due to insufficient disk space.";

  /**
   * The GSA's response when there was an internal error.
   */
  public static final String INTERNAL_ERROR_RESPONSE = "Internal Error";

  // Multipart/form-data uploads require a boundary to delimit controls.
  // Since we XML-escape or base64-encode all data provided by the connector,
  // the feed XML will never contain "<<".
  private static final String BOUNDARY = "<<";

  private static final String CRLF = "\r\n";

  // Content encodings supported by GSA.
  private String contentEncodings =
      ContentEncoding.BASE64COMPRESSED + "," + ContentEncoding.BASE64BINARY;

  /**
   * Are inherited ACLs supported by the GSA? {@code null} means the
   * answer has not been determined yet for the current feed host.
   */
  private Boolean supportsInheritedAcls;

  // True if we recently got a feed error of some sort.
  private boolean gotFeedError = false;

  // XmlFeed URL
  private URL feedUrl = null;

  // XmlFeed DTD URL
  private URL dtdUrl = null;

  // XmlFeed DTD
  private String feedDtd = null;

  // BacklogCount URL
  private URL backlogUrl = null;

  // BacklogCount Ceiling. Throttle back feed if backlog exceeds the ceiling.
  private int backlogCeiling = 4000;

  // BacklogCount Floor. Stop throttling feed if backlog drops below floor.
  private int backlogFloor = 1000;

  // True if the feed is throttled back due to excessive backlog.
  private boolean isBacklogged = false;

  // Clock used for backlog checks.
  private Clock clock = new SystemClock();

  // Time of last backlog check. Long.MAX_VALUE disables further checks.
  private long lastBacklogCheck;

  // How often to check for backlog (in milliseconds).
  private long backlogCheckInterval = 2 * 60 * 1000L;

  /** Whether HTTPS connections validate the server certificate. */
  private boolean validateCertificate = true;

  /**
   * Creates a feed connection to the given GSA.
   *
   * @param protocol {@code "http"} or {@code "https"}; if null or empty,
   *        {@code "http"} is used when {@code securePort} is negative and
   *        {@code "https"} otherwise
   * @param host the GSA host name
   * @param port the port used for non-HTTPS connections
   * @param securePort the port used for HTTPS connections
   * @throws MalformedURLException if a URL cannot be built from the
   *         supplied values
   */
  public GsaFeedConnection(String protocol, String host, int port,
      int securePort) throws MalformedURLException {
    this.setFeedHostAndPort(defaultProtocol(protocol, securePort), host, port,
        securePort);
  }

  @Override
  public String toString() {
    return "FeedConnection: feedUrl = " + feedUrl;
  }

  /**
   * Points this connection at a (possibly different) GSA.
   *
   * @param protocol {@code "http"} or {@code "https"}; a null or empty
   *        value is defaulted exactly as in the constructor
   * @param host the GSA host name
   * @param port the port used for non-HTTPS connections
   * @param securePort the port used for HTTPS connections
   * @throws MalformedURLException if a URL cannot be built from the
   *         supplied values
   */
  public synchronized void setFeedHostAndPort(String protocol, String host,
      int port, int securePort) throws MalformedURLException {
    String scheme = defaultProtocol(protocol, securePort);
    // URL schemes are case-insensitive, so "HTTPS" must also select the
    // secure port.
    boolean secure = "https".equalsIgnoreCase(scheme);
    setUrls(scheme, host, secure ? securePort : port);
  }

  /**
   * Returns {@code protocol} unchanged unless it is null or empty, in
   * which case the protocol is derived from {@code securePort}: a
   * negative secure port selects {@code "http"}, anything else
   * {@code "https"}.
   */
  private static String defaultProtocol(String protocol, int securePort) {
    if (Strings.isNullOrEmpty(protocol)) {
      return (securePort < 0) ? "http" : "https";
    }
    return protocol;
  }

  /**
   * Sets the URLs. This separate helper method ensures that only one
   * port value is available, to avoid grabbing the wrong port by
   * accident.
   *
   * <p>All state cached from the previous URLs (feed error flag, DTD,
   * inherited-ACL support, last backlog check time) is reset.
   */
  private void setUrls(String protocol, String host, int port)
      throws MalformedURLException {
    feedUrl = new URL(protocol, host, port, "/xmlfeed");
    gotFeedError = false;
    dtdUrl = new URL(protocol, host, port, "/getdtd");
    feedDtd = null;
    // The inherited-ACL answer is derived from the DTD, so it must be
    // invalidated together with it.
    supportsInheritedAcls = null;
    backlogUrl = new URL(protocol, host, port, "/getbacklogcount");
    lastBacklogCheck = 0L;
  }

  /** For the unit tests to verify the correct URLs. */
  public synchronized URL getFeedUrl() {
    return feedUrl;
  }

  /** Replaces the clock used to time backlog checks. */
  public void setClock(Clock clock) {
    this.clock = clock;
  }

  /**
   * Set the backlog check parameters. The Feed connection can check to see
   * if the GSA is falling behind processing feeds by calling the GSA's
   * {@code getbacklogcount} servlet. If the number of outstanding feed
   * items exceeds the {@code ceiling}, then the GSA is considered
   * backlogged. If the number of outstanding feed items then drops below
   * the {@code floor}, it may be considered no longer backlogged.
   *
   * @param floor backlog count floor value, below which the GSA is no
   *        longer considered backlogged.
   * @param ceiling backlog count ceiling value, above which the GSA is
   *        considered backlogged.
   * @param interval number of seconds to wait between backlog count checks.
   */
  public void setBacklogCheck(int floor, int ceiling, int interval) {
    backlogFloor = floor;
    backlogCeiling = ceiling;
    backlogCheckInterval = interval * 1000L;
  }

  /** Sets the comma-separated list of content encodings to report. */
  public void setContentEncodings(String contentEncodings) {
    this.contentEncodings = contentEncodings;
  }

  /**
   * Sets whether HTTPS connections to the GSA validate the GSA certificate.
   */
  public void setValidateCertificate(boolean validateCertificate) {
    this.validateCertificate = validateCertificate;
  }

  /** For the unit tests. */
  public boolean getValidateCertificate() {
    return validateCertificate;
  }

  /**
   * Appends the multipart/form-data header for one form control: the
   * boundary line, the Content-Disposition and Content-Type headers, and
   * the blank line that precedes the control's value.
   */
  private static void controlHeader(StringBuilder builder, String name,
      String mimetype) {
    builder.append("--").append(BOUNDARY).append(CRLF);
    builder.append("Content-Disposition: form-data;");
    builder.append(" name=\"").append(name).append("\"").append(CRLF);
    builder.append("Content-Type: ").append(mimetype).append(CRLF);
    builder.append(CRLF);
  }

  /**
   * Posts the feed and records whether the GSA accepted it.
   *
   * @param feedData the feed to send; must be an {@link XmlFeed}
   * @return the GSA's response text
   * @throws FeedException if the feed could not be sent or the response
   *         could not be read
   */
  @Override
  public String sendData(FeedData feedData) throws FeedException {
    try {
      String response = sendFeedData((XmlFeed) feedData);
      gotFeedError = !response.equalsIgnoreCase(SUCCESS_RESPONSE);
      return response;
    } catch (FeedException fe) {
      gotFeedError = true;
      throw fe;
    }
  }

  /**
   * Sends the feed as a multipart/form-data POST with three controls
   * ({@code datasource}, {@code feedtype} and {@code data}) and returns
   * the body of the response with its lines concatenated.
   */
  private String sendFeedData(XmlFeed feed) throws FeedException {
    String feedType = feed.getFeedType().toLegacyString();
    String dataSource = feed.getDataSource();
    OutputStream outputStream;
    HttpURLConnection uc;
    StringBuilder buf = new StringBuilder();
    byte[] prefix;
    byte[] suffix;
    try {
      // Build prefix.
      controlHeader(buf, "datasource", ServletUtil.MIMETYPE_TEXT_PLAIN);
      buf.append(dataSource).append(CRLF);
      controlHeader(buf, "feedtype", ServletUtil.MIMETYPE_TEXT_PLAIN);
      buf.append(feedType).append(CRLF);
      controlHeader(buf, "data", ServletUtil.MIMETYPE_XML);
      prefix = buf.toString().getBytes(Charsets.UTF_8);

      // Build suffix.
      buf.setLength(0);
      buf.append(CRLF).append("--").append(BOUNDARY).append("--").append(CRLF);
      suffix = buf.toString().getBytes(Charsets.UTF_8);

      LOGGER.finest("Opening feed connection to " + feedUrl);
      // Guard against the URL being swapped by setFeedHostAndPort.
      synchronized (this) {
        uc = (HttpURLConnection) feedUrl.openConnection();
      }
      if (uc instanceof HttpsURLConnection && !validateCertificate) {
        SslUtil.setTrustingHttpsOptions((HttpsURLConnection) uc);
      }
      uc.setDoInput(true);
      uc.setDoOutput(true);
      // The total length is known up front, so the body can be streamed.
      uc.setFixedLengthStreamingMode(
          prefix.length + feed.size() + suffix.length);
      uc.setRequestProperty("Content-Type",
          "multipart/form-data; boundary=" + BOUNDARY);
      outputStream = uc.getOutputStream();
    } catch (IOException ioe) {
      throw new FeedException(feedUrl.toString(), ioe);
    } catch (GeneralSecurityException e) {
      throw new FeedException(feedUrl.toString(), e);
    }

    // Tracks whether an exception is already propagating, so that the
    // cleanup code below never replaces it with a secondary failure.
    boolean isThrowing = false;
    buf.setLength(0);
    try {
      LOGGER.finest("Writing feed data to feed connection.");
      // If there is an exception during this read/write, we do our
      // best to close the url connection and read the result.
      try {
        outputStream.write(prefix);
        feed.writeTo(outputStream);
        outputStream.write(suffix);
        outputStream.flush();
      } catch (IOException e) {
        LOGGER.log(Level.SEVERE,
            "IOException while posting: will retry later", e);
        isThrowing = true;
        throw new FeedException(e);
      } catch (RuntimeException e) {
        isThrowing = true;
        throw e;
      } catch (Error e) {
        isThrowing = true;
        throw e;
      } finally {
        try {
          outputStream.close();
        } catch (IOException e) {
          LOGGER.log(Level.SEVERE,
              "IOException while closing after post: will retry later", e);
          if (!isThrowing) {
            isThrowing = true;
            throw new FeedException(e);
          }
        }
      }
    } finally {
      BufferedReader br = null;
      try {
        LOGGER.finest("Waiting for response from feed connection.");
        InputStream inputStream = uc.getInputStream();
        br = new BufferedReader(
            new InputStreamReader(inputStream, Charsets.UTF_8));
        String line;
        while ((line = br.readLine()) != null) {
          buf.append(line);
        }
      } catch (IOException ioe) {
        if (!isThrowing) {
          throw new FeedException(ioe);
        }
        // An earlier failure is already propagating; record this
        // secondary one instead of dropping it silently.
        LOGGER.log(Level.FINE,
            "IOException while reading response after failed post: ignoring",
            ioe);
      } finally {
        try {
          if (br != null) {
            br.close();
          }
        } catch (IOException e) {
          LOGGER.log(Level.SEVERE,
              "IOException while closing after post: continuing", e);
        }
        if (uc != null) {
          uc.disconnect();
        }
        if (LOGGER.isLoggable(Level.FINEST)) {
          LOGGER.finest("Received response from feed connection: "
              + buf.toString());
        }
      }
    }
    return buf.toString();
  }

  @Override
  public synchronized String getContentEncodings() {
    return contentEncodings;
  }

  /**
   * Checks the DTD for the acl element. The answer is cached if the
   * DTD was successfully read.
   *
   * @return {@code true} if the feed DTD declares an {@code acl} element;
   *         {@code false} if it does not or if the DTD is unavailable
   */
  @Override
  public synchronized boolean supportsInheritedAcls() {
    if (supportsInheritedAcls == null) {
      String dtd = getFeedDtd();
      if (dtd == null) {
        // Not cached, so the DTD is requested again on the next call.
        return false;
      }
      supportsInheritedAcls = dtd.contains("<!ELEMENT acl ");
    }
    return supportsInheritedAcls;
  }

  /**
   * Reports whether feeding should be held back, either because the GSA
   * backlog is excessive or because a recent feed error was seen. The
   * GSA is polled at most once per backlog check interval.
   */
  @Override
  public synchronized boolean isBacklogged() {
    if (lastBacklogCheck != Long.MAX_VALUE) {
      long now = clock.getTimeMillis();
      if ((now - lastBacklogCheck) > backlogCheckInterval) {
        lastBacklogCheck = now;
        try {
          int backlogCount = getBacklogCount();
          if (backlogCount >= 0) {
            if (gotFeedError) {
              gotFeedError = false;
              LOGGER.info("Feed connection seems to be accepting new feeds.");
            }
            if (isBacklogged) {
              // If we were backlogged, but have dropped below the
              // floor value, then we are no longer backlogged.
              if (backlogCount < backlogFloor) {
                isBacklogged = false;
                LOGGER.info("Resuming traversal after feed backlog clears.");
              }
            } else if (backlogCount > backlogCeiling) {
              // If the backlogcount exceeds the ceiling value,
              // then we are definitely backlogged.
              isBacklogged = true;
              LOGGER.info("Pausing traversal due to excessive feed backlog.");
            }
          }
        } catch (FeedException e) {
          if (gotFeedError) {
            LOGGER.finest(
                "Feed connection still does not seem to be accepting feeds. "
                + e.getMessage());
          } else {
            LOGGER.log(Level.WARNING,
                "Feed connection does not seem to be accepting feeds.", e);
            gotFeedError = true;
          }
        } catch (UnsupportedOperationException e) {
          // This older GSA does not support getbacklogcount.
          // Assume never backlogged and don't check again.
          isBacklogged = false;
          lastBacklogCheck = Long.MAX_VALUE;
          LOGGER.warning("Unsupported GSA version, unable to check for feed"
              + " backlog or errors.");
        }
      }
    }
    return isBacklogged || gotFeedError;
  }

  /**
   * @return the current feed backlog count of the GSA,
   *         or -1 if the count is unavailable.
   * @throws UnsupportedOperationException if the GSA does
   *         not support getbacklogcount.
   * @throws FeedException if there was any other error retrieving the count
   */
  private int getBacklogCount() throws FeedException {
    String response = doGet(backlogUrl, "backlogcount");
    try {
      return Strings.isNullOrEmpty(response) ? -1 : Integer.parseInt(response);
    } catch (NumberFormatException nfe) {
      // Got a non-integer backlog count - probably an error message.
      throw new FeedException(response);
    }
  }

  /**
   * @return the GSA's Feed DTD, or null if unavailable.
   */
  private String getFeedDtd() {
    if (feedDtd == null) {
      try {
        feedDtd = Strings.emptyToNull(doGet(dtdUrl, "Feed DTD"));
      } catch (FeedException e) {
        if (gotFeedError) {
          LOGGER.finest("Failed to read Feed DTD: " + e.getMessage());
        } else {
          LOGGER.log(Level.WARNING, "Failed to read Feed DTD. ", e);
        }
      } catch (UnsupportedOperationException e) {
        // This older GSA does not support getdtd.
        LOGGER.fine("Unsupported GSA version lacks get Feed DTD support.");
      }
    }
    return feedDtd;
  }

  /**
   * Get the response to a URL request. The response body is returned
   * as a single trimmed String with its lines concatenated. The content
   * is only returned if the response code was OK.
   *
   * @param url the URL to request
   * @param name the name of the feature requested (for logging)
   * @return String representing response to an HTTP GET.
   * @throws UnsupportedOperationException if the GSA does
   *         not support the requested feature (HTTP 404).
   * @throws FeedException if any other error prevented reading
   *         a valid response.
   */
  private String doGet(URL url, String name) throws FeedException {
    HttpURLConnection conn = null;
    BufferedReader br = null;
    String str = null;
    StringBuilder buf = new StringBuilder();
    try {
      if (LOGGER.isLoggable(Level.FINEST)) {
        LOGGER.finest("Opening " + name + " connection to " + url);
      }
      conn = (HttpURLConnection) url.openConnection();
      if (conn instanceof HttpsURLConnection && !validateCertificate) {
        SslUtil.setTrustingHttpsOptions((HttpsURLConnection) conn);
      }
      conn.connect();
      int responseCode = conn.getResponseCode();
      if (responseCode == HttpURLConnection.HTTP_OK) {
        br = new BufferedReader(
            new InputStreamReader(conn.getInputStream(), Charsets.UTF_8));
        while ((str = br.readLine()) != null) {
          buf.append(str);
        }
        str = buf.toString().trim();
        if (LOGGER.isLoggable(Level.FINEST)) {
          LOGGER.finest("Received " + name + ": " + str);
        }
        return str;
      } else if (responseCode == HttpURLConnection.HTTP_NOT_FOUND) {
        throw new UnsupportedOperationException(
            "GSA lacks " + name + " support.");
      } else {
        throw new FeedException(url.toString() + " returned response "
            + responseCode + " " + conn.getResponseMessage());
      }
    } catch (IOException ioe) {
      throw new FeedException(url.toString(), ioe);
    } catch (GeneralSecurityException e) {
      throw new FeedException(url.toString(), e);
    } finally {
      try {
        if (br != null) {
          br.close();
        }
      } catch (IOException e) {
        LOGGER.warning("Error after reading response for "
            + name + ": " + e.getMessage());
      } finally {
        if (conn != null) {
          conn.disconnect();
        }
      }
    }
  }
}