/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.nutch.protocol.ftp;
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.OutputStream;
import java.net.InetAddress;
import java.net.Socket;
import java.util.List;
//import java.util.LinkedList;
import org.apache.commons.net.MalformedServerReplyException;
import org.apache.commons.net.ftp.FTP;
import org.apache.commons.net.ftp.FTPCommand;
import org.apache.commons.net.ftp.FTPFile;
import org.apache.commons.net.ftp.FTPFileEntryParser;
import org.apache.commons.net.ftp.FTPReply;
import org.apache.commons.net.ftp.FTPConnectionClosedException;
/***********************************************
* Client.java encapsulates functionalities necessary for nutch to
* get dir list and retrieve file from an FTP server.
* This class takes care of all low level details of interacting
* with an FTP server and provides a convenient higher level interface.
*
* Modified from FtpClient.java in apache commons-net.
*
* Notes by John Xing:
* ftp server implementations are hardly uniform and none seems to follow
* RFCs whole-heartedly. We have no choice, but assume common denominator
* as following:
* (1) Use stream mode for data transfer. Block mode would be better for
* downloading multiple files and for partial file downloads. However,
* not every ftpd supports block mode.
* (2) Use passive mode for data connection.
* So nutch will work if we run behind firewall.
* (3) The data connection is opened/closed per ftp command for the reasons
* listed in (1). There are ftp servers out there that,
* when partial downloading is enforced by closing the data channel
* socket on our client side, immediately close the
* control channel (socket) as well. Our code deals with such bad behavior.
* (4) LIST is used to obtain remote file attributes if possible.
* MDTM & SIZE would be nice, but not as ubiquitously implemented as LIST.
* (5) Avoid using ABOR in single thread? Do not use it at all.
*
* About exceptions:
* Some specific exceptions are re-thrown as one of FtpException*.java
* In fact, each function throws FtpException*.java or pass IOException.
*
* @author John Xing
***********************************************/
public class Client extends FTP
{
// Read timeout (milliseconds) applied to each data connection socket;
// a negative value means no timeout is set on the socket.
private int __dataTimeout;
// Port taken from the most recently parsed PASV reply; -1 until one is parsed.
private int __passivePort;
// Host taken from the most recently parsed PASV reply; null until one is parsed.
private String __passiveHost;
// Last file type / format recorded by setFileType() (or the defaults).
// NOTE(review): neither value is read anywhere in this file.
private int __fileType, __fileFormat;
// When true, a data connection whose peer fails verifyRemote() is rejected.
private boolean __remoteVerificationEnabled;
// NOTE(review): only ever reset to null in this file; appears unused here.
private FTPFileEntryParser __entryParser;
// Cached system name from SYST; cleared when defaults are restored.
private String __systemName;
/***
 * Creates a client with default per-connection state, no data
 * connection timeout (-1) and remote host verification enabled.
 ***/
public Client()
{
  this.__remoteVerificationEnabled = true;
  this.__dataTimeout = -1;
  this.__initDefaults();
}
/***
 * Restores the per-connection state to its initial values: no passive
 * endpoint, ASCII file type with non-print format, and no cached
 * system name or entry parser.
 ***/
private void __initDefaults()
{
  // cached, connection specific objects
  this.__entryParser = null;
  this.__systemName = null;

  // transfer settings
  this.__fileFormat = FTP.NON_PRINT_TEXT_FORMAT;
  this.__fileType = FTP.ASCII_FILE_TYPE;

  // passive mode endpoint
  this.__passivePort = -1;
  this.__passiveHost = null;
}
/***
 * Parses the reply to a PASV command and stores the advertised data
 * endpoint in <code>__passiveHost</code> and <code>__passivePort</code>.
 * <p>
 * The reply must contain <code>(h1,h2,h3,h4,p1,p2)</code>. The host is
 * <code>h1.h2.h3.h4</code> and the port is <code>p1 * 256 + p2</code>.
 * Neither field is modified when parsing fails.
 * <p>
 * @param reply The first line of the server's PASV reply.
 * @exception MalformedServerReplyException If the reply does not contain
 *    exactly six comma separated numbers, each in the range 0-255,
 *    enclosed in parentheses.
 ***/
private void __parsePassiveModeReply(String reply)
throws MalformedServerReplyException
{
  final String errorPrefix =
    "Could not parse passive host information.\nServer Reply: ";

  // Locate the parenthesised address; a missing '(' or ')' must surface as
  // a malformed reply, not as a StringIndexOutOfBoundsException.
  int open = reply.indexOf('(');
  int close = (open < 0) ? -1 : reply.indexOf(')', open + 1);
  if (close < 0)
    throw new MalformedServerReplyException(errorPrefix + reply);

  // limit -1 keeps trailing empty fields so "1,2,3,4,5,6," is rejected
  String[] fields = reply.substring(open + 1, close).trim().split(",", -1);
  if (fields.length != 6)
    throw new MalformedServerReplyException(errorPrefix + reply);

  int[] numbers = new int[fields.length];
  for (int i = 0; i < fields.length; i++)
  {
    try
    {
      numbers[i] = Integer.parseInt(fields[i].trim());
    }
    catch (NumberFormatException e)
    {
      throw new MalformedServerReplyException(errorPrefix + reply);
    }
    // every field is a single byte of the address or of the port
    if (numbers[i] < 0 || numbers[i] > 255)
      throw new MalformedServerReplyException(errorPrefix + reply);
  }

  __passiveHost =
    numbers[0] + "." + numbers[1] + "." + numbers[2] + "." + numbers[3];
  __passivePort = (numbers[4] << 8) | numbers[5];
}
/***
 * Opens a passive mode data connection and issues the given command
 * on the control connection.
 * <p>
 * Sequence: send PASV, parse the advertised host/port, connect the data
 * socket, send <code>command</code>, optionally verify the peer of the
 * data socket, and finally apply the data timeout if one is set.
 * The data socket is closed before this method returns null or throws.
 * <p>
 * Note: a retry loop for ill-behaved servers that re-advertise the
 * previous passive port was tried on 2004-03-17 and is disabled.
 * <p>
 * @param command The FTPCommand constant to send (e.g. LIST, RETR).
 * @param arg The argument for the command, may be null.
 * @return The connected data socket, or null if the server did not
 *    answer the command with a positive preliminary reply.
 * @exception FtpExceptionCanNotHaveDataConnection If PASV fails, its
 *    reply can not be parsed, or remote verification rejects the peer.
 * @exception IOException If an I/O error occurs on either connection.
 ***/
protected Socket __openPassiveDataConnection(int command, String arg)
throws IOException, FtpExceptionCanNotHaveDataConnection {

  if (pasv() != FTPReply.ENTERING_PASSIVE_MODE)
    throw new FtpExceptionCanNotHaveDataConnection(
      "pasv() failed. " + getReplyString());

  try {
    __parsePassiveModeReply(getReplyStrings()[0]);
  } catch (MalformedServerReplyException e) {
    throw new FtpExceptionCanNotHaveDataConnection(e.getMessage());
  }

  Socket socket = _socketFactory_.createSocket(__passiveHost, __passivePort);

  // Ownership of the socket passes to the caller only on full success;
  // on every other exit path (null return or exception) it is closed here.
  boolean handedOver = false;
  try {
    if (!FTPReply.isPositivePreliminary(sendCommand(command, arg)))
      return null;

    if (__remoteVerificationEnabled && !verifyRemote(socket))
    {
      InetAddress host1 = socket.getInetAddress();
      InetAddress host2 = getRemoteAddress();
      // our precaution
      throw new FtpExceptionCanNotHaveDataConnection(
        "Host attempting data connection " + host1.getHostAddress() +
        " is not same as server " + host2.getHostAddress() +
        " So we intentionally close it for security precaution."
        );
    }

    if (__dataTimeout >= 0)
      socket.setSoTimeout(__dataTimeout);

    handedOver = true;
    return socket;
  } finally {
    if (!handedOver) {
      try {
        socket.close();
      } catch (IOException ignored) {
        // best effort: do not mask the original outcome with a close failure
      }
    }
  }
}
/***
 * Sets the timeout, in milliseconds, for data connections. The value is
 * applied to the data socket right after it has been opened; a negative
 * value means no timeout is applied.
 * <p>
 * @param timeout The data connection timeout in milliseconds.
 ***/
public void setDataTimeout(int timeout)
{
  this.__dataTimeout = timeout;
}
/***
 * Resets the connection parameters to their default values and then
 * closes the connection to the FTP server.
 * <p>
 * Data connections need no handling here: every ftp command that
 * involves a data connection closes it itself.
 * <p>
 * @exception IOException If an error occurs while disconnecting.
 ***/
public void disconnect() throws IOException
{
  this.__initDefaults();
  super.disconnect();
}
/***
 * Switches verification of the data connection peer on or off. When
 * enabled (the default), a data connection is only accepted if the
 * remote host is the same one the control connection is attached to.
 * The flag may be changed at any time, connected or not.
 * <p>
 * @param enable True to enable verification, false to disable it.
 ***/
public void setRemoteVerificationEnabled(boolean enable)
{
  this.__remoteVerificationEnabled = enable;
}
/***
 * Tells whether the remote host of a data connection is verified
 * against the host of the control connection. Verification is enabled
 * by default.
 * <p>
 * @return True if verification is enabled, false if not.
 ***/
public boolean isRemoteVerificationEnabled()
{
  return this.__remoteVerificationEnabled;
}
/***
 * Logs in to the FTP server with the given username and password.
 * The password is only sent when the server answers the USER command
 * with a positive intermediate reply.
 * <p>
 * @param username The username to login under.
 * @param password The password to use.
 * @return True if successfully completed, false if not.
 * @exception FTPConnectionClosedException
 *    If the FTP server prematurely closes the connection (FTP reply
 *    code 421). This exception may be caught either as an IOException
 *    or independently as itself.
 * @exception IOException If an I/O error occurs while either sending a
 *    command to the server or receiving a reply from the server.
 ***/
public boolean login(String username, String password) throws IOException
{
  user(username);
  final int userReply = getReplyCode();

  // USER alone was enough, no password requested
  if (FTPReply.isPositiveCompletion(userReply))
    return true;

  // the server asks for the password
  if (FTPReply.isPositiveIntermediate(userReply))
    return FTPReply.isPositiveCompletion(pass(password));

  // anything else is an error reply
  return false;
}
/***
 * Logs out of the FTP server by sending the QUIT command.
 * <p>
 * @return True if successfully completed, false if not.
 * @exception FTPConnectionClosedException
 *    If the FTP server prematurely closes the connection (FTP reply
 *    code 421). This exception may be caught either as an IOException
 *    or independently as itself.
 * @exception IOException If an I/O error occurs while either sending a
 *    command to the server or receiving a reply from the server.
 ***/
public boolean logout() throws IOException
{
  final int quitReply = quit();
  return FTPReply.isPositiveCompletion(quitReply);
}
/***
 * Issues LIST for <code>path</code> over a fresh passive data connection
 * and appends every entry the parser recognises to <code>entries</code>.
 * <p>
 * The data socket is always closed before the final reply is read from
 * the control connection, including when reading or parsing fails.
 * <p>
 * @param path The remote path to list, may be null.
 * @param entries The list that receives the parsed FTPFile objects.
 * @param limit Download limit in characters; reading stops after the
 *    entry that takes the total over the limit. A negative value means
 *    no limit.
 * @param parser The parser used to read and parse the listing entries.
 * @exception FtpExceptionCanNotHaveDataConnection If the data connection
 *    can not be established.
 * @exception FtpExceptionUnknownForcedDataClose If the server's reply
 *    after closing the data connection is not an accepted one.
 * @exception FtpExceptionControlClosedByForcedDataClose If the server
 *    closes the control connection after the data connection was closed.
 * @exception IOException If an I/O error occurs.
 ***/
public void retrieveList(String path, List entries, int limit,
FTPFileEntryParser parser)
throws IOException,
FtpExceptionCanNotHaveDataConnection,
FtpExceptionUnknownForcedDataClose,
FtpExceptionControlClosedByForcedDataClose {
  Socket socket = __openPassiveDataConnection(FTPCommand.LIST, path);

  if (socket == null)
    throw new FtpExceptionCanNotHaveDataConnection("LIST "
      + ((path == null) ? "" : path));

  try {
    BufferedReader reader =
      new BufferedReader(new InputStreamReader(socket.getInputStream()));

    long count = 0;
    String line;
    while ((line = parser.readNextEntry(reader)) != null) {
      FTPFile ftpFile = parser.parseFTPEntry(line);
      // skip non-formatted lines
      if (ftpFile == null)
        continue;
      entries.add(ftpFile);
      count += line.length();
      // impose download limit if limit >= 0, otherwise no limit
      // here, cut off is up to the line when total bytes is just over limit
      if (limit >= 0 && count > limit)
        break;
    }
  } finally {
    // always close the data channel, whether the listing was read
    // completely, cut off by the limit, or aborted by an exception.
    // different ftp servers respond differently to a forced close, see below.
    socket.close();
  }

  // scenarios:
  // (1) download limit not reached: no special care here
  // (2) download limit reached and data channel force-closed:
  //     different servers have different reply codes
  try {
    int reply = getReply();
    if (!_notBadReply(reply))
      throw new FtpExceptionUnknownForcedDataClose(getReplyString());
  } catch (FTPConnectionClosedException e) {
    // some ftp servers will close control channel if data channel socket
    // is closed by our end before all data has been read out. Check:
    // tux414.q-tam.hp.com FTP server (hp.com version whp02)
    // so must catch FTPConnectionClosedException thrown by getReply() above
    throw new FtpExceptionControlClosedByForcedDataClose(e.getMessage());
  }
}
/***
 * Issues RETR for <code>path</code> over a fresh passive data connection
 * and copies the received bytes to <code>os</code>.
 * <p>
 * The data is copied as-is (treated as binary). The data socket is
 * always closed before the final reply is read from the control
 * connection, including when reading or writing fails. The output
 * stream is flushed after every write but not closed.
 * <p>
 * @param path The remote file to retrieve, may be null.
 * @param os The stream the file content is written to.
 * @param limit Download limit in bytes; at most this many bytes are
 *    written. A negative value means no limit.
 * @exception FtpExceptionCanNotHaveDataConnection If the data connection
 *    can not be established.
 * @exception FtpExceptionUnknownForcedDataClose If the server's reply
 *    after closing the data connection is not an accepted one.
 * @exception FtpExceptionControlClosedByForcedDataClose If the server
 *    closes the control connection after the data connection was closed.
 * @exception IOException If an I/O error occurs.
 ***/
public void retrieveFile(String path, OutputStream os, int limit)
throws IOException,
FtpExceptionCanNotHaveDataConnection,
FtpExceptionUnknownForcedDataClose,
FtpExceptionControlClosedByForcedDataClose {
  Socket socket = __openPassiveDataConnection(FTPCommand.RETR, path);

  if (socket == null)
    throw new FtpExceptionCanNotHaveDataConnection("RETR "
      + ((path == null) ? "" : path));

  try {
    // 20040318, xing, treat everything as BINARY_FILE_TYPE for now
    // fixme, should we instruct server here for binary file type?
    InputStream input = socket.getInputStream();

    byte[] buf =
      new byte[org.apache.commons.net.io.Util.DEFAULT_COPY_BUFFER_SIZE];
    // long, so the running total can not overflow on large transfers
    long count = 0;
    int len;
    while ((len = input.read(buf, 0, buf.length)) != -1) {
      count += len;
      // impose download limit if limit >= 0, otherwise no limit
      // here, cut off is exactly of limit bytes
      if (limit >= 0 && count > limit) {
        os.write(buf, 0, len - (int) (count - limit));
        // the truncated last chunk must reach the consumer as well
        os.flush();
        break;
      }
      os.write(buf, 0, len);
      os.flush();
    }
  } finally {
    // always close the data channel, whether the file was read completely,
    // cut off by the limit, or aborted by an exception.
    // different ftp servers respond differently to a forced close, see below.
    socket.close();
  }

  // scenarios:
  // (1) download limit not reached: no special care here
  // (2) download limit reached and data channel force-closed:
  //     different servers have different reply codes
  // ABOR is deliberately not sent.
  try {
    int reply = getReply();
    if (!_notBadReply(reply))
      throw new FtpExceptionUnknownForcedDataClose(getReplyString());
  } catch (FTPConnectionClosedException e) {
    // some ftp servers will close control channel if data channel socket
    // is closed by our end before all data has been read out. Check:
    // tux414.q-tam.hp.com FTP server (hp.com version whp02)
    // so must catch FTPConnectionClosedException thrown by getReply() above
    throw new FtpExceptionControlClosedByForcedDataClose(e.getMessage());
  }
}
/***
 * Decides whether the reply received after closing a data connection
 * is acceptable.
 * <p>
 * Besides any positive completion reply, these codes are tolerated
 * because some servers send them when the client closes the data
 * channel early. No second reply follows them.
 * <ul>
 * <li>426 (transfer aborted), e.g. wu-ftpd 2.6.2</li>
 * <li>450 (file action not taken), e.g. ProFTPD [ftp.kernel.org]</li>
 * <li>451 (action aborted), e.g. ProFTPD [ftp.kernel.org]</li>
 * </ul>
 * <p>
 * @param reply The reply code read from the control connection.
 * @return True if the reply is acceptable, false for any other reply.
 ***/
private boolean _notBadReply(int reply) {
  return FTPReply.isPositiveCompletion(reply)
    || reply == 426   // transfer aborted
    || reply == 450   // file action not taken
    || reply == 451;  // action aborted
}
/***
 * Sets the file type to be transferred by sending a TYPE command. This
 * should be one of <code> FTP.ASCII_FILE_TYPE </code>,
 * <code> FTP.IMAGE_FILE_TYPE </code>, etc. The type only has to be set
 * when it should change; it stays in effect until changed again. The
 * default file type is <code> FTP.ASCII_FILE_TYPE </code> if this method
 * is never called.
 * <p>
 * @param fileType The <code> _FILE_TYPE </code> constant indicating the
 *    type of file.
 * @return True if successfully completed, false if not.
 * @exception FTPConnectionClosedException
 *    If the FTP server prematurely closes the connection (FTP reply
 *    code 421). This exception may be caught either as an IOException
 *    or independently as itself.
 * @exception IOException If an I/O error occurs while either sending a
 *    command to the server or receiving a reply from the server.
 ***/
public boolean setFileType(int fileType) throws IOException
{
  final boolean accepted = FTPReply.isPositiveCompletion(type(fileType));
  if (!accepted)
    return false;

  // remember the new type only once the server has accepted it
  this.__fileType = fileType;
  this.__fileFormat = FTP.NON_PRINT_TEXT_FORMAT;
  return true;
}
/***
 * Fetches the system type name from the server and returns the string.
 * The value is cached after the first successful call, so only that
 * call issues a SYST command; later calls return the cached value
 * until the defaults are restored (see disconnect).
 * <p>
 * Technically a NAME_SYSTEM_TYPE reply should be expected, but in
 * practice FTP servers deviate, so any positive completion reply is
 * accepted.
 * <p>
 * @return The system type name obtained from the server, i.e. the first
 *    reply line without its leading reply code and separator.
 * @exception FtpExceptionBadSystResponse If the server does not answer
 *    SYST with a positive completion reply, or the reply line is too
 *    short to carry a system name.
 * @exception FTPConnectionClosedException
 *    If the FTP server prematurely closes the connection (FTP reply
 *    code 421). This exception may be caught either as an IOException
 *    or independently as itself.
 * @exception IOException If an I/O error occurs while either sending a
 *    command to the server or receiving a reply from the server.
 ***/
public String getSystemName()
throws IOException, FtpExceptionBadSystResponse
{
  // Only talk to the server when nothing is cached yet; a cached name
  // must be returned as is, not treated as a bad response.
  if (__systemName == null) {
    if (!FTPReply.isPositiveCompletion(syst()))
      throw new FtpExceptionBadSystResponse(
        "Bad response of SYST: " + getReplyString());

    String firstLine = getReplyStrings()[0];
    // the name starts after the 3 digit reply code and its separator
    if (firstLine.length() < 4)
      throw new FtpExceptionBadSystResponse(
        "Bad response of SYST: " + getReplyString());

    __systemName = firstLine.substring(4);
  }
  return __systemName;
}
/***
 * Sends a NOOP command to the FTP server, which is useful to keep the
 * server from timing out the connection.
 * <p>
 * @return True if successfully completed, false if not.
 * @exception FTPConnectionClosedException
 *    If the FTP server prematurely closes the connection (FTP reply
 *    code 421). This exception may be caught either as an IOException
 *    or independently as itself.
 * @exception IOException If an I/O error occurs while either sending a
 *    command to the server or receiving a reply from the server.
 ***/
public boolean sendNoOp() throws IOException
{
  final int noopReply = noop();
  return FTPReply.isPositiveCompletion(noopReply);
}
// client.stat(path);
// client.sendCommand("STAT");
// client.sendCommand("STAT",path);
// client.sendCommand("MDTM",path);
// client.sendCommand("SIZE",path);
// client.sendCommand("HELP","SITE");
// client.sendCommand("SYST");
// client.setRestartOffset(120);
}