TextMimeDetector.java example

Explorer

delcyon-capo-master
- java
  - com
    - delcyon
      - capo
        CapoApplication.java
        CapoThreadFactory.java
        Configuration.java
        ContextThread.java
        InterruptibleRunnable.java
        annotations
        ControlNamespaceURI.java
        DefaultDocumentProvider.java
        DirectoyProvider.java
        XmlMappedArrays.java
        client
        CapoClient.java
        controller
        AbstractClientSideControl.java
        AbstractControl.java
        ControlElement.java
        ControlElementProvider.java
        Group.java
        LocalRequestProcessor.java
        client
        ClientSideControl.java
        ControllerRequest.java
        ServerControllerResponse.java
        elements
        AppendElement.java
        CallElement.java
        ChooseElement.java
        CommandElement.java
        CreateElement.java
        DebugElement.java
        DiffElement.java
        ExportElement.java
        GroupElement.java
        ImportElement.java
        InsertBeforeElement.java
        LogElement.java
        OpenElement.java
        OtherwiseElement.java
        ParseElement.java
        RemoteGroupElement.java
        RemoteGroupMessage.java
        RemoveElement.java
        RepeatElement.java
        ReplaceElement.java
        RequestElement.java
        ResourceControlElement.java
        ResourceMetaDataElement.java
        ResourceMonitorElement.java
        RestartElement.java
        SetAttributeElement.java
        SetIDElement.java
        SnapshotElement.java
        StepElement.java
        SyncElement.java
        TaskElement.java
        TransformElement.java
        UpdateElement.java
        VarElement.java
        WhenElement.java
        server
        ClientControllerRequest.java
        ControllerClientRequestProcessor.java
        ControllerProcessingException.java
        ControllerResponse.java
        ServerSideControl.java
        crypto
        CertificateRequest.java
        CertificateRequestProcessor.java
        datastream
        AccessibleByteArrayOutputStream.java
        BufferedSocket.java
        ConsoleOutputStreamFilter.java
        NullOutputStream.java
        OutputStreamAttributeFilterProvider.java
        RegexFilterOutputStream.java
        SocketFinalizer.java
        StreamEventFilterInputStream.java
        StreamEventFilterOutputStream.java
        StreamEventListener.java
        StreamFinalizer.java
        StreamHandler.java
        StreamProcessor.java
        StreamProcessorProvider.java
        StreamUtil.java
        TriggerFilterOutputStream.java
        stream_attribute_filter
        AbstractFilterInputStream.java
        ContentFormatTypeFilterInputStream.java
        ContentFormatTypeFilterOutputStream.java
        InputStreamAttributeFilterProvider.java
        MD5FilterInputStream.java
        MD5FilterOutputStream.java
        MimeTypeFilterInputStream.java
        SizeFilterInputStream.java
        SizeFilterOutputStream.java
        StreamAttributeFilter.java
        exceptions
        MissingAttributeException.java
        http
        HTTPStreamConsumer.java
        SimpleHttpRequest.java
        SimpleHttpResponse.java
        modules
        ModuleProvider.java
        ModuleRequest.java
        ModuleRequestProcessor.java
        parsers
        GrammarParser.java
        ParseNode.java
        ParseRule.java
        ParseTape.java
        ParseToken.java
        ParseTree.java
        Tokenizer.java
        preferences
        Preference.java
        PreferenceInfo.java
        PreferenceInfoHelper.java
        PreferenceProvider.java
        protocol
        client
        CapoConnection.java
        Request.java
        XMLRequest.java
        XMLServerResponse.java
        XMLServerResponseProcessor.java
        XMLServerResponseProcessorProvider.java
        server
        AbstractClientRequestProcessor.java
        AbstractResponse.java
        ClientRequest.java
        ClientRequestProcessor.java
        ClientRequestProcessorProvider.java
        ClientRequestProcessorSession.java
        ClientRequestProcessorSessionManager.java
        ClientRequestXMLProcessor.java
        Response.java
        XMLResponse.java
        resourcemanager
        CapoDataManager.java
        ContentFormatType.java
        ErrorResourceDescriptor.java
        ResourceDescriptor.java
        ResourceListener.java
        ResourceManager.java
        ResourceParameter.java
        ResourceParameterBuilder.java
        ResourceType.java
        ResourceTypeProvider.java
        ResourceURI.java
        remote
        RemoteResourceDescriptorMessage.java
        RemoteResourceDescriptorProxy.java
        RemoteResourceRequest.java
        RemoteResourceResponse.java
        RemoteResourceResponseProcessor.java
        RemoteResourceType.java
        types
        AbstractContentMetaData.java
        AbstractResourceDescriptor.java
        AbstractResourceType.java
        ClientsResourceDescriptor.java
        ClientsResourceType.java
        ContentMetaData.java
        FileResourceContentMetaData.java
        FileResourceDescriptor.java
        FileResourceType.java
        HttpResourceDescriptor.java
        HttpResourceType.java
        JcrContentMetaData.java
        JcrResourceDescriptor.java
        JcrResourceType.java
        JcrVersionContentMetaData.java
        JdbcResourceDescriptor.java
        JdbcResourceType.java
        RefResourceDescriptor.java
        RefResourceType.java
        ShellResourceDescriptor.java
        ShellResourceType.java
        SimpleContentMetaData.java
        StateParameters.java
        Versionable.java
        server
        CapoServer.java
        jackrabbit
        CapoJcrServer.java
        jetty
        CapoJettyServer.java
        tasks
        TaskManagerDocumentUpdaterThread.java
        TaskManagerThread.java
        util
        CloneControl.java
        CommandExecution.java
        ControlledClone.java
        EqualityProcessor.java
        HexUtil.java
        InternHashMap.java
        LeveledConsoleHandler.java
        LogPrefixFormatter.java
        MarshalWrapper.java
        MarshalWrapperInterface.java
        NamespaceContextMap.java
        ReflectionUtility.java
        StacktraceElementMarshalWrapper.java
        ToStringControl.java
        VariableContainerWrapper.java
        XMLAttribute.java
        XMLElement.java
        XMLSerializer.java
        diff
        Diff.java
        DiffDataConsumer.java
        DiffDataProvider.java
        DiffEntry.java
        InputStreamTokenizer.java
        Window.java
        WindowItem.java
        WindowItemLink.java
        XMLTextDiff.java
        webapp
        models
        DomItemModel.java
        ResourceDescriptorItemModel.java
        WContentMetaDataItemModel.java
        servlets
        CapoWebApplication.java
        CapoWebWTServlet.java
        resource
        AbstractResourceServlet.java
        DefaultResourceStreamer.java
        ResourceStreamer.java
        WResourceDescriptor.java
        WebResourcesServlet.java
        widgets
        CapoWTreeView.java
        WAceEditor.java
        WBoundedContainerWidget.java
        WCSSItemDelegate.java
        WCapoResourceEditor.java
        WCapoResourceExplorer.java
        WCapoResourceTreeView.java
        WCapoSearchControl.java
        WCapoXmlTreeView.java
        WConsoleWidget.java
        WCursorState.java
        WDiffWidget.java
        WLoginControl.java
        WResourceFactory.java
        WTailFileWidget.java
        WValidatorFactory.java
        WWindowAnchor.java
        WWorker.java
        WXMLEditor.java
        WXmlElementEditor.java
        WXmlNavigationBar.java
        xml
        CapoXPathFunction.java
        CapoXPathFunctionResolver.java
        XMLDiff.java
        XMLProcessor.java
        XMLProcessorProvider.java
        XMLStreamProcessor.java
        XPath.java
        XPathFunctionProcessor.java
        XPathFunctionProvider.java
        XPathFunctionUtility.java
        cdom
        CAttr.java
        CComment.java
        CDOMEvent.java
        CDOMEventListener.java
        CDOMHandler.java
        CDOMImplementation.java
        CDocument.java
        CDocumentBuilder.java
        CDocumentBuilderFactory.java
        CDocumentType.java
        CElement.java
        CNamedNodeMap.java
        CNode.java
        CNodeDefinition.java
        CNodeList.java
        CNodeValidator.java
        CNodeValidator2.java
        CProcessingInstruction.java
        CText.java
        CValidationException.java
        NodeProcessor.java
        NodeValidationUtilitesFI.java
        OccurancePredicate.java
        VariableContainer.java
        VariableProcessor.java
        dom
        ResourceAttr.java
        ResourceDeclarationElement.java
        ResourceDocument.java
        ResourceDocumentBuilder.java
        ResourceElement.java
        ResourceElementResourceDescriptor.java
        ResourceElementResourceType.java
        ResourceNode.java
        ResourceText.java
  - eu
    - medsea
      - mimeutil
        MimeException.java
        MimeType.java
        MimeTypeHashSet.java
        MimeUtil.java
        MimeUtil2.java
        TextMimeDetector.java
        TextMimeType.java
        detector
        ExtensionMimeDetector.java
        InvalidMagicMimeEntryException.java
        MagicMimeEntry.java
        MagicMimeEntryOperation.java
        MagicMimeMimeDetector.java
        MatchingMagicMimeEntry.java
        MimeDetector.java
        OpendesktopMimeDetector.java
        WindowsRegistryMimeDetector.java
        handler
        TextMimeHandler.java
      - util
        EncodingGuesser.java
        StringUtil.java
        ZipJarUtil.java
- tests
  - com
    - delcyon
      - capo
        ProblemTests.java
        controller
        elements
        GroupElementTest.java
        ImportElementTest.java
        ParserElementTest.java
        ResourceElementTest.java
        RestartElementTest.java
        SnapshotElementTest.java
        SyncElementTest.java
        TaskElementTest.java
        crypto
        CertificateRequestProcessorTest.java
        datastream
        RegexFilterOutputStreamTest.java
        parsers
        GrammarParserTest.java
        TokenizerTest.java
        resourcemanager
        ResourceDescriptorTest.java
        ResourceURITest.java
        types
        ClientsResourceDescriptorTest.java
        FileResourceDescriptorTest.java
        HttpResourceDescriptorTest.java
        JcrResourceDescriptorTest.java
        JdbcResourceDescriptorTest.java
        RefResourceDescriptorTest.java
        ShellResourceDescriptorTest.java
        server
        CapoServerTest.java
        tests
        util
        ExternalTestClient.java
        ExternalTestServer.java
        TestCapoApplication.java
        TestClient.java
        TestServer.java
        Util.java
        external
        Util.java
        util
        TestInterface.java
        XMLSerializerTest.java
        XMLSerializerTestData.java
        diff
        DiffTest.java
        XMLDiffTest.java
        xml
        cdom
        CDocumentTest.java
        dom
        ResourceDocumentTest.java
        xsd
        SchemaDocumentTest.java

/*
 * Copyright 2007-2009 Medsea Business Solutions S.L.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
/**
 * <p>
 * </p>
 * @author Steven McArdle
 */
package eu.medsea.mimeutil;

import java.io.BufferedInputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.UnsupportedEncodingException;
import java.net.URL;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Iterator;
import java.util.logging.Level;

import com.delcyon.capo.CapoApplication;

import eu.medsea.mimeutil.detector.MimeDetector;
import eu.medsea.mimeutil.handler.TextMimeHandler;
import eu.medsea.util.EncodingGuesser;

/**
 * This MimeDetector cannot be registered, unregistered or subclassed. It is a
 * default MimeDetector that is pre-installed into the mime-util utility and is
 * used as the FIRST MimeDetector.
 * <p>
 * You can influence this MimeDetector in several ways.
 * <ul>
 * <li>Specify a different list of preferred encodings using the static
 * TextMimeDetector.setPreferredEncodings(...) method.</li>
 * <li>Change the list of supported encodings using the static
 * EncodingGuesser.setSupportedEncodings(...) method.</li>
 * <li>Register TextMimeHandler(s) using the static
 * TextMimeDetector.registerTextMimeHandler(...) method (very, VERY powerful).</li>
 * </ul>
 * <p>
 * The TextMimeDetector.setPreferredEncodings(...) method is used to provide a
 * preferred list of encodings. The final encoding for the MimeType will be the
 * first one in this list that is also contained in the possible encodings
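 * returned from the EncodingGuesser class. If none of these match then the
 * default encoding reported by EncodingGuesser is used if it is one of the
 * possible encodings; otherwise the first entry in the possible encodings
 * collection is used.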
 * </p>
 * <p>
 * The EncodingGuesser.setSupportedEncodings(...) method is used to set the list
 * of encodings that will be considered when trying to guess the encoding. If
 * you provide encodings that are not supported by your JVM an error is logged
 * and the next encoding is tried. If you set this to an empty Collection then
 * you will effectively turn this MimeDetector OFF (the default). This is the
 * recommended way to disable this MimeDetector. The most common usage scenario
 * for this method is when your application is designed to support only a
 * limited set of encodings such as UTF-8 and UTF-16 encoded text files. You can
 * set the supported encodings list to this sub set of encodings and improve the
 * performance of this MimeDetector greatly.
 * </p>
 * <p>
 * The TextMimeDetector.registerTextMimeHandler(...) method can be used to
 * register special TextMimeHandler(s). These TextMimeHandler(s) are delegated
 * to once valid encodings have been found for the content contained in a File,
 * InputStream or byte[]. The handlers can influence both the returned MimeType
 * and encoding of any matched content. For instance, the default behavior is to
 * return a MimeType of text/plain and encoding set according to the rules
 * above. The Handler(s) allow you to further process the content and decide
 * that it is in fact a text/xml or application/svg-xml or even
 * mytype/mysubtype. You can also change the assigned encoding as it may be
 * wrong for your new MimeType. For instance, if you decide the MimeType is
 * really an XML file and not just a standard text/plain file and the detector
 * calculated that the best encoding is UTF-8 but you detect an encoding
 * attribute in the XML content for ISO-8859-1, you can set this as well, thus
 * returning a TextMimeType of application/xml with an encoding of ISO-8859-1
 * instead of a TextMimeType of text/plain and an encoding of UTF-8.<br/>
 * <br/>
 * IMPORTANT: Your handler(s) will only get to see and act on content that this
 * MimeDetector thinks is text in the first place. So if your restrictions on
 * supported encodings mean a file is no longer detected as text, then your
 * handler(s) will never be called for it.
 * </p>
 * <p>
 * The methods will do their best to eliminate any binary files before trying to
 * detect an encoding. However, if a binary file contains only a few bytes of
 * data or you are very unlucky it could be mistakenly recognised as a text file
 * and processed by this MimeDetector.
 * </p>
 * <p>
 * The Collection(s) returned from the methods in this class will contain either
 * 0 or 1 MimeType entry of type TextMimeType with a mime type of "text/plain"
 * or whatever matching registered TextMimeHandler(s) decide to return. You can
 * test for matches from this MimeDetector by using the instanceof operator on
 * each MimeType in the Collection returned to your code (remember, the
 * Collection returned to you is the accumulated collection from ALL registered
 * MimeDetectors). You can retrieve the encoding using the getEncoding() method
 * of TextMimeType after casting the MimeType to a TextMimeType.
 * </p>
 * <p>
 * You should also remember that if this MimeDetector puts a TextMimeType into
 * the eventual Collection of MimeType(s) returned to your code of say
 * "text/plain" and one or more of the other registered MimeDetector(s) also add
 * an instance of "text/plain" in accordance with their detection rules, the
 * type will not be changed from TextMimeType to MimeType. Only the specificity
 * value of the MimeType will be increased thus improving the likelihood that
 * this MimeType will be returned from the
 * MimeUtil.getMostSpecificMimeType(Collection mimeTypes) method.
 * </p>
 * 
 * @author Steven McArdle
 */
public final class TextMimeDetector extends MimeDetector
{

	// The maximum amount of data to retrieve from a stream. Used by
	// getMimeTypesInputStream(...) both as the mark() read limit and as the
	// number of bytes it tries to read.
	private static final int BUFFER_SIZE = 1024;

	// Length of a run of consecutive NULL (zero) bytes at which isBinary(...)
	// classifies the data as binary.
	// NOTE(review): with a value of 1 a single NULL byte is enough, although
	// the comment that used to sit here read "No text file should have 2 or
	// more consecutive NULL values" - confirm which behaviour is intended.
	private static final int MAX_NULL_VALUES = 1;

	// Encodings tried, in order, when choosing the encoding of detected text.
	// The empty list created here is replaced by the static initializer below.
	private static Collection preferredEncodings = new ArrayList();
	static
	{
		// Install the default preference order through the public setter so
		// the names go through EncodingGuesser.getValidEncodings(...).
		TextMimeDetector.setPreferredEncodings(new String[] { "UTF-16", "UTF-8", "ISO-8859-1", "windows-1252", "US-ASCII" });
	}

	// Registered list of TextMimeHandler(s), consulted in registration order
	// by fireMimeHandlers(...)
	private static Collection handlers = new ArrayList();

	/**
	 * No-argument constructor. Private so nobody can register one using the
	 * MimeUtil.registerMimeDetector(...) method.
	 */
	private TextMimeDetector()
	{
	}

	/**
	 * Package scoped so that the class can still be created for use by
	 * mime-util without resorting to a singleton approach. Could change this
	 * in the future !!!
	 * 
	 * @param dummy
	 *            not read; it only gives this constructor a signature
	 *            different from the private no-argument one it delegates to
	 */
	TextMimeDetector(int dummy)
	{
		this();
	}

	/**
	 * Returns a fixed, human readable summary of what this detector does.
	 * 
	 * @return the description text
	 * @see MimeDetector#getDescription()
	 */
	public String getDescription()
	{
		final String description = "Determine if a file or stream contains a text mime type. If so then return TextMimeType with text/plain and the best guess encoding.";
		return description;
	}

	/**
	 * This MimeDetector needs actual content, so the file name is turned into
	 * a File and the work is handed to getMimeTypesFile(File).
	 * 
	 * @param fileName
	 *            path of the file to examine
	 * @return whatever getMimeTypesFile(File) returns for that file
	 * @throws UnsupportedOperationException
	 *             as thrown by getMimeTypesFile(File)
	 */
	public Collection getMimeTypesFileName(String fileName) throws UnsupportedOperationException
	{
		File target = new File(fileName);
		return getMimeTypesFile(target);
	}

	/**
	 * We only want to deal with the stream from the URL: the stream obtained
	 * from MimeUtil.getInputStreamForURL(url) is buffered, passed to
	 * getMimeTypesInputStream(InputStream) and closed again before returning.
	 * 
	 * @param url
	 *            location of the content to examine
	 * @return the Collection returned by getMimeTypesInputStream(InputStream)
	 * @throws UnsupportedOperationException
	 *             rethrown unchanged when raised while detecting
	 * @throws MimeException
	 *             wrapping any other exception raised while opening or
	 *             examining the stream
	 * @see MimeDetector#getMimeTypesURL(URL)
	 */
	public Collection getMimeTypesURL(URL url) throws UnsupportedOperationException
	{

		InputStream in = null;
		try
		{
			in = new BufferedInputStream(MimeUtil.getInputStreamForURL(url));
			return getMimeTypesInputStream(in);
		} catch (UnsupportedOperationException e)
		{
			throw e;
		} catch (Exception e)
		{
			throw new MimeException(e);
		} finally
		{
			// "in" is still null when the stream could not be opened; closing
			// it then would only raise (and log) a NullPointerException.
			if (in != null)
			{
				try
				{
					in.close();
				} catch (Exception closeFailure)
				{
					// A failed close must not replace the result or the
					// exception already on its way out, so it is only logged.
					CapoApplication.logger.log(Level.SEVERE,closeFailure.getLocalizedMessage());
				}
			}
		}
	}

	/**
	 * We only want to deal with the stream for the file: the file is opened
	 * through a buffered FileInputStream, passed to
	 * getMimeTypesInputStream(InputStream) and closed again before returning.
	 * 
	 * @param file
	 *            the file whose content is examined
	 * @return the Collection returned by getMimeTypesInputStream(InputStream)
	 * @throws UnsupportedOperationException
	 *             if the file does not exist, or rethrown unchanged when
	 *             raised while detecting
	 * @throws MimeException
	 *             wrapping any other exception raised while opening or
	 *             examining the file
	 * @see MimeDetector#getMimeTypesFile(File)
	 */
	public Collection getMimeTypesFile(File file) throws UnsupportedOperationException
	{

		if (!file.exists())
		{
			throw new UnsupportedOperationException("This MimeDetector requires actual content.");
		}
		InputStream in = null;
		try
		{
			in = new BufferedInputStream(new FileInputStream(file));
			return getMimeTypesInputStream(in);
		} catch (UnsupportedOperationException e)
		{
			throw e;
		} catch (Exception e)
		{
			throw new MimeException(e);
		} finally
		{
			// "in" is still null when the file could not be opened (for
			// example a directory or an unreadable file); closing it then
			// would only raise (and log) a NullPointerException.
			if (in != null)
			{
				try
				{
					in.close();
				} catch (Exception closeFailure)
				{
					// A failed close must not replace the result or the
					// exception already on its way out, so it is only logged.
					CapoApplication.logger.log(Level.SEVERE,closeFailure.getLocalizedMessage());
				}
			}
		}
	}

	/**
	 * Reads up to BUFFER_SIZE bytes from the start of the stream, rewinds the
	 * stream to where it was on entry and hands the bytes read to
	 * getMimeTypesByteArray(byte[]).
	 * 
	 * @param in
	 *            stream to sample; it is marked before reading and reset
	 *            afterwards
	 * @return the Collection returned by getMimeTypesByteArray(byte[])
	 * @throws UnsupportedOperationException
	 *             as thrown by getMimeTypesByteArray(byte[])
	 * @throws MimeException
	 *             if reading from the stream or resetting it fails
	 * @see MimeDetector#getMimeTypesInputStream(InputStream)
	 */
	public Collection getMimeTypesInputStream(InputStream in) throws UnsupportedOperationException
	{

		final int capacity = TextMimeDetector.BUFFER_SIZE;
		final byte[] buffer = new byte[capacity];
		byte[] sample = null;

		// Remember the current position so it can be restored below
		in.mark(capacity);

		try
		{
			// A single read may deliver fewer bytes than asked for, so keep
			// reading until the buffer is full or the stream reports EOF.
			int filled = 0;
			while (filled < capacity)
			{
				int count = in.read(buffer, filled, capacity - filled);
				if (count < 0)
				{
					break; // EOF
				}
				filled += count;
			}

			if (filled == capacity)
			{
				// Buffer completely filled, use it as is
				sample = buffer;
			}
			else
			{
				// Short read: trim to the bytes actually received
				sample = new byte[filled];
				System.arraycopy(buffer, 0, sample, 0, filled);
			}
		} catch (IOException readFailure)
		{
			throw new MimeException(readFailure);
		} finally
		{
			try
			{
				// Put the stream back to where it was marked.
				in.reset();
			} catch (Exception resetFailure)
			{
				throw new MimeException(resetFailure);
			}
		}
		return getMimeTypesByteArray(sample);
	}

	/**
	 * Decides whether the bytes are text and, if so, returns a Collection
	 * holding a single TextMimeType of "text/plain" with the chosen encoding.
	 * When TextMimeHandler(s) are registered the bytes are decoded with that
	 * encoding (after skipping any BOM) and the handlers get the chance to
	 * adjust the TextMimeType.
	 * 
	 * @param data
	 *            the content to examine
	 * @return a Collection containing the one detected TextMimeType
	 * @throws UnsupportedOperationException
	 *             if no supported encodings are configured, the data looks
	 *             binary, or no possible encoding is found for the data
	 * @see MimeDetector#getMimeTypesByteArray(byte[])
	 */
	public Collection getMimeTypesByteArray(byte[] data) throws UnsupportedOperationException
	{

		// Check if the array contains binary data
		if (EncodingGuesser.getSupportedEncodings().isEmpty() || isBinary(data))
		{
			throw new UnsupportedOperationException();
		}

		Collection possibleEncodings = EncodingGuesser.getPossibleEncodings(data);

		CapoApplication.logger.fine("Possible encodings [" + possibleEncodings.size() + "] " + possibleEncodings);

		if (possibleEncodings.isEmpty())
		{
			// Is not a text file understood by this JVM
			throw new UnsupportedOperationException();
		}

		String encoding = selectEncoding(possibleEncodings);

		Collection mimeTypes = new ArrayList();
		mimeTypes.add(new TextMimeType("text/plain", encoding));

		if (handlers.isEmpty())
		{
			// Nothing to handle
			return mimeTypes;
		}

		// The content is handed to the handlers decoded with the selected
		// encoding, without any leading BOM
		try
		{
			int lengthBOM = EncodingGuesser.getLengthBOM(encoding, data);
			String content = new String(EncodingGuesser.getByteArraySubArray(data, lengthBOM, data.length - lengthBOM), encoding);
			return fireMimeHandlers(mimeTypes, content);
		} catch (UnsupportedEncodingException e)
		{
			// Not expected for an encoding that was just reported as
			// possible, but if it does happen the handlers are skipped, so
			// leave a trace instead of failing silently.
			CapoApplication.logger.log(Level.WARNING, "Unable to decode content as " + encoding + ", TextMimeHandler(s) not called: " + e.getLocalizedMessage());
		}
		return mimeTypes;
	}

	/**
	 * Picks the encoding to report: the first preferred encoding that is also
	 * a possible encoding, else the default encoding if it is possible, else
	 * the first possible encoding.
	 * 
	 * @param possibleEncodings
	 *            non-empty Collection of candidate encoding names
	 * @return the selected encoding name
	 */
	private static String selectEncoding(Collection possibleEncodings)
	{
		// Iterate over the preferred encodings in the order defined and
		// return the first one found
		for (Iterator it = TextMimeDetector.preferredEncodings.iterator(); it.hasNext();)
		{
			String preferred = (String) it.next();
			if (possibleEncodings.contains(preferred))
			{
				return preferred;
			}
		}

		// None of the preferred encodings were acceptable, see if the default
		// encoding can be used
		String defaultEncoding = EncodingGuesser.getDefaultEncoding();
		if (possibleEncodings.contains(defaultEncoding))
		{
			return defaultEncoding;
		}

		// Fall back to the first possible encoding
		return (String) possibleEncodings.iterator().next();
	}

	/**
	 * Replace the list of preferred encodings. getMimeTypesByteArray(byte[])
	 * walks this list in order and uses the first entry that is also one of
	 * the possible encodings for the content. Only when no preferred encoding
	 * matches does it fall back to the default encoding and, failing that, to
	 * the first possible encoding.
	 * <p>
	 * The names are passed through EncodingGuesser.getValidEncodings(...) and
	 * its result becomes the new list (presumably names the JVM does not
	 * support are dropped there - confirm against EncodingGuesser).
	 * </p>
	 * 
	 * @param encodings
	 *            String array of canonical encoding names.
	 */
	public static void setPreferredEncodings(String[] encodings)
	{
		Collection validated = EncodingGuesser.getValidEncodings(encodings);
		TextMimeDetector.preferredEncodings = validated;

		CapoApplication.logger.fine("Preferred Encodings set to " + validated);
	}

	/**
	 * Register a TextMimeHandler by appending it to the list of handlers.
	 * 
	 * @param handler
	 *            to register
	 */
	public static void registerTextMimeHandler(TextMimeHandler handler)
	{
		TextMimeDetector.handlers.add(handler);
	}

	/**
	 * Unregister a TextMimeHandler by removing it from the list of handlers.
	 * 
	 * @param handler
	 *            to unregister
	 */
	public static void unregisterTextMimeHandler(TextMimeHandler handler)
	{
		TextMimeDetector.handlers.remove(handler);
	}

	/**
	 * Get the current Collection of registered TextMimeHandler(s). The
	 * Collection returned is the detector's own list, not a copy.
	 * 
	 * @return currently registered collection of TextMimeHandler(s)
	 */
	public static Collection getRegisteredTextMimeHandlers()
	{
		return TextMimeDetector.handlers;
	}

	/**
	 * Give registered TextMimeHandler(s) the opportunity to influence the
	 * actual mime type before returning from the getMimeTypesXXX(...) methods.
	 * Handlers are asked in registration order and the first one whose
	 * handle(...) call returns true ends the round.
	 * 
	 * @param mimeTypes
	 *            Collection whose first entry is the detected TextMimeType
	 * @param content
	 *            the decoded text passed to each handler
	 * @return the same Collection instance that was passed in
	 */
	private Collection fireMimeHandlers(Collection mimeTypes, String content)
	{
		// Due to the way this MimeDetector works there is exactly one entry
		// in the Collection, and it is the one the handlers operate on.
		TextMimeType detected = (TextMimeType) mimeTypes.iterator().next();

		Iterator registered = handlers.iterator();
		boolean handled = false;
		while (!handled && registered.hasNext())
		{
			TextMimeHandler handler = (TextMimeHandler) registered.next();
			// A true result short circuits the remaining handlers
			handled = handler.handle(detected, content);
		}
		return mimeTypes;
	}

	/*
	 * Quick and dirty check for binary content. The array is scanned from the
	 * start and the data is reported as binary as soon as a run of consecutive
	 * NULL (zero) bytes reaches MAX_NULL_VALUES; any other byte restarts the
	 * run. Binary content without such a run, a small file for instance, slips
	 * through here and is left to later tests. Other known byte sequences may
	 * be added to this method as and when they are discovered.
	 * 
	 * NOTE(review): MAX_NULL_VALUES is declared as 1 in this class, so a
	 * single NULL byte is enough to report binary. The earlier wording of this
	 * comment spoke of "more than MAX_NULL_VALUES consecutive null values" -
	 * confirm whether the comparison or the constant reflects the intent.
	 */
	private boolean isBinary(byte[] data)
	{

		int consecutiveNulls = 0;

		for (int index = 0; index < data.length; index++)
		{
			// Extend the current run on a NULL byte, otherwise start over
			consecutiveNulls = (data[index] == 0) ? consecutiveNulls + 1 : 0;

			if (consecutiveNulls == MAX_NULL_VALUES)
			{
				return true;
			}
		}
		return false;
	}
}