CodepageProcessor.java example

Explorer

common_utils-master
- baidu_api
  - src
    - main
      - java
        com
        xiongyingqi
        utils
        baidu
        CommonUtil.java
        ip
        IpAddress.java
        vo
        ContentVo.java
        IpAddressVo.java
        Status.java
        map
        AddressMatcher.java
    - test
      - java
        com
        xiongyingqi
        utils
        baidu
        IpAddressTest.java
- common_code
  - src
    - main
      - java
        com
        xiongyingqi
        utils
        code
        CodeAnnotation.java
        CodeBuilder.java
        CodeCondition.java
        CodeHelper.java
        CodeProcessor.java
    - test
      - java
        com
        xiongyingqi
        utils
        code
        CodeBuilderTest.java
- common_helper
  - src
    - main
      - java
        com
        xiongyingqi
        calendar
        CalendarBuilder.java
        captcha
        background
        BackgroundFactory.java
        SingleColorBackgroundFactory.java
        color
        ColorFactory.java
        GradientColorFactory.java
        RandomColorFactory.java
        SingleColorFactory.java
        filter
        AbstractFilterFactory.java
        ConfigurableFilterFactory.java
        FilterFactory.java
        library
        AbstractConvolveImageOp.java
        AbstractImageOp.java
        AbstractTransformImageOp.java
        BlurImageOp.java
        CurvesImageOp.java
        DiffuseImageOp.java
        DoubleRippleImageOp.java
        MarbleImageOp.java
        PerlinNoise.java
        RippleImageOp.java
        SoftenImageOp.java
        WobbleImageOp.java
        predefined
        CurvesRippleFilterFactory.java
        DiffuseRippleFilterFactory.java
        DoubleRippleFilterFactory.java
        MarbleRippleFilterFactory.java
        RippleFilterFactory.java
        WobbleRippleFilterFactory.java
        font
        FontFactory.java
        RandomFontFactory.java
        service
        AbstractCaptchaService.java
        Captcha.java
        CaptchaService.java
        ConfigurableCaptchaService.java
        SimpleCaptchaService.java
        text
        renderer
        AbstractTextRenderer.java
        BestFitTextRenderer.java
        RandomYBestFitTextRenderer.java
        SimpleTextRenderer.java
        TextCharacter.java
        TextRenderer.java
        TextString.java
        utils
        encoder
        EncoderHelper.java
        word
        AdaptiveRandomWordFactory.java
        RandomWordFactory.java
        WordFactory.java
        jackson
        FilterPropertyHandler.java
        annotation
        AllowProperty.java
        IgnoreProperties.java
        IgnoreProperty.java
        helper
        ThreadJacksonMixInHolder.java
        impl
        Jackson1JavassistFilterPropertyHandler.java
        JavassistFilterPropertyHandler.java
        JavassistTest.java
        util
        AlternativeJdkIdGenerator.java
        AnnotationHelper.java
        AntPathMatcher.java
        AopTargetUtils.java
        Assert.java
        AutoPopulatingList.java
        Base64.java
        ByteHelper.java
        CRCHelper.java
        CalendarHelper.java
        ClassHelper.java
        ClassLookupHelper.java
        ClassUtils.java
        ClipBoardHelper.java
        CollectionHelper.java
        CollectionUtils.java
        CommonsLogWriter.java
        ComparatorHelper.java
        CompositeIterator.java
        ConcurrencyThrottleSupport.java
        ConcurrentReferenceHashMap.java
        ConsoleHelper.java
        CosineSimilarAlgorithm.java
        CustomizableThreadCreator.java
        DateHelper.java
        DefaultPropertiesPersister.java
        DigestUtils.java
        DomainHelper.java
        DynamicPassword.java
        EPlatform.java
        EntityHelper.java
        ErrorHandler.java
        FileCopyUtils.java
        FileHelper.java
        FileSystemUtils.java
        FileType.java
        GenericHelper.java
        GetDocumentPathDemo.java
        IdGenerator.java
        IdentityCardHelper.java
        InvalidMimeTypeException.java
        JsonHelper.java
        Judgment.java
        KeyObject.java
        KingrayResource.java
        LinkedCaseInsensitiveMap.java
        LinkedMultiValueMap.java
        Log4jConfigurer.java
        MD5Crypt.java
        MD5Helper.java
        MessageEncryptionHelper.java
        MethodInvoker.java
        MimeType.java
        MimeTypeUtils.java
        MultiValueMap.java
        MyClassLoader.java
        NumberHelper.java
        NumberUtils.java
        ObjectUtils.java
        PackageUtil.java
        PathMatcher.java
        PatternMatchUtils.java
        PhoneNumberAddress.java
        PrintHelper.java
        PropertiesHelper.java
        PropertiesPersister.java
        PropertyPlaceholderHelper.java
        ReflectHelper.java
        ReflectionUtils.java
        RegexHelper.java
        ResourceUtils.java
        RsaUtils.java
        SerializationUtils.java
        SerializeHelper.java
        ServletHelper.java
        SocketUtils.java
        StackTraceHelper.java
        StopWatch.java
        StreamUtils.java
        StringHelper.java
        StringUtil.java
        StringUtils.java
        StringValueResolver.java
        SystemPropertyUtils.java
        Test.java
        ThreadHelper.java
        ThreadPool.java
        ThreadPoolException.java
        TimerHelper.java
        TypeUtils.java
        UnicodeHelper.java
        WeakReferenceMonitor.java
        WinRegisterDemo.java
        WindowsHelper.java
        comparator
        BooleanComparator.java
        ComparableComparator.java
        CompoundComparator.java
        InstanceComparator.java
        InvertibleComparator.java
        NullSafeComparator.java
        StringComparator.java
        package-info.java
        concurrent
        FutureAdapter.java
        ListenableFuture.java
        ListenableFutureAdapter.java
        ListenableFutureCallback.java
        ListenableFutureCallbackRegistry.java
        ListenableFutureTask.java
        package-info.java
        package-info.java
        xml
        AbstractStaxContentHandler.java
        AbstractStaxXMLReader.java
        AbstractXMLReader.java
        AbstractXMLStreamReader.java
        DomContentHandler.java
        DomUtils.java
        SimpleNamespaceContext.java
        SimpleSaxErrorHandler.java
        SimpleTransformErrorListener.java
        StaxEventContentHandler.java
        StaxEventXMLReader.java
        StaxResult.java
        StaxSource.java
        StaxStreamContentHandler.java
        StaxStreamXMLReader.java
        StaxUtils.java
        TransformerUtils.java
        XMLEventStreamReader.java
        XMLEventStreamWriter.java
        XmlValidationModeDetector.java
        package-info.java
    - test
      - java
        AtomicTest.java
        LinkList.java
        LongTest.java
        UserAuthentication.java
        com
        xiongyingqi
        calendar
        CalendarBuilderTest.java
        jackson
        JsonFilterPropertyTest.java
        pojo
        Group.java
        User.java
        util
        AbstractImplements.java
        ClassHelperTest.java
        ConsoleHelperTest.java
        DateHelperTest.java
        EntityHelperTest.java
        FileHelperTest.java
        KeyObjectTest.java
        PrintHelperTest.java
        PropertiesHelperTest.java
        SubClass.java
        TestInterface.java
- common_http
  - src
    - main
      - java
        com
        xiongyingqi
        http
        BuildNameValuePairsHelper.java
        HttpAccess.java
        HttpBuilder.java
    - test
      - java
        Stock.java
- common_log
  - src
    - main
      - java
        com
        xiongyingqi
        Logger.java
  - test
    - java
      - com
        xiongyingqi
        LoggerTest.java
- common_thread
  - src
    - main
      - java
        com
        xiongyingqi
        utils
        thead
        ThreadPool.java
        ThreadPoolException.java
- commons-file
  - src
    - main
      - java
        com
        xiongyingqi
        util
        FileEncode.java
        cpdetector
        io
        parser
        EncodingLexer.java
        EncodingParser.java
        EncodingParserTokenTypes.java
        info
        monitorenter
        cpdetector
        ACmdLineArgsInheritor.java
        CharsetPrinter.java
        CodepageProcessor.java
        io
        ASCIIDetector.java
        AbstractCodepageDetector.java
        ByteOrderMarkDetector.java
        ClassFileFilterIsA.java
        CodepageDetectorProxy.java
        FileFilterExtensions.java
        HTMLCodepageDetector.java
        IClassFileFilter.java
        ICodepageDetector.java
        InputStreamDebug.java
        JChardetFacade.java
        JarArchive.java
        ParsingDetector.java
        UnicodeDetector.java
        UnknownCharset.java
        UnsupportedCharset.java
        parser
        EncodingLexer.java
        EncodingParser.java
        EncodingParserTokenTypes.java
        reflect
        SingletonLoader.java
        test
        ui
        ClassFileChooser.java
        FitAuthoringClient.java
        util
        collections
        ITreeNode.java
        TreeNodeUniqueChildren.java
        ui
        ITableRenderer.java
        StreamTableRenderer.java
        TableRendererHTML.java
        io
        LimitedInputStream.java
        MultiplexingOutputStream.java
        util
        Entry.java
        ExceptionUtil.java
        FileUtil.java
        StringUtil.java
        jargs
        gnu
        CmdLineParser.java
    - test
      - java
        FileHelperTest.java
- commons-ip
  - src
    - main
      - java
        com
        xiongyingqi
        ip
        IpHelper.java
        vo
        IpVo.java
- commons-logic
  - src
    - main
      - java
        com
        xiongyingqi
        logic
        ConditionalOperation.java
        action
        Action.java
        condition
        And.java
        Condition.java
        Logic.java
        Not.java
        Or.java
    - test
      - java
        com
        xiongyingqi
        logic
        ConditionalOperationTest.java
- commons-scan
  - src
    - main
      - java
        com
        xiongyingqi
        util
        ClassLookupHelper.java
        MethodScanner.java
        PackageScanner.java
    - test
      - java
        com
        xiongyingqi
        util
        AbstractImplements.java
        PackageScannerTest.java
        SubClass.java
        TestInterface.java
- commons-spring
  - src
    - main
      - java
        com
        xiongyingqi
        util
        SpringMVCHelper.java
- qrcode
  - src
    - main
      - java
        com
        xiongyingqi
        qrcode
        Profile.java
        QRCode.java
        QRCodeGenerator.java
    - test
      - java
        com
        xiongyingqi
        qrcode
        QRCodeGeneratorTest.java
        QRCodeTest.java
- spring-email
  - src
    - main
      - java
        com
        xiongyingqi
        email
        JavaMailSenderFactory.java
        service
        IEmailService.java
        impl
        EmailService.java
        vo
        AttachmentVo.java
        EmailAccount.java
        EmailVo.java
        InlineImageVo.java
    - test
      - java
        EmailBaseTest.java
        EmailServiceTest.java

/*
 * CodepageProcessor.java an executable command line interface 
 * for batch processing files with cpdetector.
 *
 * Copyright (C) Achim Westermann, created on 02.06.2004, 09:02:19.
 *  
 * ***** BEGIN LICENSE BLOCK *****
 * Version: MPL 1.1/GPL 2.0/LGPL 2.1
 * 
 * The contents of this collection are subject to the Mozilla Public License Version 
 * 1.1 (the "License"); you may not use this file except in compliance with 
 * the License. You may obtain a copy of the License at 
 * http://www.mozilla.org/MPL/
 * 
 * Software distributed under the License is distributed on an "AS IS" basis,
 * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
 * for the specific language governing rights and limitations under the
 * License.
 * 
 * The Original Code is the cpDetector code in [sub] packages info.monitorenter and 
 * cpdetector. 
 * 
 * The Initial Developer of the Original Code is
 * Achim Westermann <achim.westermann@gmx.de>.
 * 
 * Portions created by the Initial Developer are Copyright (c) 2007 
 * the Initial Developer. All Rights Reserved.
 * 
 * Contributor(s):
 * 
 * Alternatively, the contents of this file may be used under the terms of
 * either the GNU General Public License Version 2 or later (the "GPL"), or
 * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
 * in which case the provisions of the GPL or the LGPL are applicable instead
 * of those above. If you wish to allow use of your version of this file only
 * under the terms of either the GPL or the LGPL, and not to allow others to
 * use your version of this file under the terms of the MPL, indicate your
 * decision by deleting the provisions above and replace them with the notice
 * and other provisions required by the GPL or the LGPL. If you do not delete
 * the provisions above, a recipient may use your version of this file under
 * the terms of any one of the MPL, the GPL or the LGPL.
 * 
 * ***** END LICENSE BLOCK ***** * 
 */
/*
 * 
 *  This program is free software; you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License as published by
 *  the Free Software Foundation; either version 2 of the License, or
 *  (at your option) any later version.
 *
 *  This program is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License
 *  along with this program; if not, write to the Free Software
 *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 *
 *  If you modify or optimize the code in a useful way please let me know.
 *  Achim.Westermann@gmx.de
 *
 */
package info.monitorenter.cpdetector;

import info.monitorenter.cpdetector.io.CodepageDetectorProxy;
import info.monitorenter.cpdetector.io.FileFilterExtensions;
import info.monitorenter.cpdetector.io.ICodepageDetector;
import info.monitorenter.cpdetector.io.JChardetFacade;
import info.monitorenter.cpdetector.io.ParsingDetector;
import info.monitorenter.cpdetector.io.UnknownCharset;
import info.monitorenter.cpdetector.reflect.SingletonLoader;
import info.monitorenter.util.FileUtil;
import jargs.gnu.CmdLineParser;

import java.io.BufferedInputStream;
import java.io.BufferedOutputStream;
import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileFilter;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.OutputStream;
import java.io.OutputStreamWriter;
import java.io.Reader;
import java.io.Writer;
import java.nio.charset.Charset;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.MissingResourceException;
import java.util.Set;
import java.util.SortedMap;
import java.util.StringTokenizer;


public class CodepageProcessor
    extends ACmdLineArgsInheritor {

  
  protected File collectionRoot = null;

  
  protected CodepageDetectorProxy detector;

  
  private Charset[] parseCodepages;

  private static String fileseparator = System.getProperty("file.separator");

  
  private FileFilter extensionFilter;

  
  private File outputDir;

  
  private boolean moveUnknown = false;

  
  private boolean printCharsets = false;

  private boolean verbose = false;

  
  private long wait = 0;

  
  private Charset targetCodepage = null;

  
  private static char[] transcodeBuffer = new char[1024];

  
  private static byte[] rawtransportBuffer = new byte[1024];

  public CodepageProcessor() {
    super();
    this.detector = CodepageDetectorProxy.getInstance();
    // adding the options:
    this.addCmdLineOption("documents", new CmdLineParser.Option.StringOption('r', "documents"));
    this.addCmdLineOption("extensions", new CmdLineParser.Option.StringOption('e', "extensions"));
    this.addCmdLineOption("outputDir", new CmdLineParser.Option.StringOption('o', "outputDir"));
    this.addCmdLineOption("moveUnknown", new CmdLineParser.Option.BooleanOption('m', "moveUnknown"));
    this.addCmdLineOption("verbose", new CmdLineParser.Option.BooleanOption('v', "verbose"));
    this.addCmdLineOption("wait", new CmdLineParser.Option.IntegerOption('w', "wait"));
    this.addCmdLineOption("transform", new CmdLineParser.Option.StringOption('t', "transform"));
    this.addCmdLineOption("detectors", new CmdLineParser.Option.StringOption('d', "detectors"));
    this.addCmdLineOption("charsets", new CmdLineParser.Option.BooleanOption('c', "charsets"));
  }

  public void parseArgs(String[] cmdLineArgs) throws Exception {
    // Has to be first call!!
    super.parseArgs(cmdLineArgs);
    Object collectionOption = this.getParsedCmdLineOption("documents");
    Object extensionsOption = this.getParsedCmdLineOption("extensions");
    Object outputDirOption = this.getParsedCmdLineOption("outputDir");
    Object moveUnknownOption = this.getParsedCmdLineOption("moveUnknown");
    Object verboseOption = this.getParsedCmdLineOption("verbose");
    Object waitOption = this.getParsedCmdLineOption("wait");
    Object transformOption = this.getParsedCmdLineOption("transform");
    Object detectorOption = this.getParsedCmdLineOption("detectors");
    Object charsetsOption = this.getParsedCmdLineOption("charsets");

    if (charsetsOption != null) {
      this.printCharsets = ((Boolean) charsetsOption).booleanValue();
    } else {

      if (collectionOption == null) {
        usage();
        throw new MissingResourceException("Parameter for collection root directory is missing.",
            "String", "-r");
      }
      this.collectionRoot = new File(collectionOption.toString());
      if (outputDirOption == null) {
        usage();
        throw new MissingResourceException("Parameter for output directory is missing.", "String",
            "-o");
      }
      this.outputDir = new File(outputDirOption.toString());
      if (extensionsOption != null) {
        this.extensionFilter = new FileFilterExtensions(this.parseCSVList(extensionsOption
            .toString()));
      } else {
        // Anonymous dummy:
        this.extensionFilter = new FileFilter() {
          public boolean accept(File f) {
            return true;
          }
        };
      }
      if (moveUnknownOption != null) {
        this.moveUnknown = true;
      }
      if (verboseOption != null) {
        if (((Boolean) verboseOption).booleanValue()) {
          this.verbose = true;
        }
      }
      if (waitOption != null) {
        this.wait = ((Integer) waitOption).intValue() * 1000;
      }
      if (transformOption != null) {
        String charset = (String) transformOption;
        try {
          this.targetCodepage = Charset.forName(charset);
        } catch (Exception e) {
          StringBuffer msg = new StringBuffer();
          msg.append("Given charset name: \"");
          msg.append(charset);
          msg.append("\" for option -t is illegal: \n");
          msg.append("  ");
          msg.append(e.getMessage());
          msg.append("\n");
          msg.append("   Legal values are: \n");
          for (int i = 0; i < parseCodepages.length; i++) {
            msg.append("    ");
            msg.append(parseCodepages[i].name());
            msg.append("\n");
          }
          throw new IllegalArgumentException(msg.toString());
        }
      }
      if (detectorOption != null) {
        String[] detectors = this.parseCSVList((String) detectorOption);
        if (detectors.length == 0) {
          StringBuffer msg = new StringBuffer();
          msg.append("You specified the codepage detector argument \"-d\" but ommited any comma-separated fully qualified class-name.");
          throw new IllegalArgumentException(msg.toString());
        }

        // try to instantiate and cast:
        ICodepageDetector cpDetector = null;
        for (int i = 0; i < detectors.length; i++) {
          try {
            cpDetector = (ICodepageDetector) SingletonLoader.getInstance()
                .newInstance(detectors[i]);
            if (cpDetector != null) {
              this.detector.add(cpDetector);
            }
          } catch (InstantiationException ie) {
            System.err.println("Could not instantiate custom ICodepageDetector: " + detectors[i]
                + " (argument \"-c\"): " + ie.getMessage());
          }

        }
      }
      // default detector initialization:
      else {
        this.detector.add(new ParsingDetector(this.verbose));
        this.detector.add(JChardetFacade.getInstance());
      }
      this.loadCodepages();
    }
  }

  private void printCharsets() {
    if (this.parseCodepages == null || this.parseCodepages.length == 0) {
      this.loadCodepages();
    }
    Charset cs;
    for (int i = 0; i < this.parseCodepages.length; i++) {
      cs = this.parseCodepages[i];
      System.out.println("  " + cs.name() + ":");
      Set<String> aliases = cs.aliases();
      Iterator<String> itAliases = aliases.iterator();
      while (itAliases.hasNext()) {
        System.out.println("    " + itAliases.next());
      }
    }
  }

  private final String[] parseCSVList(String listLiteral) {
    if (listLiteral == null) {
      return null; // bounce bad callee.
    }
    List<String> tmpList = new LinkedList<String>();
    StringTokenizer tok = new StringTokenizer(listLiteral, ";,");
    while (tok.hasMoreElements()) {
      tmpList.add(tok.nextToken());
    }
    return (String[]) tmpList.toArray(new String[tmpList.size()]);
  }

  
  private void processRecursive(File f) throws Exception {
    if (f == null) {
      throw new IllegalArgumentException("File argument is null!");
    }
    if (!f.exists()) {
      throw new IllegalArgumentException(f.getAbsolutePath() + " does not exist.");
    }
    if (f.isDirectory()) {
      File[] childs = f.listFiles();
      for (int i = childs.length - 1; i >= 0; i--) {
        processRecursive(childs[i]);
      }
    } else if (this.extensionFilter.accept(f)) {
      this.process(f);
    }
  }

  public final void process() throws Exception {
    if (this.printCharsets) {
      this.printCharsets();
    } else {
      this.verifyFiles();
      this.describe();
      this.processRecursive(this.collectionRoot);
    }
    System.out.println("No exceptional program flow occured!");
  }

  
  protected void verifyFiles() throws IllegalArgumentException {
    StringBuffer msg = new StringBuffer();
    /*
     * Manual copy and paste for two file members. Not beautiful but formal correctness (all cases);
     */

    // collectionRoot:
    if (this.collectionRoot == null) {
      msg.append("-> Collection root directory is null!\n");
    } else {
      if (!this.collectionRoot.exists()) {
        msg.append("-> Collection root directory:\"");
        msg.append(this.collectionRoot.getAbsolutePath());
        msg.append("\" does not exist!\n");
      }
    }
    if (this.outputDir == null) {
      msg.append("-> Output directory is null!\n");
    } else {
      this.outputDir.mkdirs();
      if (!this.outputDir.isDirectory()) {
        msg.append("-> Output directory has to be a directory, no File!\n");
      }
    }
    // Uhmmkeh or not:
    if (msg.length() > 0) {
      throw new IllegalArgumentException(msg.toString());
    } else {
      System.out.println("All parameters are valid.");
    }
  }

  
  private void process(File document) throws Exception {
    Charset charset = null;
    try {
      Thread.sleep(this.wait);
    } catch (InterruptedException e) {
      // nop
    }
    Map.Entry filenameFinder;
    String prefix; // the path between this.collectionRoot and the file.
    File target;

    filenameFinder = FileUtil.cutDirectoryInformation(document.getAbsolutePath());
    prefix = document.getAbsolutePath();
    int stop = prefix.lastIndexOf(fileseparator);
    int start = this.collectionRoot.getAbsolutePath().length();
    if (start > stop) {
      prefix = "";
    } else {
      prefix = prefix.substring(this.collectionRoot.getAbsolutePath().length(), stop + 1);
    }
    if (this.verbose) {
      System.out.println("Processing document: " + prefix + "/" + filenameFinder.getValue());
    }
    charset = this.detector.detectCodepage(document.toURL());

    if ((charset == null) || (charset == UnknownCharset.getInstance())) {
      if (this.verbose) {
        System.out.println("  Charset not detected.");
      }
      if (!this.moveUnknown) {
        if (this.verbose) {
          System.out.println("  Dropping document.");
        }
        return;
      } else {
        // fake charset for name construction:
        charset = UnknownCharset.getInstance();
      }
    }

    if ((this.targetCodepage != null) && (charset != null)
        && (UnknownCharset.getInstance() != charset)) {

      // transform it:
      if (prefix.length() > 0) {
        target = new File(this.outputDir.getAbsolutePath() + "/" + this.targetCodepage.name() + "/"
            + prefix + "/");
      } else {
        target = new File(this.outputDir.getAbsolutePath() + "/" + this.targetCodepage.name() + "/");
      }
      if (target.mkdirs()) {
        if (this.verbose) {
          System.out.println("  Created directory : " + target.getAbsolutePath());
        }
      }
      target = new File(target.getAbsolutePath() + "/" + filenameFinder.getValue());
      if (this.verbose) {
        System.out.println("  Moving to \"" + target.getAbsolutePath() + "\".");
      }
      if (target.exists() && target.length() == document.length()) {
        if (this.verbose) {
          System.out.println("  File already exists and has same size. Skipping move.");
        }
      } else {
        target.createNewFile();
        Reader in = new BufferedReader(
            new InputStreamReader(new FileInputStream(document), charset));
        Writer out = new BufferedWriter(new OutputStreamWriter(new FileOutputStream(target),
            this.targetCodepage));

        // da flow
        int toRead = transcodeBuffer.length;
        int len;
        while ((len = in.read(transcodeBuffer, 0, toRead)) != -1) {
          out.write(transcodeBuffer, 0, len);
        }
        in.close();
        out.close();
      }
    } else {
      if (this.targetCodepage != null) {
        System.out.println("Skipping transformation of document " + document.getAbsolutePath()
            + " because it's charset could not be detected.");
      }
      if (prefix.length() > 0) {
        target = new File(this.outputDir.getAbsolutePath() + "/" + charset.name().toLowerCase()
            + "/" + prefix + "/");
      } else {
        target = new File(this.outputDir.getAbsolutePath() + "/" + charset.name().toLowerCase()
            + "/");
      }
      if (target.mkdirs()) {
        if (this.verbose) {
          System.out.println("Created directory : " + target.getAbsolutePath());
        }
      }
      target = new File(target.getAbsolutePath() + "/" + filenameFinder.getValue());
      if (this.verbose) {

        System.out.println("  Moving to \"" + target.getAbsolutePath() + "\".");
      }
      this.rawCopy(document, target);
    }
  }

  private void rawCopy(File from, File to) throws IOException {
    if (to.exists()) {
      if (from.length() == to.length()) {
        return;
      }
    } else {
      to.createNewFile();
    }
    /*
     * Target existed and had the same length : skip: Target existed and had a different length:
     * overwrite target. Target did not exist: Create it first before writing.
     */
    InputStream in = new BufferedInputStream(new FileInputStream(from));
    OutputStream out = new BufferedOutputStream(new FileOutputStream(to));

    // da flow
    int toRead = rawtransportBuffer.length;
    int len;
    while ((len = in.read(rawtransportBuffer, 0, toRead)) != -1) {
      out.write(rawtransportBuffer, 0, len);
    }
    in.close();
    out.close();
  }

  protected void describe() {
    StringBuffer msg = new StringBuffer();
    msg.append("Setup:\n");
    msg.append("  Collection-Root        : ");
    msg.append(this.collectionRoot.getAbsolutePath());
    msg.append("\n");
    msg.append("  Output-Dir             : ");
    msg.append(this.outputDir.getAbsolutePath());
    msg.append("\n");
    msg.append("  Move unknown           : ");
    msg.append(this.moveUnknown);
    msg.append("\n");
    msg.append("  verbose                : ");
    msg.append(this.verbose);
    msg.append("\n");
    msg.append("  wait                   : ");
    msg.append(this.wait);
    msg.append("\n");
    if (this.targetCodepage != null) {
      msg.append("  transform to codepage  : ");
      msg.append(this.targetCodepage.name());
      msg.append("\n");
    }
    msg.append("  detection algorithm    : ");
    msg.append("\n");
    msg.append(this.detector.toString());
    System.out.println(msg.toString());
  }

  
  protected void usage() {
    StringBuffer tmp = new StringBuffer();

    tmp.append("usage: java -cp jargs-1.0.jar")
        .append(File.separatorChar)
        .append("cpdetector_1.0.9.jar")
        .append(File.pathSeparatorChar)
        .append("antlr-2.7.4.jar")
        .append(File.pathSeparatorChar)
        .append(
            "chardet.jar info.monitorenter.cpdetector.CodepageProcessor -r <testdocumentdir> -o <testoutputdir> [options]");
    tmp.append("\n");
    tmp.append("options: \n");
    tmp.append("\n  Optional:\n");
    tmp.append("  -c              : Only print available charsets on this system.\n");
    tmp.append("  -e <extensions> : A comma- or semicolon- separated string for document extensions like \"-e txt,dat\" (without dot or space!).\n");
    tmp.append("  -m              : Move files with unknown charset to directory \"unknown\".\n");
    tmp.append("  -v              : Verbose output.\n");
    tmp.append("  -w <int>        : Wait <int> seconds before trying next document (good, if you want to work on the very same machine).\n");
    tmp.append("  -t <charset>    : Try to transform the document to given charset (codepage) name. \n");
    tmp.append("                    This is only possible for documents that are detected to have a  \n");
    tmp.append("                    codepage that is supported by the current java VM. If not possible \n");
    tmp.append("                    sorting will be done as normal. \n");
    tmp.append("  -d              : Semicolon-separated list of fully qualified classnames. \n");
    tmp.append("                    These classes will be casted to ICodepageDetector instances \n");
    tmp.append("                    and used in the order specified.\n");
    tmp.append("                    If this argument is ommited, a HTMLCodepageDetector followed by .\n");
    tmp.append("                    a JChardetFacade is used by default.\n");
    tmp.append("  Mandatory (if no -c option given) :\n");
    tmp.append("  -r            : Root directory containing the collection (recursive).\n");
    tmp.append("  -o            : Output directory containing the sorted collection.\n");
    System.out.print(tmp.toString());
  }

  void loadCodepages() {
    SortedMap<String, Charset> charSets = Charset.availableCharsets();
    Iterator<Map.Entry<String, Charset>> csIt = charSets.entrySet().iterator();
    Map.Entry<String, Charset> entry;
    Iterator<String> aliasIt;
    Set<String> aliases;
    Charset cs;
    if (this.verbose) {
      System.out.println("Loading system codepages...");
    }
    this.parseCodepages = new Charset[charSets.size()];
    int index = 0;
    while (csIt.hasNext()) {
      entry = csIt.next();
      cs = (Charset) entry.getValue();
      if (this.verbose) {
        System.out.println("Charset: " + cs.name());
        aliases = cs.aliases();
        System.out.println("  Aliases: ");
        aliasIt = aliases.iterator();
        while (aliasIt.hasNext()) {
          System.out.println("    " + aliasIt.next().toString());
        }
      }
      this.parseCodepages[index] = cs;
      index++;
    }
  }

  
  public static void main(String[] args) throws Exception {
    CodepageProcessor sorter = new CodepageProcessor();
      sorter.parseArgs(args);
      sorter.process();
  }
}