/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.cloudera.knittingboar.utils;
import java.io.BufferedReader;
import java.io.File;
import java.io.IOException;
import java.io.InputStreamReader;
import java.net.URL;
import org.apache.commons.io.FileUtils;
/**
 * Static helpers for locating, downloading and unpacking the 20 Newsgroups
 * data set into a local directory.
 *
 * <p>The data lives in a {@code knittingboar-20news} directory under
 * {@code /tmp} when that exists, otherwise under {@code java.io.tmpdir}.
 */
public class DataUtils {

  /** Extracted data directory; set once an extraction has succeeded in this JVM. */
  private static File twentyNewsGroups;

  private static final String TWENTY_NEWS_GROUP_LOCAL_DIR = "knittingboar-20news";
  private static final String TWENTY_NEWS_GROUP_TAR_URL = "http://people.csail.mit.edu/jrennie/20Newsgroups/20news-bydate.tar.gz";
  private static final String TWENTY_NEWS_GROUP_TAR_FILE_NAME = "20news-bydate.tar.gz";

  /**
   * Returns the path of the local 20 Newsgroups directory without creating,
   * downloading or extracting anything.
   *
   * @return the path of the data directory as a string
   */
  public static String get20NewsgroupsLocalDataLocation() {
    return resolveBaseDir().toString();
  }

  /**
   * Returns the local 20 Newsgroups directory, downloading the tarball if it
   * is not already present and unpacking it with the system {@code tar}
   * command. The result is cached, so the work happens at most once per JVM.
   *
   * @return the directory the archive was extracted into
   * @throws IOException if the directory cannot be created, the download
   *         fails, {@code tar} cannot be started, {@code tar} exits with a
   *         non-zero status, or the calling thread is interrupted while
   *         waiting for it
   */
  public static synchronized File getTwentyNewsGroupDir() throws IOException {
    if (twentyNewsGroups != null) {
      return twentyNewsGroups;
    }
    File baseDir = resolveBaseDir();
    if (!(baseDir.isDirectory() || baseDir.mkdir())) {
      throw new IOException("Could not mkdir " + baseDir);
    }
    File tarFile = new File(baseDir, TWENTY_NEWS_GROUP_TAR_FILE_NAME);
    if (!tarFile.isFile()) {
      FileUtils.copyURLToFile(new URL(TWENTY_NEWS_GROUP_TAR_URL), tarFile);
    }
    extractTarball(baseDir, tarFile);
    // Cache only after a successful extraction so a failed attempt is retried.
    twentyNewsGroups = baseDir;
    return twentyNewsGroups;
  }

  /** Resolves the data directory location; does not touch the file system beyond a directory check. */
  private static File resolveBaseDir() {
    // macOS hands out a different java.io.tmpdir on each run, and we want
    // the downloaded data to persist across restarts, so prefer /tmp.
    File tmpDir = new File("/tmp");
    if (!tmpDir.isDirectory()) {
      tmpDir = new File(System.getProperty("java.io.tmpdir"));
    }
    return new File(tmpDir, TWENTY_NEWS_GROUP_LOCAL_DIR);
  }

  /** Unpacks {@code tarFile} into {@code targetDir} with the system tar, echoing its diagnostics. */
  private static void extractTarball(File targetDir, File tarFile) throws IOException {
    // Pass arguments as a list so paths containing whitespace stay intact.
    // No -v: the per-file listing of a very large archive is pure noise and
    // is what used to fill the undrained stdout pipe and stall tar.
    ProcessBuilder builder = new ProcessBuilder(
        "tar", "-C", targetDir.getAbsolutePath(), "-xf", tarFile.getAbsolutePath());
    // Merge stderr into stdout so one reader drains everything tar writes.
    builder.redirectErrorStream(true);
    Process process = builder.start();

    BufferedReader output = new BufferedReader(
        new InputStreamReader(process.getInputStream()));
    try {
      System.out.println("Here is the standard error of the command (if any):\n");
      String line;
      while ((line = output.readLine()) != null) {
        System.out.println(line);
      }
    } finally {
      output.close();
    }

    int exitCode;
    try {
      exitCode = process.waitFor();
    } catch (InterruptedException e) {
      process.destroy();
      // Restore the interrupt flag for callers further up the stack.
      Thread.currentThread().interrupt();
      throw new IOException("Interrupted while extracting " + tarFile, e);
    }
    if (exitCode != 0) {
      throw new IOException("tar exited with status " + exitCode
          + " while extracting " + tarFile + " into " + targetDir);
    }
  }
}