/*
* Created on Feb 8, 2006
*
* TODO To change the template for this generated file go to
* Window - Preferences - Java - Code Style - Code Templates
*/
package org.mindswap.swoop.automation;
import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.FileReader;
import java.io.FileWriter;
import java.util.StringTokenizer;
/**
* @author Dave Wang
*
* TODO To change the template for this generated type comment go to
* Window - Preferences - Java - Code Style - Code Templates
*/
public class PostProcessor
{
public static final String FILE_OWL = "OWL";
public static final String FILE_DAML = "DAML";
public static final String FILE_RDF = "RDF";
public static final String FILE_RDFS = "RDFS";
public static final String FILE_NONE = "NONE";
public static void removeWordNetStats( String input, String output, String wordnets )
{
try
{
BufferedReader reader = new BufferedReader( new FileReader( input ) );
BufferedWriter writer = new BufferedWriter( new FileWriter( output ) );
BufferedWriter collector = new BufferedWriter( new FileWriter( wordnets) );
String line = null;
while ( (line = reader.readLine()) != null )
{
if ( line.startsWith("http://xmlns.com/wordnet/1.6/") )
{
collector.write( line );
collector.newLine();
continue;
}
writer.write( line );
writer.newLine();
}
writer.flush();
collector.close();
reader.close();
writer.close();
collector.close();
}
catch ( Exception e )
{
e.printStackTrace();
}
}
public static void removeW3CJENATestStats( String input, String output, String discardedfile )
{
try
{
BufferedReader reader = new BufferedReader( new FileReader( input ) );
// writes data that we want to keep
BufferedWriter writer = new BufferedWriter( new FileWriter( output ) );
// collects stats to 'throw away'
BufferedWriter collector = new BufferedWriter( new FileWriter( discardedfile ) );
String line = null;
while ( (line = reader.readLine()) != null )
{
if (( line.indexOf("http://cvs.sourceforge.net/viewcvs.py/jena/jena2/testing/") != -1 ) ||
( line.indexOf("www.w3.org/2002") != -1 ) ||
( line.indexOf("http://lists.w3.org/Archives/Public") != -1 ) ||
( line.indexOf("http://web3.w3.org/2002") != -1 ) )
{
collector.write( line );
collector.newLine();
continue;
}
writer.write( line );
writer.newLine();
}
writer.flush();
collector.close();
reader.close();
writer.close();
collector.close();
}
catch ( Exception e )
{
e.printStackTrace();
}
}
/* Input phyURIs is a filename containing only the list of physicalURIs
* output contains a column of the physicalURIs and the best guessed filetype
*
*/
public static void extractFileType( String phyURIs, String output )
{
try
{
BufferedReader reader = new BufferedReader( new FileReader( phyURIs) );
BufferedWriter writer = new BufferedWriter( new FileWriter( output ) );
String line = "";
while ( (line = reader.readLine()) != null )
{
String type = FILE_NONE;
line = line.trim();
line = line.toLowerCase();
if ( line.endsWith(".owl") )
type = FILE_OWL;
else if ( line.endsWith(".rdf") )
type = FILE_RDF;
else if ( line.endsWith(".daml") )
type = FILE_DAML;
else if ( line.endsWith(".rdfs") )
type = FILE_RDFS;
else if ( line.indexOf(".owl") != -1 )
type = FILE_OWL;
else if ( line.indexOf(".rdf") != -1 )
type = FILE_RDF;
else if ( line.indexOf(".daml") != -1 )
type = FILE_DAML;
else if ( line.indexOf(".rdfs") != -1 )
type = FILE_RDFS;
else
type = FILE_NONE;
writer.write( line + "\t" + type );
writer.newLine();
writer.flush();
}
writer.close();
}
catch ( Exception e )
{
e.printStackTrace();
}
}
/* Input phyuriExpresivity is a filename containing the list of physical URIs and Expressivity
* output contains a column of the physical URIs, fixed Expressivity, and a rank based
* on complexity results.
*/
public static void fixNrankExpressivity( String uriExpressivity, String output )
{
try
{
BufferedReader reader = new BufferedReader( new FileReader( uriExpressivity) );
BufferedWriter writer = new BufferedWriter( new FileWriter( output ) );
String line = "";
while ( (line = reader.readLine()) != null )
{
StringTokenizer tokens = new StringTokenizer( line );
int count = tokens.countTokens();
if ( count < 2 )
{
writer.write( tokens.nextToken() + " \t ");
writer.newLine();
writer.flush();
continue;
}
else
{
String uri = tokens.nextToken();
String exp = tokens.nextToken();
String cExp = fixExpressivity( exp );
String rank = rankExpressivity( cExp );
writer.write( uri + "\t" + cExp + "\t" + rank );
writer.newLine();
writer.flush();
}
}
writer.close();
}
catch ( Exception e )
{
e.printStackTrace();
}
}
private static String fixExpressivity( String exp )
{
// change ALCR+ to S
if ( exp.indexOf( "ALCR+") != -1 )
exp = exp.replaceAll( "ALCR+", "S" );
// change IO to OI
if ( exp.indexOf("IO") != -1 )
exp = exp.replaceAll( "IO", "OI");
// remove (D)
if ( exp.indexOf("(D)") != -1 )
exp = exp.replaceAll("\\(D\\)", "" );
exp = exp.trim();
return exp;
}
private static String rankExpressivity( String exp )
{
ExpressivityRanker ranker = ExpressivityRanker.getInstance();
return ranker.rankByRoughBin( exp );
}
public static void main( String [] args )
{
System.out.print("Starting...");
//PostProcessor.removeWordNetStats( "copy.txt", "noRDFS.txt", "RDFS.txt");
PostProcessor.fixNrankExpressivity( "expressivity.txt", "binnedExpressivity.txt" );
//PostProcessor.removeW3CJENATestStats("NoDamlStats.txt", "NoTestFileStats.txt", "TestFileStats.txt");
System.out.println("Done.");
}
}