/*
* Copyright (c) 2013, University of Toronto.
*
* Licensed under the Apache License, Version 2.0 (the "License"); you may
* not use this file except in compliance with the License. You may obtain
* a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
* License for the specific language governing permissions and limitations
* under the License.
*/
package edu.toronto.cs.xcurator.parser;
import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.OutputStreamWriter;
/**
* @author Eric Yao <jiaxian.yao@mail.utoronto.ca>
*/
public class PatentParser implements Parser {
@Override
public void parse(String rawDir, String parsedDir, String fileName) {
// Get the current directory
String cwd = System.getProperty("user.dir");
try {
String line;
int count = 0;
FileInputStream is = new FileInputStream(cwd + "\\resources" + "\\" + rawDir + "\\" + fileName);
BufferedReader br = new BufferedReader(new InputStreamReader(is, "UTF-8"));
FileOutputStream os = new FileOutputStream(cwd + "\\resources" + "\\" + parsedDir + "\\" + fileName);
BufferedWriter bw = new BufferedWriter(new OutputStreamWriter(os, "UTF-8"));
bw.write("<?xml version=\"1.0\" encoding=\"UTF-8\"?>");
bw.newLine();
bw.write("<us-patent-grants>");
bw.newLine();
while ((line = br.readLine()) != null && count <= 300) {
if (line.contains("<?xml version=\"1.0\" encoding=\"UTF-8\"?>")) {
count++;
} else if (line.contains("!DOCTYPE")) {
continue;
} else {
bw.write(line);
bw.newLine();
}
}
bw.write("</us-patent-grants>");
bw.close();
os.close();
br.close();
is.close();
System.out.println((count - 1) + " Patents Parsed.");
} catch (FileNotFoundException e) {
System.out.println("File not found.");
} catch (IOException e) {
System.out.println("IOException while reading lines.");
}
}
}