package plume;
import static plume.EntryReader.Entry;
import java.io.*;
import java.util.*;
import java.util.regex.*;
import com.sun.javadoc.*;
/**
* Lookup searches a set of files, much like <tt>grep</tt> does. However,
* Lookup searches by entry (by default, paragraphs) rather than by line,
* respects comments (ignores matches within them), respects
* <tt>\include</tt> directives (searches the named file), and has other
* features. <p>
*
* Each search criterion is a keyword or regular expression. Lookup
* outputs each <em>entry</em> that matches all the search criteria. <p>
*
* By default, search criteria are treated as keywords, and each paragraph
* is treated as an entry — in other words, Lookup prints each
* paragraph (in any of the files) that contains all the keywords,
* essentially performing paragraph-wise grep. <p>
*
* A file can contain one or more entries, each of which is a short entry
* or a long entry.
* <ul>
* <li>A short entry is a single paragraph (delimited from the next entry
* by a blank line). Lookup searches all of a short entry.</li>
* <li>A long entry is introduced by a line that begins with '&gt;entry'. The
* remainder of that line is a one-line description of the entry. A
* long entry is terminated by '&lt;entry', by the start of a new long
* entry, or by the start of a new file. Lookup searches only the first
* line of a long entry.</li>
* </ul>
*
* If multiple entries match, the first line of each is printed. If only
* one entry matches, then that entry is printed in its entirety. <p>
*
* By default, Lookup searches the file ~/lookup/root. Files can
* contain comments and can include other files. Comments start with
* a % sign in the first column. Any comment line is ignored (it is
* not treated as a blank line for the purpose of separating entries).
* A file can include another file via a line of the form
* '\include{filename}'. <p>
*
* The default behavior can be customized by way of command-line options. <p>
*
* The command-line options are as follows:
* <!-- start options doc (DO NOT EDIT BY HAND) -->
* <ul>
* <li>Getting help
* <ul>
* <li><b>-h</b> <b>--help=</b><i>boolean</i>. Show detailed help information and exit. [default false]</li>
* <li><b>-v</b> <b>--verbose=</b><i>boolean</i>. Print progress information [default false]</li>
* </ul>
* </li>
* <li>Where to search
* <ul>
* <li><b>-f</b> <b>--entry-file=</b><i>string</i>. Specify the colon-separated search list for the file that contains
* information to be searched. Only the first file found is used, though
* it may itself contain include directives. [default ~/lookup/root]</li>
* <li><b>-b</b> <b>--search-body=</b><i>boolean</i>. Search the body of long entries in addition to the entry's
* description. The bodies of short entries are always searched. [default false]</li>
* </ul>
* </li>
* <li>What to search for
* <ul>
* <li><b>-e</b> <b>--regular-expressions=</b><i>boolean</i>. Specifies that keywords are regular expressions. If false, keywords
* are text matches. [default false]</li>
* <li><b>-c</b> <b>--case-sensitive=</b><i>boolean</i>. If true, keyword matching is case sensitive. By default both
* regular expressions and text keywords are case insensitive. [default false]</li>
* <li><b>-w</b> <b>--word-match=</b><i>boolean</i>. If true, match a text keyword only as a separate word, not as a
* substring of a word. This option is ignored if
* regular_expressions is true. [default false]</li>
* </ul>
* </li>
* <li>How to print matches
* <ul>
* <li><b>-a</b> <b>--print-all=</b><i>boolean</i>. By default, if multiple entries are matched, only a synopsis
* of each entry is printed. If 'print_all' is selected then
* the body of each matching entry is printed. [default false]</li>
* <li><b>-i</b> <b>--item-num=</b><i>integer</i>. Specifies which item to print when there are multiple matches.</li>
* <li><b>-l</b> <b>--show-location=</b><i>boolean</i>. If true, show the filename/line number of each matching entry
* in the output. [default false]</li>
* </ul>
* </li>
* <li>Customizing format of files to be searched
* <ul>
* <li><b>--entry-start-re=</b><i>regex</i>. Regex that denotes the start of a long entry [default ^>entry *()]</li>
* <li><b>--entry-stop-re=</b><i>regex</i>. Regex that denotes the end of a long entry [default ^<entry]</li>
* <li><b>--description-re=</b><i>regex</i>. Regex that finds an entry's description (for long entries)</li>
* <li><b>--comment-re=</b><i>string</i>. Regex that matches an entire comment (not just a comment start) [default ^%.*]</li>
* <li><b>--include-re=</b><i>string</i>. Regex that matches an include directive; group 1 is the file name [default \\include\{(.*)\}]</li>
* </ul>
* </li>
* </ul>
* <!-- end options doc -->
**/
public class Lookup {

  /** Show detailed help information and exit. */
  @OptionGroup("Getting help")
  @Option ("-h Show detailed help information")
  public static boolean help = false;

  /** Print progress information. */
  @Option ("-v Print progress information")
  public static boolean verbose = false;

  // This uses only the first file because the default search path might be
  // something like user:system and you might want only your version of the
  // system files.  It might be useful to also support (via another flag,
  // or by taking over this one, or by the syntax of the separator, or in
  // some other way) specifying multiple files on the command line.
  /**
   * Specify the colon-separated search list for the file that contains
   * information to be searched.  Only the first file found is used, though
   * it may itself contain include directives.
   */
  @OptionGroup("Where to search")
  @Option ("-f Specify the search list of files of information; may only be supplied once")
  public static String entry_file = "~/lookup/root";

  /**
   * Search the body of long entries in addition to the entry's
   * description.  The bodies of short entries are always searched.
   */
  @Option ("-b Search body of long entries for matches")
  public static boolean search_body = false;

  /**
   * Specifies that keywords are regular expressions.  If false, keywords
   * are text matches.
   */
  @OptionGroup("What to search for")
  @Option ("-e Keywords are regular expressions")
  public static boolean regular_expressions = false;

  /**
   * If true, keyword matching is case sensitive.  By default both
   * regular expressions and text keywords are case insensitive.
   */
  @Option ("-c Keywords are case sensitive")
  public static boolean case_sensitive = false;

  /**
   * If true, match a text keyword only as a separate word, not as a
   * substring of a word.  This option is ignored if
   * regular_expressions is true.
   */
  @Option ("-w Only match text keywords against complete words")
  public static boolean word_match = false;

  /**
   * By default, if multiple entries are matched, only a synopsis
   * of each entry is printed.  If 'print_all' is selected then
   * the body of each matching entry is printed.
   */
  @OptionGroup("How to print matches")
  @Option ("-a Print the entire entry for each match")
  public static boolean print_all = false;

  /**
   * Specifies which item to print when there are multiple matches.
   * Items are numbered from 1, in the order in which they are listed.
   */
  @Option ("-i Choose a specific item when there are multiple matches")
  public static /*@Nullable*/ Integer item_num;

  /**
   * If true, show the filename/line number of each matching entry
   * in the output.
   */
  @Option ("-l Show the location of each matching entry")
  public static boolean show_location = false;

  /** Regex that denotes the start of a long entry. */
  @OptionGroup("Customizing format of files to be searched")
  @Option ("Regex that denotes the start of a long entry")
  public static Pattern entry_start_re = Pattern.compile ("^>entry *()");

  /** Regex that denotes the end of a long entry. */
  @Option ("Regex that denotes the end of a long entry")
  public static Pattern entry_stop_re = Pattern.compile ("^<entry");

  /** Regex that finds an entry's description (for long entries). */
  @Option ("Regex that finds an entry's description (for long entries)")
  public static /*@Nullable*/ Pattern description_re = null;

  /** Regex that matches an entire comment (not just a comment start). */
  // If "", gets set to null immediately after option processing.
  @Option ("Regex that matches an entire comment (not just a comment start)")
  public static /*@Nullable*/ String comment_re = "^%.*";

  /** Regex that matches an include directive; group 1 is the file name. */
  @Option ("Regex that matches an include directive; group 1 is the file name")
  public static String include_re = "\\\\include\\{(.*)\\}";

  /** Platform-specific line separator **/
  @SuppressWarnings("nullness") // line.separator property always exists
  private static final String lineSep = System.getProperty("line.separator");

  /** One line synopsis of usage **/
  private static final String usage_string
    = "lookup [options] <keyword> ...";

  /**
   * Look for the specified keywords in the file(s) and print
   * the corresponding entries.
   *
   * The process exits with status 254 when no keyword is given, no entry
   * file can be read, a keyword is not a valid regular expression, or the
   * requested item number does not correspond to a match.
   *
   * @param args command-line options followed by the keywords to search for
   * @throws IOException if reading the help text or an entry file fails
   */
  public static void main (String args[]) throws IOException {

    Options options = new Options (usage_string, Lookup.class);
    String[] keywords = options.parse_or_usage (args);

    // If help was requested, print it and exit
    if (help) {
      print_help();
      System.exit (0);
    }

    if (verbose) {
      System.out.printf ("Options settings: %n%s%n", options.settings());
    }

    // Make sure at least one keyword was specified
    if (keywords.length == 0) {
      options.print_usage ("Error: No keywords specified");
      System.exit (254);
    }

    // comment_re starts out non-null and the option processing code can't
    // make it null, so no null pointer exception is possible in the
    // if statement predicate that immediately follows this assertion.
    assert comment_re != null : "@SuppressWarnings(nullness): application invariant";

    // If the comment regular expression is empty, turn off comment processing
    if (comment_re.equals (""))
      comment_re = null;

    // Open the first readable root file
    EntryReader reader = null;
    List<Exception> file_errors = new ArrayList<Exception>();
    for (String ef : entry_file.split (":")) {
      ef = UtilMDE.expandFilename (ef);
      try {
        reader = new EntryReader (ef, comment_re, include_re);
      } catch (FileNotFoundException e) {
        file_errors.add (e);
      }
      if (reader != null)
        break;
    }
    if (reader == null) {
      System.out.println ("Error: Can't read any entry files");
      for (Exception file_error : file_errors)
        System.out.printf ("  entry file %s%n", file_error.getMessage());
      System.exit (254);
    }

    // Setup the regular expressions for long entries
    reader.set_entry_start_stop (entry_start_re, entry_stop_re);

    // Compile the keywords once, rather than once per entry.
    Pattern[] keyword_patterns;
    try {
      keyword_patterns = compile_keywords (keywords);
    } catch (PatternSyntaxException e) {
      System.out.printf ("Error: Invalid regular expression: %s%n",
                         e.getMessage());
      System.exit (254);
      return;
    }

    List<Entry> matching_entries = new ArrayList<Entry>();

    try {
      // Process each entry looking for matches
      int entry_cnt = 0;
      Entry entry = reader.get_entry ();
      while (entry != null) {
        entry_cnt++;
        if (verbose && ((entry_cnt % 1000) == 0))
          System.out.printf ("%d matches in %d entries\r",
                             matching_entries.size(), entry_cnt);
        if (entry_matches (entry, keyword_patterns))
          matching_entries.add (entry);
        entry = reader.get_entry ();
      }
    } catch (FileNotFoundException e) {
      System.out.printf ("Error: Can't read %s at line %d in file %s%n",
                         e.getMessage(), reader.getLineNumber(),
                         reader.getFileName());
      System.exit (254);
    }

    print_results (matching_entries);
  }

  /**
   * Prints the contents of the 'lookup.txt' resource to standard output.
   * Exits with status 1 if the resource cannot be found.
   */
  private static void print_help() throws IOException {
    InputStream is = Lookup.class.getResourceAsStream ("lookup.txt");
    if (is == null) {
      // This should never happen.
      System.out.println("Unable to find resource 'lookup.txt' with help text.");
      System.exit(1);
    }
    BufferedReader help_stream = new BufferedReader (new InputStreamReader (is));
    try {
      String line = help_stream.readLine();
      while (line != null) {
        System.out.println (line);
        line = help_stream.readLine();
      }
    } finally {
      help_stream.close();
    }
  }

  /**
   * Turns each keyword into a pattern according to the regular_expressions,
   * case_sensitive, and word_match options.  Text keywords are quoted so
   * that regex metacharacters in them are matched literally.
   *
   * @throws PatternSyntaxException if regular_expressions is set and a
   *   keyword is not a valid regular expression
   */
  private static Pattern[] compile_keywords (String[] keywords) {
    Pattern[] patterns = new Pattern[keywords.length];
    for (int i = 0; i < keywords.length; i++) {
      String keyword = keywords[i];
      if (regular_expressions) {
        int flags = case_sensitive ? 0 : Pattern.CASE_INSENSITIVE;
        patterns[i] = Pattern.compile (keyword, flags);
      } else {
        // The searched text is lowercased when matching is case
        // insensitive, so the keyword must be lowercased as well.
        if (!case_sensitive)
          keyword = keyword.toLowerCase();
        String literal = Pattern.quote (keyword);
        if (word_match)
          literal = "\\b" + literal + "\\b";
        patterns[i] = Pattern.compile (literal);
      }
    }
    return patterns;
  }

  /**
   * Returns true if every keyword pattern is found in the entry.  The body
   * is searched for short entries and when search_body is set; otherwise
   * only the entry's description is searched.
   */
  private static boolean entry_matches (Entry entry, Pattern[] keyword_patterns) {
    String search;
    if (search_body || entry.short_entry)
      search = entry.body;
    else
      search = entry.get_description (description_re);
    if (!case_sensitive)
      search = search.toLowerCase();
    for (Pattern keyword_pattern : keyword_patterns) {
      if (!keyword_pattern.matcher (search).find())
        return false;
    }
    return true;
  }

  /**
   * Prints the body of an entry, preceded by its location if show_location
   * is set.
   */
  private static void print_entry (Entry e) {
    if (show_location)
      System.out.printf ("%s:%d:%n", e.filename, e.line_number);
    System.out.print (e.body);
  }

  /**
   * Prints the search results: a notice if nothing matched, the whole entry
   * if exactly one matched, and otherwise either the item selected by
   * item_num, every entry (print_all), or a one-line synopsis of each.
   * Exits with status 254 if item_num does not name one of the matches.
   */
  private static void print_results (List<Entry> matching_entries) {
    int match_count = matching_entries.size();
    if (match_count == 0) {
      System.out.println ("Nothing found.");
    } else if (match_count == 1) {
      print_entry (matching_entries.get(0));
    } else if (item_num != null) {
      int item = item_num.intValue();
      if (item < 1 || item > match_count) {
        System.out.printf ("Error: -i must be between 1 and %d (got %d)%n",
                           match_count, item);
        System.exit (254);
      } else {
        print_entry (matching_entries.get (item - 1));
      }
    } else { // multiple matches and no specific item requested
      if (print_all)
        System.out.printf ("%d matches found (separated by dashes below)%n",
                           match_count);
      else
        System.out.printf ("%d matches found. Use -i to print a "
                           + "specific match or -a to see them all%n",
                           match_count);
      int i = 0;
      for (Entry e : matching_entries) {
        i++;
        if (print_all) {
          if (show_location)
            System.out.printf ("%n-------------------------%n%s:%d:%n",
                               e.filename, e.line_number);
          else
            System.out.printf ("%n-------------------------%n");
          System.out.print (e.body);
        } else {
          if (show_location)
            System.out.printf ("  -i=%d %s:%d: %s%n", i, e.filename,
                               e.line_number, e.first_line);
          else
            System.out.printf ("  -i=%d %s%n", i,
                               e.get_description (description_re));
        }
      }
    }
  }

  /**
   * Returns the next entry.  If no more entries are available, returns null.
   * A line starting with '>entry' begins a long entry, which runs until
   * a line equal to '<entry', the next '>entry' line, or a change of
   * file; any other non-blank line begins a short entry, which runs until
   * the next blank line.  If a file cannot be found, an error is printed
   * and the process exits with status 254.
   *
   * Note that main does not use this method; it calls reader.get_entry().
   *
   * @param reader where to read the entry from
   * @return the next entry, or null at end of input
   * @throws IOException if reading fails
   */
  public static /*@Nullable*/ Entry old_get_entry (EntryReader reader) throws IOException {

    try {

      // Skip any preceding blank lines
      String line = reader.readLine();
      while ((line != null) && (line.trim().length() == 0))
        line = reader.readLine();
      if (line == null)
        return null;

      StringBuilder body = new StringBuilder();
      String filename = reader.getFileName();
      long line_number = reader.getLineNumber();

      // If this is a long entry
      if (line.startsWith (">entry")) {

        // Remove '>entry' from the line
        line = line.replaceFirst ("^>entry *", "");
        String first_line = line;

        // Read until we find the termination of the entry
        while ((line != null) && !line.startsWith (">entry")
               && !line.equals ("<entry")
               && filename.equals (reader.getFileName())) {
          body.append (line).append (lineSep);
          line = reader.readLine();
        }

        // If this entry was terminated by the start of the next one
        // (or by a line from a different file), put that line back
        if ((line != null) && (line.startsWith (">entry")
                               || !filename.equals (reader.getFileName())))
          reader.putback (line);

        return new Entry (first_line, body.toString(), filename,
                          line_number, false);

      } else { // blank separated entry

        String first_line = line;

        // Read until we find another blank line
        while ((line != null) && (line.trim().length() != 0)) {
          body.append (line).append (lineSep);
          line = reader.readLine();
        }

        return new Entry (first_line, body.toString(), filename,
                          line_number, true);
      }

    } catch (FileNotFoundException e) {
      System.out.printf ("Error: Can't read %s at line %d in file %s%n",
                         e.getMessage(), reader.getLineNumber(),
                         reader.getFileName());
      System.exit (254);
      return null;
    }
  }

  /**
   * Returns the first line of entry: the text before the first occurrence
   * of the platform line separator, or all of entry if it contains none.
   **/
  public static String first_line (String entry) {
    int ii = entry.indexOf (lineSep);
    if (ii == -1)
      return entry;
    return entry.substring (0, ii);
  }
}