package edu.northwestern.at.morphadorner.servlets; /* Please see the license information at the end of this file. */ import java.io.*; import java.net.*; import javax.servlet.*; import javax.servlet.http.*; import edu.northwestern.at.utils.*; import edu.northwestern.at.utils.html.*; import edu.northwestern.at.utils.corpuslinguistics.inflector.*; import edu.northwestern.at.utils.corpuslinguistics.languagerecognizer.*; import edu.northwestern.at.utils.corpuslinguistics.lemmatizer.*; import edu.northwestern.at.utils.corpuslinguistics.lexicon.*; import edu.northwestern.at.utils.corpuslinguistics.namerecognizer.*; import edu.northwestern.at.utils.corpuslinguistics.partsofspeech.*; import edu.northwestern.at.utils.corpuslinguistics.postagger.*; import edu.northwestern.at.utils.corpuslinguistics.postagger.guesser.*; import edu.northwestern.at.utils.corpuslinguistics.postagger.transitionmatrix.*; import edu.northwestern.at.utils.corpuslinguistics.postagger.trigram.*; import edu.northwestern.at.utils.corpuslinguistics.sentencesplitter.*; import edu.northwestern.at.utils.corpuslinguistics.spellingmapper.*; import edu.northwestern.at.utils.corpuslinguistics.spellingstandardizer.*; import edu.northwestern.at.utils.corpuslinguistics.stemmer.*; import edu.northwestern.at.utils.corpuslinguistics.tokenizer.*; import edu.northwestern.at.utils.servlets.*; import net.sf.jlinkgrammar.*; /** Base class for MorphAdorner example servlets. * * <p> * Extends XHttpServlet with methods for MorphAdorner servlets. * Also stores common objects used by multiple servlets. * </p> */ abstract public class BaseAdornerServlet extends XHttpServlet { /** True to Output full top and bottom page HTML. * false to output only servlet output. */ protected boolean outputFullHTML = true; /** Default data directory. */ protected static String defaultDataDirectory = "/nupos"; /** Data directory. */ protected static String dataDirectory; /** 19th century adorner information. */ protected static AdornerInfo ncfAdornerInfo; /** 19th century fiction lexicon file name. */ protected static String ncfWordLexiconFileName = "/ncflexicon.lex"; /** 19th century fiction lexicon file name. */ protected static String ncfSuffixLexiconFileName = "/ncfsuffixlexicon.lex"; /** Alternate to standard spelling pairs for * 19th century fiction. */ protected static String ncfSpellingPairsFileName = "/ncfmergedspellingpairs.tab"; /** 19th century fiction transition matrix file name. */ protected static String ncfTransitionMatrixFileName = "/ncftransmat.mat"; /** Early modern English adorner information. */ protected static AdornerInfo emeAdornerInfo; /** Early modern English word lexicon file name. */ protected static String emeWordLexiconFileName = "/emelexicon.lex"; /** Early modern English suffix lexicon File name. */ protected static String emeSuffixLexiconFileName = "/emesuffixlexicon.lex"; /** Early modern English transition matrix file name. */ protected static String emeTransitionMatrixFileName = "/emetransmat.mat"; /** Early modern English alternate to standard spelling * pairs. */ protected static String emeSpellingPairsFileName = "/ememergedspellingpairs.tab"; /** Standard spellings file name. */ protected static String standardSpellingsFileName = "/standardspellings.txt"; /** The lemmatizer. */ protected static Lemmatizer lemmatizer; /** Porter stemmer. */ protected static Stemmer porterStemmer = new PorterStemmer(); /** Lancaster stemmer. */ protected static Stemmer lancasterStemmer = new LancasterStemmer(); /** Names. */ protected static Names names = new Names(); /** The language recognizer. */ protected static LanguageRecognizer languageRecognizer = new DefaultLanguageRecognizer(); /** English inflector. */ protected static Inflector inflector = new EnglishInflector(); /** British to US spelling mapper. */ protected static SpellingMapper britishToUS = new BritishToUSSpellingMapper(); /** Extra words file name. */ protected static String extraWordsFileName = "/extrawords.txt"; /** Latin words file name. */ protected static String latinWordsFileName = "/latinwords.txt"; /** Latin words list. */ protected static TaggedStrings latinWords; /** Extra words list. */ protected static TaggedStrings extraWords; /** Link grammar dictionary. */ protected static Dictionary dictionary ; /** Link grammar parser options. */ protected static ParseOptions parseOptions ; /** Link grammar parser data file directory. */ protected static String lgParserDataDirectory = "/lgparser"; /** Initialization states. */ protected static final int INITNOTSTARTED = 0; protected static final int INITINPROGRESS = 1; protected static final int INITDONE = 2; protected static final int INITFAILED = 3; /** Initialization complete. */ protected static int initializationStatus = INITNOTSTARTED; /** Servlet not ready message. */ protected static final String servletNotReadyMessage = "Servlet not yet ready, please try again in a minute."; /** Servlet not ready title. */ protected static final String servletNotReadyTitle = "Servlet not ready"; /** Initialize common objects. * * @param config Servlet configuration. */ protected synchronized static void doInitialization ( ServletConfig config ) { // If init done or in progress, // do nothing. if ( initializationStatus != INITNOTSTARTED ) return; try { // Remember initialization has started. initializationStatus = INITINPROGRESS; // Get the data directory. dataDirectory = defaultDataDirectory; if ( config.getInitParameter( "datadirectory" ) != null ) { dataDirectory = config.getInitParameter( "datadirectory" ).trim(); } if ( dataDirectory.length() == 0 ) { File tryDir = new File( "data" ); if ( tryDir.exists() ) { dataDirectory = tryDir.getAbsolutePath(); dataDirectory = new File( dataDirectory ).getCanonicalPath(); } else { tryDir = new File( "../data" ); if ( tryDir.exists() ) { dataDirectory = tryDir.getAbsolutePath(); dataDirectory = new File( dataDirectory ).getCanonicalPath(); } } } // Add data directory to file names. ncfWordLexiconFileName = dataDirectory + ncfWordLexiconFileName; ncfSuffixLexiconFileName = dataDirectory + ncfSuffixLexiconFileName; ncfSpellingPairsFileName = dataDirectory + ncfSpellingPairsFileName; ncfTransitionMatrixFileName = dataDirectory + ncfTransitionMatrixFileName; emeWordLexiconFileName = dataDirectory + emeWordLexiconFileName; emeSuffixLexiconFileName = dataDirectory + emeSuffixLexiconFileName; emeSpellingPairsFileName = dataDirectory + emeSpellingPairsFileName; emeTransitionMatrixFileName = dataDirectory + emeTransitionMatrixFileName; standardSpellingsFileName = dataDirectory + standardSpellingsFileName; extraWordsFileName = dataDirectory + extraWordsFileName; latinWordsFileName = dataDirectory + latinWordsFileName; lgParserDataDirectory = dataDirectory + lgParserDataDirectory; // Get latin words list. if ( latinWords == null ) { latinWords = getLatinWordsList(); } // Get extra words list. if ( extraWords == null ) { extraWords = getExtraWordsList(); } TaggedStrings[] extraWordLists = new TaggedStrings[] { extraWords , latinWords }; // Create early modern English adorner // information. if ( emeAdornerInfo == null ) { emeAdornerInfo = new AdornerInfo ( emeWordLexiconFileName , emeSuffixLexiconFileName , emeTransitionMatrixFileName , standardSpellingsFileName , emeSpellingPairsFileName , extraWordLists , names ); } // Create 19th century fiction adorner // information. if ( ncfAdornerInfo == null ) { ncfAdornerInfo = new AdornerInfo ( ncfWordLexiconFileName , ncfSuffixLexiconFileName , ncfTransitionMatrixFileName , standardSpellingsFileName , ncfSpellingPairsFileName , extraWordLists , names ); } // Get lemmatizer. if ( lemmatizer == null ) { lemmatizer = new DefaultLemmatizer(); lemmatizer.setDictionary ( ncfAdornerInfo.standardizer.getStandardSpellings() ); } // Get link grammar parser. if ( parseOptions == null ) { parseOptions = new ParseOptions() ; parseOptions.parse_options_set_short_length( 10 ) ; parseOptions.parse_options_set_max_null_count( 10 ) ; parseOptions.parse_options_set_linkage_limit( 100 ) ; } if ( dictionary == null ) { dictionary = new Dictionary ( parseOptions , lgParserDataDirectory + "/4.0.dict" , "4.0.knowledge" , "4.0.constituent-knowledge" , "4.0.affix" ) ; } // Initialization complete. initializationStatus = INITDONE; } catch ( Exception e ) { e.printStackTrace(); initializationStatus = INITFAILED; } } /** Initialize common objects. * * @param config Servlet configuration. */ protected synchronized static void initialize ( final ServletConfig config ) { // Run initializer thread. Thread runner = new Thread( "Servlet Initializer" ) { public void run() { doInitialization( config ); } }; runner.start(); } /** Get Latin words list. */ public synchronized static TaggedStrings getLatinWordsList() { // Load Latin words. TextFile latinWordsFile = new TextFile ( latinWordsFileName , "utf-8" ); SingleTagTaggedStrings latinWords = new SingleTagTaggedStrings ( latinWordsFile.toArray() , "fw-la" ); latinWordsFile = null; return latinWords; } /** Get extra words list. */ public synchronized static TaggedStrings getExtraWordsList() { UTF8Properties result = null; // Load extra words. try { result = new UTF8Properties(); result.load ( new File( extraWordsFileName ).toURI().toURL().openStream() ); } catch ( Exception e ) { } return result; } /** Select adorner to use. * * @param adornerName Adorner name. * * @return AdornerInfo for specified adorner. */ public static AdornerInfo getAdornerInfo( String adornerName ) { AdornerInfo result; if ( ( adornerName != null ) && adornerName.equals( "eme" ) ) { result = emeAdornerInfo; } else { result = ncfAdornerInfo; } return result; } /** Check if servlet ready for use. * * @return true if servlet ready for use. */ public static boolean isReady() { return ( initializationStatus == INITDONE ); } /** Handle servlet post requests. * * @param request Servlet request. * @param response Servlet response. */ public void doPost ( HttpServletRequest request , HttpServletResponse response ) throws ServletException, java.io.IOException { ServletResult results; // If the servlet is not yet ready, // output a message to that effect. if ( !isReady() ) { results = outputNotReady( servletNotReadyMessage ); } else { results = doHandleRequest( request , response ); } // Output results. if ( results.getFromForm() ) { // Get servlet session. HttpSession session = request.getSession( true ); session.setAttribute ( results.getSessionAttributeName() , results.getResults() ); // Output accumulated results if // we're constructing the results // as a full web page. Otherwise // redirect to originating page // to get results displayed. if ( outputFullHTML ) { outputResults ( response , results.getResults() , results.getTitle() ); } else { response.sendRedirect ( createRedirectURL ( response, results.getRedirectionURL() , "" , "" , "" ) ); } } else { outputResults ( response , results.getResults() , results.getTitle() ); } } /** Output top of page. * * @param out PrintWriter for servlet output. * @param title The servlet title. */ public void outputHeader ( java.io.PrintWriter out , String title ) { String docType = "<!DOCTYPE html PUBLIC \"-//W3C//DTD XHTML 1.0 Transitional//EN\"" + " \"DTD/xhtml1-transitional.dtd\">"; out.println( docType ); out.println( "<html>" ); out.println( "<head>" ); out.println( "<title>" ); if ( title != null ) out.println( title ); out.println( "</title>" ); out.println( "<meta http-equiv=\"Content-Type\" " + "content=\"text/html; charset=utf-8\" />" ); out.println( "<link type=\"text/css\" rel=\"stylesheet\" " + "href=\"/morphadorner/styles/mstyle.css\" />" ); out.println( "</head>" ); out.println( "<body class=\"nomargin\">" ); } /** Output servlet not ready message. * * @param notReadyMessage Server not ready message. */ public ServletResult outputNotReady ( String notReadyMessage ) { // Get string output writer. StringPrintWriter out = new StringPrintWriter(); out.println( "<h2>" + notReadyMessage + "</h2>" ); ServletResult result = new ServletResult ( false , out.getString() , servletNotReadyTitle , "", "" ); return result; } /** Output empty table row as spacer. * * @param out PrintWriter for servlet output. * @param nColumns Number of empty table columns to output. */ public void outputSpacerRow( java.io.PrintWriter out , int nColumns ) { out.println( "<tr>" ); for ( int i = 0 ; i < nColumns ; i++ ) { out.println( "<td>" ); out.println( " " ); out.println( "</td>" ); } out.println( "</tr>" ); } /** Output bottom of page. * * @param out PrintWriter for servlet output. */ public void outputFooter( java.io.PrintWriter out ) { out.println( "</body>" ); out.println( "</html>" ); } /** Output a select clause. * * @param out PrintWriter for servlet output. * @param selectValue The value. * @param selected True if selected. */ public void outputSelect ( java.io.PrintWriter out , String selectValue , boolean selected ) { out.print( "<option value=\"" ); out.print( selectValue ); out.print( "\"" ); if ( selected ) { out.print( " selected=\"selected\" " ); } out.print( "\">" ); out.print( selectValue ); out.println( "</option>" ); } /** Output adorner selection form field. * * @param out PrintWriter for servlet output. * @param label Column label. May be empty. * @param adornerName Adorner name. */ public void outputAdornerSelection ( java.io.PrintWriter out , String label , String adornerName ) { out.println( "<tr>" ); if ( ( label != null ) && ( label.length() > 0 ) ) { out.println( "<td valign=\"top\">" ); out.println( "<strong>" ); out.print( label ); out.println( "</strong>" ); out.println( "</td>" ); } out.println( "<td>" ); String checkedEME = ""; String checkedNCF = "checked=\"checked\""; if ( ( adornerName != null ) && ( adornerName.equals( "eme" ) ) ) { checkedEME = "checked=\"checked\""; checkedNCF = ""; } out.println( "<input type=\"radio\" name=\"adornername\" " + "value=\"eme\"" + checkedEME + ">Early Modern English</input><br />" ); out.println( "<input type=\"radio\" name=\"adornername\" " + "value=\"ncf\"" + checkedNCF + ">Nineteenth Century Fiction</input>" ); out.println( "</td>" ); out.println( "</tr>" ); } /** Return stored results. * * @param response Servlet response object. * @param results Result string to return to client for display. * @param title Title for output. */ public void outputResults ( HttpServletResponse response , String results , String title ) throws java.io.IOException { java.io.PrintWriter out = response.getWriter(); if ( outputFullHTML ) outputHeader( out , title ); out.println( results ); if ( outputFullHTML ) outputFooter( out ); out.flush(); out.close(); } /** Remove HTML/XML tags from text. * * @param text The text from which to remove tags. * * @return The text with tags removed. */ public String unTag( String text ) { String result = text.trim(); if ( HTMLUtils.isHTMLTaggedText( result ) ) { result = HTMLUtils.stripHTMLTags( result ); } result = result.replaceAll( "\\s" , " " ); return result; } /** Handle request. * * @param request Servlet request. * @param response Servlet response. * * @return Servlet results. */ protected ServletResult doHandleRequest ( HttpServletRequest request , HttpServletResponse response ) throws ServletException, java.io.IOException { response.setContentType( "text/html; charset=utf-8" ); request.setCharacterEncoding( "utf8" ); return handleRequest( request , response ); } /** Gets integer parameter value. * * @param requestValue Parameter value from request. * @param defaultValue Default parameter value if parameter null * or invalid. * * @return The parameter value, or the defaultValue * if paramValue is null or invalid. */ public static int getIntValue ( String requestValue , int defaultValue ) { int result = defaultValue; if ( requestValue != null ) { try { result = Integer.parseInt( requestValue ); } catch ( NumberFormatException e ) { result = defaultValue; } } return result; } /** Handle request. Must be overridden in subclass. * * @param request Servlet request. * @param response Servlet response. * * @return Servlet results. */ protected abstract ServletResult handleRequest ( HttpServletRequest request , HttpServletResponse response ) throws ServletException, java.io.IOException; } /* Copyright (c) 2008, 2009 by Northwestern University. All rights reserved. Developed by: Academic and Research Technologies Northwestern University http://www.it.northwestern.edu/about/departments/at/ Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal with the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimers. * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimers in the documentation and/or other materials provided with the distribution. * Neither the names of Academic and Research Technologies, Northwestern University, nor the names of its contributors may be used to endorse or promote products derived from this Software without specific prior written permission. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS WITH THE SOFTWARE. */