TextUtils.java example

Explorer

ambra-master
- base
  - src
    - main
      - java
        org
        ambraproject
        ApplicationException.java
        Constants.java
        action
        BaseActionSupport.java
        BaseSessionAwareActionSupport.java
        ListingAction.java
        LogoutAction.java
        RedirectAction.java
        freemarker
        AmbraFreemarkerConfig.java
        AmbraFreemarkerManager.java
        AmbraTemplateStorage.java
        rhino
        shared
        AuthorsXmlExtractor.java
        Rhino.java
        XPathExtractor.java
        service
        annotation
        AnnotationService.java
        AnnotationServiceImpl.java
        article
        AIArticleClassifier.java
        ArticleAssetService.java
        ArticleAssetServiceImpl.java
        ArticleAssetWrapper.java
        ArticleClassifier.java
        ArticleService.java
        ArticleServiceImpl.java
        ArticleServiceSearchParameters.java
        BrowseParameters.java
        BrowseService.java
        BrowseServiceImpl.java
        DuplicateArticleIdException.java
        FetchArticleService.java
        FetchArticleServiceImpl.java
        MostViewedArticleService.java
        NoSuchArticleIdException.java
        NoSuchObjectIdException.java
        SecondaryObject.java
        cache
        Cache.java
        EhcacheProvider.java
        EternalCache.java
        NullCache.java
        captcha
        CaptchaService.java
        CaptchaServiceImpl.java
        cas
        ConfigWrapper.java
        ConfigWrapperUtil.java
        InitParamProvider.java
        client
        filter
        CASFilter.java
        CASFilterWrapper.java
        crossref
        CrossRefLookupService.java
        CrossRefLookupServiceImpl.java
        feed
        AnnotationFeedSearchParameters.java
        FeedSearchParameters.java
        FeedService.java
        FeedServiceImpl.java
        hibernate
        HibernateService.java
        HibernateServiceImpl.java
        journal
        JournalCreator.java
        JournalCreatorImpl.java
        JournalService.java
        JournalServiceImpl.java
        mailer
        AmbraMailer.java
        AmbraMailerImpl.java
        migration
        BootstrapMigratorService.java
        BootstrapMigratorServiceImpl.java
        LegacyMigration.java
        Migration.java
        Migrations.java
        SchemaMigration.java
        ScriptMigration.java
        orcid
        OrcidAuthorizationException.java
        OrcidService.java
        OrcidServiceImpl.java
        permission
        PermissionsService.java
        PermissionsServiceImpl.java
        raptor
        RaptorService.java
        RaptorServiceImpl.java
        search
        MostViewedArticleServiceImpl.java
        MostViewedCache.java
        SearchParameters.java
        SearchService.java
        SolrException.java
        SolrFieldConversion.java
        SolrFieldConversionImpl.java
        SolrHttpService.java
        SolrHttpServiceImpl.java
        SolrSearchService.java
        SolrServerFactory.java
        SolrServiceUtil.java
        taxonomy
        TaxonomyService.java
        TaxonomyServiceImpl.java
        trackback
        BlogLinkDigest.java
        DuplicateTrackbackException.java
        InboundLinkTranslator.java
        LinkbackService.java
        LinkbackServiceImpl.java
        PingbackFault.java
        PingbackService.java
        PingbackServiceImpl.java
        TrackbackService.java
        TrackbackServiceImpl.java
        user
        DuplicateOrcidException.java
        DuplicateUserException.java
        NoSuchUserException.java
        RegistrationConstants.java
        UserAlert.java
        UserAlreadyVerifiedException.java
        UserRegistrationService.java
        UserRegistrationServiceImpl.java
        UserService.java
        UserServiceImpl.java
        VerificationTokenException.java
        xml
        XMLService.java
        XMLServiceImpl.java
        struts2
        AmbraFeedResult.java
        AmbraFreemarkerResult.java
        AmbraStreamResult.java
        AmbraStruts2Dispatcher.java
        CitationResult.java
        EnsureRoleInterceptor.java
        EnsureUserAccountInterceptor.java
        InternalIpInterceptor.java
        JsonResult.java
        util
        ArticleFormattingDirective.java
        AuthorNameAbbreviationDirective.java
        CategoryUtils.java
        DateParser.java
        DocumentBuilderFactoryCreator.java
        FileUtils.java
        HibernateEntityUtil.java
        InvalidDateException.java
        MimeTypeToFileExtMapper.java
        Pair.java
        ProfanityCheckingService.java
        ProfanityCheckingServiceImpl.java
        RandomNumberDirective.java
        SimpleTextDirective.java
        StringListTypeConverter.java
        TextUtils.java
        URLParametersDirective.java
        UriUtil.java
        VersionedCSSDirective.java
        VersionedFileDirective.java
        VersionedJSDirective.java
        XPathUtil.java
        views
        AcademicEditorView.java
        AnnotationView.java
        ArticleAmendment.java
        ArticleCategory.java
        ArticleCategoryPair.java
        AssetView.java
        AuthorView.java
        BrowseResult.java
        CategoryView.java
        CitationReference.java
        CitationView.java
        CitedArticleView.java
        CrossRefSearch.java
        IssueInfo.java
        JournalView.java
        LinkbackView.java
        OrcidAuthorization.java
        SavedSearchHit.java
        SavedSearchView.java
        SearchHit.java
        SearchResultSinglePage.java
        TOCArticle.java
        TOCArticleGroup.java
        TOCRelatedArticle.java
        TaxonomyCookie.java
        UserProfileInfo.java
        VolumeInfo.java
        article
        ArticleInfo.java
        ArticleType.java
        BaseArticleInfo.java
        CitationInfo.java
        Days.java
        FeaturedArticle.java
        HomePageArticleInfo.java
        Months.java
        RelatedArticleInfo.java
        Years.java
        web
        Cookies.java
        DebuggingFilter.java
        DummySSOFilter.java
        GatekeeperFilter.java
        HttpResourceServer.java
        JournalStaticResourceFilter.java
        MultipleRequestFilter.java
        SessionCounter.java
        VirtualJournalContext.java
        VirtualJournalContextFilter.java
        VirtualJournalMappingFilter.java
    - test
      - java
        org
        ambraproject
        action
        BaseHttpTest.java
        BaseInterceptorTest.java
        BaseTest.java
        BaseWebTest.java
        freemarker
        AmbraFreeMarkerConfigTest.java
        model
        ArticleTypeTest.java
        service
        annotation
        AnnotationServiceTest.java
        article
        ArticleAssetServiceTest.java
        ArticleClassifierTest.java
        ArticleServiceTest.java
        BrowseServiceSolrTest.java
        BrowseServiceTest.java
        DummyArticleClassifier.java
        FetchArticleServiceTest.java
        HtmlChecker.java
        MostViewedArticleServiceTest.java
        crossref
        CrossRefLookupServiceImplTest.java
        journal
        JournalCreatorTest.java
        JournalServiceTest.java
        orcid
        OrcidServiceImplTest.java
        permission
        PermissionServiceTest.java
        raptor
        RaptorServiceTest.java
        search
        EmbeddedSolrServerFactoryTest.java
        MostViewedCacheTest.java
        SearchParametersTest.java
        SearchServiceTest.java
        SolrFieldConversionTest.java
        SolrHttpServiceTest.java
        trackback
        TrackbackServiceTest.java
        user
        UserRegistrationServiceTest.java
        UserServiceTest.java
        xml
        XMLServiceTest.java
        XSLTransformationTest.java
        struts2
        EnsureRoleInterceptorTest.java
        EnsureUserAccountInterceptorTest.java
        testutils
        AmbraTestConfigurationFactory.java
        DummyAmbraMailer.java
        DummyCaptcha.java
        DummyDataStore.java
        DummyHibernateDataStore.java
        EmbeddedSolrServerFactory.java
        HibernateTestSessionFactory.java
        MockHttpClient.java
        MockPermissionsService.java
        util
        ArticleFormattingDirectiveTest.java
        CategoryUtilsTest.java
        DateParserTest.java
        FileUtilsTest.java
        ProfanityCheckingServiceTest.java
        SimpleTextDirectiveTest.java
        TextUtilsTest.java
        VersionedFileDirectiveTest.java
        XPathUtilTest.java
        web
        HttpResourceServerTest.java
        apache
        commons
        httpclient
        HttpClientMock.java
- libs
  - ambra-emailer
    - src
      - main
        java
        org
        ambraproject
        email
        MailerUser.java
        TemplateMailer.java
        impl
        FreemarkerTemplateMailer.java
      - test
        java
        org
        ambraproject
        email
        MockTemplateMailer.java
  - conf-helper
    - src
      - main
        java
        org
        ambraproject
        configuration
        ConfigurationStore.java
        SpringPlaceholderConfigurer.java
        WebAppListener.java
      - test
        java
        org
        ambraproject
        configuration
        ConfigurationTest.java
        OverrideTest.java
  - dom-ranges-helper
    - src
      - main
        java
        org
        ambraproject
        dom
        ranges
        RangeNodeFilter.java
        SelectionRange.java
        SelectionRangeList.java
      - test
        java
        AnnotateExample.java
        org
        ambraproject
        dom
        ranges
        Bug298Test.java
        SelectionRangeListTest.java
  - entity-resolver
    - src
      - main
        java
        org
        ambraproject
        xml
        transform
        CustomEntityResolver.java
        EntityResolvingSource.java
        MemoryCacheURLRetriever.java
        NetworkURLRetriever.java
        ResourceURLRetriever.java
        URLRetriever.java
        cache
        CachedSource.java
      - test
        java
        org
        ambraproject
        xml
        transform
        EntityResolverTest.java
  - password-service
    - src
      - main
        java
        org
        ambraproject
        service
        password
        PasswordDigestService.java
      - test
        java
        org
        ambraproject
        service
        password
        TestPasswordDigestService.java
- models
  - src
    - main
      - java
        org
        ambraproject
        hibernate
        GenerateSchemaSQL.java
        GenericEnumUserType.java
        SchemaGenerator.java
        models
        AmbraEntity.java
        Annotation.java
        AnnotationType.java
        Article.java
        ArticleAsset.java
        ArticleAuthor.java
        ArticleCategoryFlagged.java
        ArticleEditor.java
        ArticleList.java
        ArticlePerson.java
        ArticleRelationship.java
        ArticleView.java
        Category.java
        CitedArticle.java
        CitedArticleAuthor.java
        CitedArticleEditor.java
        CitedArticlePerson.java
        Flag.java
        FlagReasonCode.java
        Issue.java
        Journal.java
        Linkback.java
        Pingback.java
        SavedSearch.java
        SavedSearchQuery.java
        SavedSearchType.java
        Syndication.java
        Trackback.java
        UserLogin.java
        UserOrcid.java
        UserProfile.java
        UserProfileMetaData.java
        UserRole.java
        UserSearch.java
        Version.java
        Volume.java
        util
        TokenGenerator.java
        URIGenerator.java
    - test
      - java
        org
        ambraproject
        models
        AnnotationTest.java
        ArticleAssetTest.java
        ArticleListTest.java
        ArticleRelationshipTest.java
        ArticleTest.java
        ArticleViewTest.java
        BaseHibernateTest.java
        CategoryTest.java
        CitedArticleTest.java
        CreateAndModifiedDateTest.java
        FlagTest.java
        IssueTest.java
        JournalTest.java
        SavedSearchTest.java
        SyndicationTest.java
        TrackbackTest.java
        UserLoginTest.java
        UserOrcidTest.java
        UserProfileMetaDataTest.java
        UserProfileTest.java
        UserRoleTest.java
        UserSearchTest.java
        VersionTest.java
        VolumeTest.java
        util
        URIGeneratorTest.java
- queue
  - src
    - main
      - java
        org
        ambraproject
        amendment
        AmendmentService.java
        AmendmentServiceImpl.java
        category
        CategoryService.java
        CategoryServiceImpl.java
        queue
        CamelSender.java
        MessageSender.java
        MessageService.java
        MessageServiceImpl.java
        routes
        CrossRefLookupRoutes.java
        SavedSearchEmailRoutes.java
        search
        SavedSearchJob.java
        SavedSearchRetriever.java
        SavedSearchRetrieverImpl.java
        SavedSearchRunner.java
        SavedSearchRunnerImpl.java
        SavedSearchSender.java
        SavedSearchSenderImpl.java
        service
        ned
        NedService.java
        NedServiceImpl.java
        syndication
        SyndicationException.java
        SyndicationService.java
        impl
        SyndicationServiceImpl.java
        solr
        XmlTransformer.java
        util
        xml
        ArticleXMLReader.java
    - test
      - java
        org
        ambraproject
        category
        CategoryServiceTest.java
        queue
        CamelSenderTest.java
        CrossRefLookupRoutesTest.java
        MessageServiceImplTest.java
        SavedSearchRetrieverTest.java
        SavedSearchRouteBaseTest.java
        SavedSearchRouteMonthlyTest.java
        SavedSearchRouteWeeklyTest.java
        SavedSearchRunnerTest.java
        SavedSearchSenderTest.java
        search
        service
        DummySOLRMessageSender.java
        service
        ned
        NedServiceMock.java
        syndication
        SyndicationServiceTest.java
- webapp
  - src
    - main
      - java
        org
        ambraproject
        action
        FeedbackAction.java
        HomePageAction.java
        InternalIpAction.java
        MostViewedAction.java
        NoOpAction.java
        PageNotFoundAction.java
        annotation
        CreateAnnotationAction.java
        CreateFlagAction.java
        CreateReplyAction.java
        DiscussionAction.java
        GetAnnotationAction.java
        ListReplyAction.java
        StartDiscussionAction.java
        article
        ArticleHeaderAction.java
        ArticleListAction.java
        BrowseIssueAction.java
        BrowseVolumeAction.java
        CreateCitation.java
        EmailArticleAction.java
        FetchArticleAction.java
        FetchArticleAmendmentAction.java
        FetchArticleCategoriesAction.java
        FetchArticleTabsAction.java
        FetchObjectAction.java
        FetchPowerPointAction.java
        FigureSlideShow.java
        MediaCoverageAction.java
        SlideshowAction.java
        debug
        DebugInfoAction.java
        ProcessDumpAction.java
        ThreadDumpAction.java
        feed
        FeedAction.java
        ObsoleteFeedAction.java
        search
        BaseSearchAction.java
        ListFacetAction.java
        SaveJournalAlertAction.java
        SaveSearchAction.java
        SearchAction.java
        taxonomy
        BrowseAction.java
        DeflagTaxonomyTermAction.java
        FlagTaxonomyTermAction.java
        TaxonomyAction.java
        trackback
        CreatePingbackAction.java
        CreateTrackbackAction.java
        user
        DisplayUserAction.java
        EditUserAction.java
        OrcidConfirmAction.java
        OrcidRemoveAction.java
        UserActionSupport.java
    - test
      - java
        org
        ambraproject
        action
        AmbraHttpTest.java
        AmbraWebTest.java
        FeedbackActionTest.java
        HomepageActionTest.java
        MostViewedActionTest.java
        annotation
        CreateAnnotationActionTest.java
        CreateFlagActionTest.java
        CreateReplyActionTest.java
        ListReplyActionTest.java
        article
        BrowseIssueActionTest.java
        CreateCitationTest.java
        EmailArticleActionTest.java
        FetchActionTest.java
        FetchArticleActionTest.java
        FetchArticleTabsActionTest.java
        FetchObjectActionTest.java
        FetchPowerPointActionTest.java
        MediaCoverageActionTest.java
        SlideshowActionTest.java
        search
        SaveSearchActionTest.java
        SearchActionTest.java
        trackback
        CreateTrackbackActionTest.java
        TrackbackHttpTest.java
        user
        DisplayUserActionTest.java
        EditUserActionTest.java

/*
 * Copyright (c) 2006-2013 by Public Library of Science
 *
 * http://plos.org
 * http://ambraproject.org
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.ambraproject.util;

import com.opensymphony.util.UrlUtils;
import java.io.StringReader;
import java.io.StringWriter;
import java.net.MalformedURLException;
import java.net.URI;
import java.security.MessageDigest;
import java.security.NoSuchAlgorithmException;
import java.util.ArrayList;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.transform.OutputKeys;
import javax.xml.transform.Transformer;
import javax.xml.transform.TransformerException;
import javax.xml.transform.TransformerFactory;
import javax.xml.transform.dom.DOMSource;
import javax.xml.transform.stream.StreamResult;
import org.apache.commons.lang.StringEscapeUtils;
import org.apache.commons.lang.StringUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.w3c.dom.Document;
import org.w3c.dom.Node;
import org.xml.sax.InputSource;
import sun.misc.BASE64Encoder;

/**
 * Provides some useful text manipulation functions.
 */
public class TextUtils {
  /** The literal scheme prefix {@code "http://"}. */
  public static final String HTTP_PREFIX = "http://";

  // Matches any single one of the characters < > " ' % ; ( ) & +
  private static final Pattern maliciousContentPattern = Pattern.compile("[<>\"\'%;()&+]");

  // Matches one line break: a Unicode line separator (category Zl), CR LF, LF, NEL (U+0085)
  // or a Unicode paragraph separator (category Zp).  A lone CR is not matched.
  private static final Pattern lineBreakPattern = Pattern.compile("\\p{Zl}|\r\n|\n|\u0085|\\p{Zp}");

  // Wiki-style markers consumed by the makeHtml* methods: three, two and five
  // consecutive apostrophes respectively.
  private static final Pattern strongPattern = Pattern.compile("'''");
  private static final Pattern emphasizePattern = Pattern.compile("''");
  private static final Pattern strongEmphasizePattern = Pattern.compile("'''''");

  // Two consecutive carets (superscript marker) and two consecutive tildes (subscript marker).
  private static final Pattern superscriptPattern = Pattern.compile("\\^\\^");
  private static final Pattern subscriptPattern = Pattern.compile("~~");

  // SLF4J logger for this class.
  private static Logger log = LoggerFactory.getLogger(TextUtils.class);

  /**
   * Hashes the bytes of a string by delegating to {@link #createHash(byte[])}.
   * <p>
   * The string is converted with {@link String#getBytes()}, i.e. using the platform's default
   * charset, so for non-ASCII text the result depends on the JVM's configuration.
   *
   * @param string the text to hash; must not be null
   *
   * @return the hash of the string's bytes
   */
  public static String createHash(String string) {
    final byte[] raw = string.getBytes();
    return createHash(raw);
  }

  /**
   * Computes the SHA-1 digest of a byte array and renders it as text via
   * {@code encodeText}.
   *
   * @param bytes the data to hash; must not be null
   *
   * @return the text form of the SHA-1 digest
   */
  public static String createHash(byte[] bytes) {
    final MessageDigest sha1;
    try {
      sha1 = MessageDigest.getInstance("SHA-1");
    } catch (NoSuchAlgorithmException missingAlgorithm) {
      // Surface a missing SHA-1 provider as an unchecked failure.
      throw new RuntimeException(missingAlgorithm);
    }

    // digest(byte[]) feeds the data in and finishes the hash in one call.
    return encodeText(sha1.digest(bytes));
  }

  /**
   * Renders binary data as a Base64-derived string: every {@code '/'} is replaced by
   * {@code '_'}, surrounding whitespace is trimmed, and the final character is dropped.
   * <p>
   * The final character is dropped unconditionally.  It is the single {@code '='} padding
   * character only when {@code data.length % 3 == 2}, which holds for the 20-byte SHA-1
   * digests passed in by {@code createHash}.
   *
   * @param data the bytes to encode
   * @return the encoded text without its last character
   */
  private static String encodeText(byte[] data) {
    final String raw = new BASE64Encoder().encodeBuffer(data);

    // Swap slashes for underscores, then strip the line break that encodeBuffer appends.
    final String cleaned = raw.replace('/', '_').trim();

    final int lastIndex = cleaned.length() - 1;
    return cleaned.substring(0, lastIndex);
  }

  /**
   * Builds a new list holding the {@link URI#toString()} form of every URI in the given list,
   * in the same order.
   *
   * @param list the URIs to convert; must not be null and must not contain null elements
   * @return a new, mutable list of the URIs' string forms
   */
  public static List<String> toStringList(List<URI> list) {
    final List<String> converted = new ArrayList<String>(list.size());

    for (final URI current : list) {
      final String asText = current.toString();
      converted.add(asText);
    }

    return converted;
  }

  /**
   * Replaces every line break matched by {@code lineBreakPattern} with the literal
   * {@code <br/>} tag so the text can be shown as HTML.
   * <p>
   * Input that is blank according to {@code StringUtils.isBlank} (null, empty or
   * whitespace-only) is returned unchanged.
   *
   * @param input the text to convert
   * @return the text with line breaks replaced by {@code <br/>}, or the input itself when blank
   */
  public static String makeHtmlLineBreaks (final String input) {
    final boolean nothingToConvert = StringUtils.isBlank(input);
    return nothingToConvert
        ? input
        : lineBreakPattern.matcher(input).replaceAll("<br/>");
  }

  /**
   * Converts wiki-style bold markup to HTML: successive occurrences of {@code '''} are replaced
   * alternately by {@code <strong>} and {@code </strong>}, starting with the opening tag.
   * <p>
   * For example, {@code foo '''bar''' baz} becomes {@code foo <strong>bar</strong> baz}.
   * <p>
   * Markers are not checked for balance: {@code foo ''bar''' baz} becomes
   * {@code foo ''bar<strong> baz}, and an odd number of markers leaves the last
   * {@code <strong>} unclosed.
   *
   * @param input the text to convert; may be null
   * @return the input itself when it is blank or contains no {@code '''} marker, otherwise the
   *         text with every marker replaced by a tag
   */
  public static String makeHtmlStrong (final String input) {
    if (StringUtils.isBlank(input)) {
      return input;
    }

    final Matcher matcher = strongPattern.matcher(input);
    if (! matcher.find()) {
      return input; // No marker present: nothing to do.
    }

    // Single left-to-right pass.  The replacement tags contain no apostrophes, so this yields
    //   the same text as repeatedly replacing the first remaining marker, without rescanning
    //   the string from the start for each one.
    final StringBuffer transformedInput = new StringBuffer(input.length() + 32);
    boolean isInsideATag = false; // Whether an open tag was the most recent substitution.
    do {
      matcher.appendReplacement(transformedInput, isInsideATag ? "</strong>" : "<strong>");
      isInsideATag = ! isInsideATag;
    } while (matcher.find());
    matcher.appendTail(transformedInput);

    return transformedInput.toString();
  }

  /**
   * Converts wiki-style italic markup to HTML: successive occurrences of {@code ''} are replaced
   * alternately by {@code <em>} and {@code </em>}, starting with the opening tag.
   * <p>
   * For example, {@code foo ''bar'' baz} becomes {@code foo <em>bar</em> baz}.
   * <p>
   * Markers are not checked for balance: {@code foo 'bar'' baz} becomes
   * {@code foo 'bar<em> baz}, and an odd number of markers leaves the last {@code <em>}
   * unclosed.
   *
   * @param input the text to convert; may be null
   * @return the input itself when it is blank or contains no {@code ''} marker, otherwise the
   *         text with every marker replaced by a tag
   */
  public static String makeHtmlEmphasized (final String input) {
    if (StringUtils.isBlank(input)) {
      return input;
    }

    final Matcher matcher = emphasizePattern.matcher(input);
    if (! matcher.find()) {
      return input; // No marker present: nothing to do.
    }

    // Single left-to-right pass.  The replacement tags contain no apostrophes, so this yields
    //   the same text as repeatedly replacing the first remaining marker, without rescanning
    //   the string from the start for each one.
    final StringBuffer transformedInput = new StringBuffer(input.length() + 32);
    boolean isInsideATag = false; // Whether an open tag was the most recent substitution.
    do {
      matcher.appendReplacement(transformedInput, isInsideATag ? "</em>" : "<em>");
      isInsideATag = ! isInsideATag;
    } while (matcher.find());
    matcher.appendTail(transformedInput);

    return transformedInput.toString();
  }

  /**
   * Converts wiki-style bold-italic markup to HTML: successive occurrences of {@code '''''}
   * (five apostrophes) are replaced alternately by {@code <strong><em>} and
   * {@code </em></strong>}, starting with the opening tags.
   * <p>
   * For example, {@code foo '''''bar''''' baz} becomes
   * {@code foo <strong><em>bar</em></strong> baz}.
   * <p>
   * Markers are not checked for balance: {@code foo 'bar''''' baz} becomes
   * {@code foo 'bar<strong><em> baz}, and an odd number of markers leaves the last
   * {@code <strong><em>} unclosed.
   *
   * @param input the text to convert; may be null
   * @return the input itself when it is blank or contains no {@code '''''} marker, otherwise
   *         the text with every marker replaced by tags
   */
  public static String makeHtmlStrongEmphasized (final String input) {
    if (StringUtils.isBlank(input)) {
      return input;
    }

    final Matcher matcher = strongEmphasizePattern.matcher(input);
    if (! matcher.find()) {
      return input; // No marker present: nothing to do.
    }

    // Single left-to-right pass.  The replacement tags contain no apostrophes, so this yields
    //   the same text as repeatedly replacing the first remaining marker, without rescanning
    //   the string from the start for each one.
    final StringBuffer transformedInput = new StringBuffer(input.length() + 32);
    boolean isInsideATag = false; // Whether an open tag was the most recent substitution.
    do {
      matcher.appendReplacement(transformedInput,
          isInsideATag ? "</em></strong>" : "<strong><em>");
      isInsideATag = ! isInsideATag;
    } while (matcher.find());
    matcher.appendTail(transformedInput);

    return transformedInput.toString();
  }

  /**
   * Converts wiki-style superscript markup to HTML: successive occurrences of {@code ^^} are
   * replaced alternately by {@code <sup>} and {@code </sup>}, starting with the opening tag.
   * <p>
   * For example, {@code foo ^^bar^^ baz} becomes {@code foo <sup>bar</sup> baz}.
   * <p>
   * Markers are not checked for balance: {@code foo ^bar^^ baz} becomes
   * {@code foo ^bar<sup> baz}, and an odd number of markers leaves the last {@code <sup>}
   * unclosed.
   *
   * @param input the text to convert; may be null
   * @return the input itself when it is blank or contains no {@code ^^} marker, otherwise the
   *         text with every marker replaced by a tag
   */
  public static String makeHtmlSuperscript (final String input) {
    if (StringUtils.isBlank(input)) {
      return input;
    }

    final Matcher matcher = superscriptPattern.matcher(input);
    if (! matcher.find()) {
      return input; // No marker present: nothing to do.
    }

    // Single left-to-right pass.  The replacement tags contain no carets, so this yields the
    //   same text as repeatedly replacing the first remaining marker, without rescanning the
    //   string from the start for each one.
    final StringBuffer transformedInput = new StringBuffer(input.length() + 32);
    boolean isInsideATag = false; // Whether an open tag was the most recent substitution.
    do {
      matcher.appendReplacement(transformedInput, isInsideATag ? "</sup>" : "<sup>");
      isInsideATag = ! isInsideATag;
    } while (matcher.find());
    matcher.appendTail(transformedInput);

    return transformedInput.toString();
  }

  /**
   * Converts wiki-style subscript markup to HTML: successive occurrences of {@code ~~} are
   * replaced alternately by {@code <sub>} and {@code </sub>}, starting with the opening tag.
   * <p>
   * For example, {@code foo ~~bar~~ baz} becomes {@code foo <sub>bar</sub> baz}.
   * <p>
   * Markers are not checked for balance: {@code foo ~bar~~ baz} becomes
   * {@code foo ~bar<sub> baz}, and an odd number of markers leaves the last {@code <sub>}
   * unclosed.
   *
   * @param input the text to convert; may be null
   * @return the input itself when it is blank or contains no {@code ~~} marker, otherwise the
   *         text with every marker replaced by a tag
   */
  public static String makeHtmlSubscript (final String input) {
    if (StringUtils.isBlank(input)) {
      return input;
    }

    final Matcher matcher = subscriptPattern.matcher(input);
    if (! matcher.find()) {
      return input; // No marker present: nothing to do.
    }

    // Single left-to-right pass.  The replacement tags contain no tildes, so this yields the
    //   same text as repeatedly replacing the first remaining marker, without rescanning the
    //   string from the start for each one.
    final StringBuffer transformedInput = new StringBuffer(input.length() + 32);
    boolean isInsideATag = false; // Whether an open tag was the most recent substitution.
    do {
      matcher.appendReplacement(transformedInput, isInsideATag ? "</sub>" : "<sub>");
      isInsideATag = ! isInsideATag;
    } while (matcher.find());
    matcher.appendTail(transformedInput);

    return transformedInput.toString();
  }

  /**
   * Runs the text through {@link #hyperlink(String, int)} and wraps the result in a
   * {@code <p>...</p>} element.
   *
   * @param text the text to linkify
   * @param maxLength the maximum length (in displayed characters) of the text shown inside each
   *                  generated anchor; passed through to {@link #hyperlink(String, int)}
   * @return the hyperlinked text enclosed in paragraph tags
   */
  public static String hyperlinkEnclosedWithPTags(final String text, int maxLength) {
    final String linked = hyperlink(text, maxLength);
    return "<p>" + linked + "</p>";
  }

  /**
   * Convenience overload of {@link #hyperlinkEnclosedWithPTags(String, int)} that passes
   * {@code 0} as the maximum length.
   *
   * @param text the text to linkify
   * @return the hyperlinked text enclosed in paragraph tags
   */
  public static String hyperlinkEnclosedWithPTags(final String text) {
    final int defaultMaxLength = 0;
    return hyperlinkEnclosedWithPTags(text, defaultMaxLength);
  }

  /**
   * Turns web links in the text into HTML anchors by delegating to {@code linkURL}.
   * <p>
   * Blank text is returned unchanged.
   *
   * @param text      the text to linkify
   * @param maxLength the maximum length (in displayed characters) of the text shown inside each
   *                  generated anchor; passed through to {@code linkURL}
   * @return the hyperlinked text
   */
  public static String hyperlink(final String text, int maxLength) {
    if (StringUtils.isBlank(text)) {
      return text;
    }

    // HACK: a ')' directly after a URL, as in "(see http://www.domain.com)", would otherwise
    // end up inside the generated link.  Parentheses are therefore disguised as braces while
    // linkURL runs and restored afterwards.  The disguise is not reversible when the text
    // already contains braces, so in that case the text is linked as-is.
    final boolean containsBraces = text.indexOf('{') >= 0 || text.indexOf('}') >= 0;
    if (containsBraces) {
      return linkURL(text, null, maxLength);
    }

    final String disguised = text.replace('(', '{').replace(')', '}');
    final String linked = linkURL(disguised, null, maxLength);

    final String openRestored = StringUtils.replace(linked, "{", "(");
    return StringUtils.replace(openRestored, "}", ")");
  }

  /**
   * Convenience overload of {@link #hyperlink(String, int)} that passes {@code 0} as the
   * maximum length.
   *
   * @param text the text to linkify
   * @return the hyperlinked text
   */
  public static String hyperlink(final String text) {
    final int defaultMaxLength = 0;
    return hyperlink(text, defaultMaxLength);
  }

  /**
   * Escapes the content with {@code StringEscapeUtils.escapeHtml} and then applies this class's
   * formatting conversions: line breaks become HTML break tags and the wiki-style markers for
   * bold-italic, bold, italic, superscript and subscript become their HTML tags.
   *
   * @param bodyContent the raw content to make safe for rendering
   * @return the escaped content with line breaks and wiki markup converted to HTML
   */
  public static String escapeHtml(final String bodyContent) {
    final String escaped = StringEscapeUtils.escapeHtml(bodyContent);
    final String withLineBreaks = makeHtmlLineBreaks(escaped);

    // Longest apostrophe marker first: ''''' has to be consumed before ''' and ''' before '',
    //   otherwise the shorter patterns would match inside the longer markers.
    final String withQuoteMarkup =
        makeHtmlEmphasized(makeHtmlStrong(makeHtmlStrongEmphasized(withLineBreaks)));

    // ^^ is converted first, then ~~
    return makeHtmlSubscript(makeHtmlSuperscript(withQuoteMarkup));
  }

  /**
   * Escape the given text for HTML, then linkify it and enclose it in paragraph tags.
   *
   * @param bodyContent bodyContent
   * @return Return escaped and hyperlinked text
   */
  public static String escapeAndHyperlink(final String bodyContent) {
    final String escaped = escapeHtml(bodyContent);
    // 0 = do not shorten the text displayed inside the generated links
    return hyperlinkEnclosedWithPTags(escaped, 0);
  }

  /**
   * Serialize a DOM node (for instance a whole org.w3c.dom.Document) to a String.
   * The XML declaration is omitted from the output.
   *
   * @param node the node to serialize
   * @return String representation of node
   * @throws TransformerException if the transformer cannot be created or the transformation fails
   */
  public static String getAsXMLString(final Node node) throws TransformerException {
    final StringWriter out = new StringWriter();

    // A transformer created without a stylesheet performs an identity transformation.
    final Transformer serializer = TransformerFactory.newInstance().newTransformer();
    serializer.setOutputProperty(OutputKeys.OMIT_XML_DECLARATION, "yes");
    serializer.transform(new DOMSource(node), new StreamResult(out));

    return out.toString();
  }

  /**
   * Check whether the given string can be parsed as a URI and converted into a URL.
   *
   * @param url A URL
   * @return true if the conversion succeeds, false if it fails with any exception
   */
  public static boolean verifyUrl(final String url) {
    boolean valid;
    try {
      // Only the attempt matters; the resulting URL object is not needed.
      new URI(url).toURL();
      valid = true;
    } catch (Exception e) {
      valid = false;
    }
    return valid;
  }

  /**
   * Make a valid url from the given input url or url fragment. The input is returned as is
   * when it already verifies; otherwise it is retried with HTTP_PREFIX put in front of it.
   *
   * @param url url
   * @return valid url
   * @throws MalformedURLException if neither the input nor the prefixed input is a valid url
   */
  public static String makeValidUrl(final String url) throws MalformedURLException {
    if (verifyUrl(url)) {
      return url;
    }

    final String prefixed = HTTP_PREFIX + url;
    if (verifyUrl(prefixed)) {
      return prefixed;
    }

    throw new MalformedURLException("Invalid url:" + url);
  }

  /**
   * Check if the input text is potentially malicious, i.e. whether the malicious content
   * pattern is found anywhere in it. For more details read;
   * http://www.dwheeler.com/secure-programs/Secure-Programs-HOWTO/cross-site-malicious-content.html
   *
   * @param text text
   * @return true if the pattern matches some part of the text
   */
  public static boolean isPotentiallyMalicious(final String text) {
    final boolean suspicious = maliciousContentPattern.matcher(text).find();
    return suspicious;
  }

  /**
   * Escape html entity characters and high characters (eg "curvy" Word quotes).
   * Note this method can also be used to encode XML.
   *
   * @param s                  the String to escape.
   * @param encodeSpecialChars if true high characters will be encode other wise not.
   * @return the escaped string
   */
  private static String htmlEncode(String s, boolean encodeSpecialChars) {
    s = noNull(s, "");

    StringBuilder str = new StringBuilder();

    for (int j = 0; j < s.length(); j++) {
      char c = s.charAt(j);

      // encode standard ASCII characters into HTML entities where needed
      if (c < '\200') {
        switch (c) {
          case '"':
            str.append(""");

            break;

          case '&':
            str.append("&");

            break;

          case '<':
            str.append("<");

            break;

          case '>':
            str.append(">");

            break;

          default:
            str.append(c);
        }
      }
      // encode 'ugly' characters (ie Word "curvy" quotes etc)
      else if (encodeSpecialChars && (c < '\377')) {
        String hexChars = "0123456789ABCDEF";
        int a = c % 16;
        int b = (c - a) / 16;
        str.append("&#x")
           .append(hexChars.charAt(b))
           .append(hexChars.charAt(a))
           .append(';');
      }
      //add other characters back in - to handle charactersets
      //other than ascii
      else {
        str.append(c);
      }
    }

    return str.toString();
  }

  /**
   * Wrap all urls ('abc://' and 'www.abc') in specified string with href tags.
   * When a url is longer than maxDisplayLength, the text displayed for it is cut at that
   * length and three periods ("...") are added.
   *
   * @param str The block of text to check.
   * @param target The target to use for the href (optional).
   * @param maxDisplayLength The max length (in displayed characters) of the text to be displayed
   *                         inside the anchor tag
   * @return String The block of text with all url's placed in href tags.
   */
  //TODO: If openSymphony's implementation of this method one day matches this, we can remove this class
  private static String linkURL(String str, String target, int maxDisplayLength) {
    // Reserve a little extra room for the markup that is inserted around each url.
    final int initialCapacity = (int) (str.length() * 1.05);
    final StringBuilder buffer = new StringBuilder(initialCapacity).append(str);

    linkURL(buffer, target, maxDisplayLength);

    return buffer.toString();
  }

  /**
   * Return <code>string</code>, or <code>defaultString</code> if
   * <code>string</code> is <code>null</code> or <code>""</code>.
   * The default is returned as is, so the result is only <code>null</code> when
   * <code>defaultString</code> itself is <code>null</code>.
   *
   * <p>Examples:</p>
   * <pre>
   * // prints "hello"
   * String s=null;
   * System.out.println(TextUtils.noNull(s,"hello"));
   *
   * // prints "hello"
   * s="";
   * System.out.println(TextUtils.noNull(s,"hello"));
   *
   * // prints "world"
   * s="world";
   * System.out.println(TextUtils.noNull(s, "hello"));
   * </pre>
   *
   * @param string the String to check.
   * @param defaultString The string to return if <code>string</code> is <code>null</code> or <code>""</code>
   * @return <code>string</code> if <code>string</code> is non-empty, and <code>defaultString</code> otherwise
   * @see #stringSet(String)
   */
  private static String noNull(String string, String defaultString) {
    if (stringSet(string)) {
      return string;
    }
    return defaultString;
  }

  /**
   * Tell whether <code>string</code> holds any content, i.e. is neither
   * <code>null</code> nor <code>""</code>. Whitespace counts as content.
   *
   * @param string the <code>String</code> to check
   * @return true if the string is non-null and has at least one character
   */
  private static boolean stringSet(String string) {
    if (string == null) {
      return false;
    }
    return string.length() > 0;
  }

  /**
   * Find where the next URL starts, looking for both a scheme-prefixed URL ('abc://') and a
   * 'www.' URL after <code>startIndex</code> and taking whichever comes first.
   *
   * @param str String builder to search
   * @param startIndex index after which the search begins
   * @return index of the first character of the next URL, or -1 if there is none
   */
  private static int getStartUrl(StringBuilder str, int startIndex) {
    final int byScheme = getSchemeIndex(str, startIndex);
    final int byWww = str.indexOf("www.", startIndex + 1);

    if (byScheme == -1) {
      // No scheme URL: the www result decides (it is -1 when nothing was found at all).
      return byWww;
    }
    if (byWww == -1) {
      return byScheme;
    }

    // Both kinds were found; the earlier one is the next URL.
    return Math.min(byScheme, byWww);
  }

  private static void linkURL(StringBuilder str, String target, int maxDisplayLength) {
    String urlToDisplay;

    int lastEndIndex = -1; //Stores the index position, within the whole string, of the ending char of the last URL found.

    String targetString = ((target == null) || (target.trim().length() == 0)) ? "" : (" target=\"" + target.trim() + '\"');

    while (true) {
      int linkStartIndex = getStartUrl(str, lastEndIndex);

      //if no more links found - then end the loop
      if (linkStartIndex == -1) {
        break;
      } else {
        //Get the whole URL...
        //We move forward and add each character to the URL string until we encounter
        //an invalid URL character (we assume that the URL ends there).
        int linkEndIndex = linkStartIndex;
        String urlStr = "";

        while (true) {
          // if char at linkEndIndex is '&' then we look at the next 4 chars
          // to see if they make up "&" altogether. This is the html coded
          // '&' and will pretty much stuff up an otherwise valid link becos of the ';'.
          // We therefore have to remove it before proceeding...
          if (str.charAt(linkEndIndex) == '&') {
            if (((linkEndIndex + 6) <= str.length()) && """.equals(str.substring(linkEndIndex, linkEndIndex + 6))) {
              break;
            } else if (((linkEndIndex + 5) <= str.length()) && "&".equals(str.substring(linkEndIndex, linkEndIndex + 5))) {
              str.replace(linkEndIndex, linkEndIndex + 5, "&");
            }
          }

          if (UrlUtils.isValidURLChar(str.charAt(linkEndIndex))) {
            urlStr += str.charAt(linkEndIndex);
            linkEndIndex++;

            if (linkEndIndex == str.length()) { //Reached end of str...

              break;
            }
          } else {
            break;
          }
        }

        //if the characters before the linkStart equal 'href="' then don't link the url - CORE-44
        if (linkStartIndex >= 6) { //6 = "href\"".length()

          String prefix = str.substring(linkStartIndex - 6, linkStartIndex);

          if ("href=\"".equals(prefix)) {
            lastEndIndex = linkEndIndex;

            continue;
          }
        }

        //if the characters after the linkEnd are '</a>' then this url is probably already linked - CORE-44
        if (str.length() >= (linkEndIndex + 4)) { //4 = "</a>".length()

          String suffix = str.substring(linkEndIndex, linkEndIndex + 4);

          if ("</a>".equals(suffix)) {
            lastEndIndex = linkEndIndex + 4;

            continue;
          }
        }

        //Decrement linkEndIndex back by 1 to reflect the real ending index position of the URL...
        linkEndIndex--;

        // If the last char of urlStr is a '.' we exclude it. It is most likely a full stop and
        // we don't want that to be part of an url.
        while (true) {
          char lastChar = urlStr.charAt(urlStr.length() - 1);

          if (lastChar == '.') {
            urlStr = urlStr.substring(0, urlStr.length() - 1);
            linkEndIndex--;
          } else {
            break;
          }
        }

        //if the URL had a '(' before it, and has a ')' at the end, trim the last ')' from the url
        //ie '(www.opensymphony.com)' => '(<a href="http://www.openymphony.com/">www.opensymphony.com</a>)'
        char lastChar = urlStr.charAt(urlStr.length() - 1);

        if (lastChar == ')') {
          if ((linkStartIndex > 0) && ('(' == (str.charAt(linkStartIndex - 1)))) {
            urlStr = urlStr.substring(0, urlStr.length() - 1);
            linkEndIndex--;
          }
        } else if (lastChar == '\'') {
          if ((linkStartIndex > 0) && ('\'' == (str.charAt(linkStartIndex - 1)))) {
            urlStr = urlStr.substring(0, urlStr.length() - 1);
            linkEndIndex--;
          }
        }
        //perhaps we ended with '>', '<' or '"'
        //We need to strip these
        //ie '"www.opensymphony.com"' => '"<a href="http://www.openymphony.com/">www.opensymphony.com</a>"'
        //ie '<www.opensymphony.com>' => '<<a href="http://www.openymphony.com/">www.opensymphony.com</a>>'
        else if (lastChar == ';') {
          // 6 = """.length()
          if ((urlStr.length() > 6) && """.equalsIgnoreCase(urlStr.substring(urlStr.length() - 6))) {
            urlStr = urlStr.substring(0, urlStr.length() - 6);
            linkEndIndex -= 6;
          }
          // 4 = "<".length()  || ">".length()
          else if (urlStr.length() > 4) {
            final String endingStr = urlStr.substring(urlStr.length() - 4);

            if ("<".equalsIgnoreCase(endingStr) || ">".equalsIgnoreCase(endingStr)) {
              urlStr = urlStr.substring(0, urlStr.length() - 4);
              linkEndIndex -= 4;
            }
          }
        }

        // we got the URL string, now we validate it and convert it into a hyperlink...

        if (maxDisplayLength > 0 && urlStr.length() > maxDisplayLength) {
          urlToDisplay = htmlEncode(urlStr.substring(0, maxDisplayLength), true) + "...";
        } else {
          urlToDisplay = htmlEncode(urlStr, true);
        }

        if (urlStr.toLowerCase().startsWith("www.")) {
          urlStr = "http://" + urlStr;
        }

        if (UrlUtils.verifyHierachicalURI(urlStr)) {
          //Construct the hyperlink for the url...
          String urlLink;

          if (maxDisplayLength > 0 && urlStr.length() > maxDisplayLength) {
            //urlLink = "<a href=\"" + urlStr + "\"" + targetString + ">" + urlToDisplay + "</a>";
            urlLink = "<a href=\"" + urlStr + "\"" + targetString + " title=\"" + htmlEncode(urlStr, true) + "\">" + urlToDisplay + "</a>";
          } else {
            urlLink = "<a href=\"" + urlStr + "\"" + targetString + ">" + urlToDisplay + "</a>";
          }

          //urlLink = "<a href=\"" + urlStr + '\"' + targetString + '>' + urlToDisplay + "</a>";

          //Remove the original urlStr from str and put urlLink there instead...
          str.replace(linkStartIndex, linkEndIndex + 1, urlLink);

          //Set lastEndIndex to reflect the position of the end of urlLink
          //within the whole string...
          lastEndIndex = (linkStartIndex - 1) + urlLink.length();
        } else {
          //lastEndIndex is different from the one above cos' there's no
          //<a href...> tags added...
          lastEndIndex = (linkStartIndex - 1) + urlStr.length();
        }
      }
    }
  }

  /**
   * Given a string, and the index to start looking at, find the index of the start of the
   * scheme of the next scheme-prefixed URL. Eg.
   * <pre>
   * getSchemeIndex("notes://abc", 0) -> 0
   * getSchemeIndex("abc notes://abc", 0) -> 4
   * </pre>
   * The search for <code>UrlUtils.SCHEME_URL</code> begins at <code>startIndex + 1</code>.
   *
   * @param str    The string to search in
   * @param startIndex   Where to start looking at
   * @return The index of the first character of the scheme, or -1 if the scheme separator was
   *         not found or was found at the very start of the string.
   */
  private static int getSchemeIndex(StringBuilder str, int startIndex) {
    final int separatorIndex = str.indexOf(UrlUtils.SCHEME_URL, startIndex + 1);

    // not found, or nothing in front of the separator that could be a scheme
    if (separatorIndex <= 0) {
      return -1;
    }

    // Extend backwards from the separator for as long as the preceding character may be part
    // of a scheme.
    int schemeStart = separatorIndex;
    while (schemeStart > 0 && UrlUtils.isValidSchemeChar(str.charAt(schemeStart - 1))) {
      schemeStart--;
    }

    // The scheme found is deliberately not validated here (UrlUtils.isValidScheme): returning
    // -1 for an invalid scheme would ruin the linking for later schemes.
    return schemeStart;
  }

  /**
   * Remove all of the XML and HTML tags from the <code>s</code> parameter.
   * The RegEx in this method removes everything between two "innermost" angle brackets
   * (a "less than" sign, any run of characters that are not angle brackets, and a
   * "greater than" sign), so it may accidentally remove sections of text that are not tags.
   * <p/>
   * For instance, the title: "Yak mass &lt; whale mass, but yak mass &gt; weasel mass" would
   * be reduced to: "Yak mass  weasel mass" which is very much not the desired result.
   * That is why this method is prefaced with the label "simple".
   * <p/>
   * The example only fails because there is no tag between the two signs. If the title was,
   * instead, "Yak mass &lt; whale mass, &lt;p&gt;but yak mass &gt; weasel mass", then only the
   * paragraph tag would be removed and the rest of the title would be left alone.
   *
   * TODO: Augment the RegEx to fix the above corner case.  This can be accomplished by ensuring
   *   todo: all opening tags have matching closing tags, then handling valid singleton tags
   *   todo: as special cases.
   *
   * @param s The String which will have all of its tags removed
   * @return The <code>s</code> parameter with all tags removed
   */
  public static String simpleStripAllTags(String s) {
    // '<', then as few non-bracket characters as possible, then '>'
    final String innermostBrackets = "<[^<>]*?>";
    final String nothing = "";
    return s.replaceAll(innermostBrackets, nothing);
  }

  /**
   * Transform a xml string to html text: the markup is dropped, only the text content is kept
   * and that text is escaped using html entities.
   *
   * @param xmlContent xml; may be a fragment without a single root element
   * @return the html text; the empty string if xmlContent is null or the transformation fails
   */
  public static String transformXMLtoHtmlText(String xmlContent) {
    if (xmlContent == null) {
      return "";
    }

    String result = "";

    try {
      // surround the xml content with a temporary root element to make sure that it can be parsed.
      final String wrapped = "<temprootelement>" + xmlContent +  "</temprootelement>";
      final DocumentBuilder parser = DocumentBuilderFactory.newInstance().newDocumentBuilder();
      final Document parsed = parser.parse(new InputSource(new StringReader(wrapped)));

      // the "text" output method removes all the elements from the xml content
      final StringWriter textOnly = new StringWriter();
      final Transformer toText = TransformerFactory.newInstance().newTransformer();
      toText.setOutputProperty(OutputKeys.METHOD, "text");
      toText.transform(new DOMSource(parsed), new StreamResult(textOnly));

      result = textOnly.toString();
      // make sure all the characters are escaped using html entities
      result = StringEscapeUtils.escapeHtml(result);
    } catch (Exception e) {
      // best effort: the failure is only logged and the caller gets what was produced so far
      log.info("Failed to transform " + xmlContent + " to html text", e);
    }

    return result;
  }

  /**
   * Truncate text to less than <code>truncatedLength</code> characters, appending "..." to
   * mark the cut.
   * <p/>
   * Blank text (null, empty or whitespace only) and text that already fits are returned
   * unchanged. Otherwise the text is cut at the last whitespace character that leaves room for
   * the "..." suffix; if there is no such whitespace it is cut at a fixed position instead.
   * <p/>
   * When <code>truncatedLength</code> is too small to hold the suffix (3 or less) the text is
   * simply cut to <code>truncatedLength</code> characters without a suffix; a length of 0 or
   * less gives the empty string.
   *
   * @param text text to truncate
   * @param truncatedLength truncate length
   * @return truncated text
   */
  public static String truncateText(String text, int truncatedLength) {
    if (StringUtils.isBlank(text)) {
      return text;
    }

    if (text.length() <= truncatedLength) {
      return text;
    }

    final String abrsfx = "...";
    final int abrsfxlen = abrsfx.length();

    // No room for the suffix: cut hard. Without this guard the index arithmetic below goes
    // negative and charAt/substring throw StringIndexOutOfBoundsException.
    if (truncatedLength <= abrsfxlen) {
      return text.substring(0, Math.max(truncatedLength, 0));
    }

    // last position at which the text can be cut while leaving room for the suffix
    final int latestCut = truncatedLength - abrsfxlen - 1;

    // attempt to truncate on a word boundary
    int index = latestCut;
    while (index > 0 && !Character.isWhitespace(text.charAt(index))) {
      index--;
    }

    // no whitespace found before reaching the start of the text: cut mid-word
    if (index == 0) {
      index = latestCut;
    }

    final String truncated = text.substring(0, index) + abrsfx;
    assert truncated.length() <= truncatedLength;

    return truncated;
  }

  /**
   * Truncate text like truncateText does and, if the truncation cut off the closing tag of the
   * last italic element, append a closing italic tag. Only italic tags are handled. The appended
   * tag is added after truncating, so the result can be longer than the truncated text.
   *
   * @param text text to truncate; null is returned as null
   * @param truncatedLength truncate length
   * @return truncated text
   */
  public static String truncateTextCloseOpenTag(String text, final int truncatedLength) {
    final String shortenedText = truncateText(text, truncatedLength);

    // truncateText hands null straight back; without this guard the lookup below would throw
    // a NullPointerException.
    if (shortenedText == null) {
      return null;
    }

    final int openIndex = shortenedText.lastIndexOf("<i>");
    final boolean unclosed = openIndex != -1 && shortenedText.indexOf("</i>", openIndex) == -1;

    return unclosed ? shortenedText + "</i>" : shortenedText;
  }

  /**
   * Create a display string for a list of authors. Up to three authors are all listed,
   * separated by ", ". With more than three, only the first two and the last one are listed
   * (each trimmed), with "[...]" standing in for the omitted ones.
   *
   * @param authors the list of authors
   *
   * @return a combined string of first, second and last authors
   */
  public static String makeAuthorString(String[] authors) {
    final int count = authors.length;

    if (count > 3) {
      //use first two and last.
      final String first = authors[0].trim();
      final String second = authors[1].trim();
      final String last = authors[count - 1].trim();
      return first + ", " + second + ", [...], " + last;
    }

    return StringUtils.join(authors, ", ");
  }
}