/* LanguageTool, a natural language style checker * Copyright (C) 2011 Daniel Naber (http://www.danielnaber.de) * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 * USA */ package org.languagetool.dev.wikipedia; import org.junit.Test; import org.languagetool.language.German; import java.io.IOException; import java.net.URL; import java.util.List; import static org.hamcrest.CoreMatchers.is; import static org.junit.Assert.*; public class WikipediaQuickCheckTest { // only for interactive use, as it accesses a remote API public void noTestCheckPage() throws IOException, PageNotFoundException { WikipediaQuickCheck check = new WikipediaQuickCheck(); //String url = "http://de.wikipedia.org/wiki/Benutzer_Diskussion:Dnaber"; //String url = "http://de.wikipedia.org/wiki/OpenThesaurus"; //String url = "http://de.wikipedia.org/wiki/Gütersloh"; //String url = "http://de.wikipedia.org/wiki/Bielefeld"; String url = "https://de.wikipedia.org/wiki/Augsburg"; MarkupAwareWikipediaResult result = check.checkPage(new URL(url)); List<AppliedRuleMatch> appliedMatches = result.getAppliedRuleMatches(); System.out.println("ruleApplications: " + appliedMatches.size()); for (AppliedRuleMatch appliedMatch : appliedMatches) { System.out.println("====="); System.out.println("Rule : " + appliedMatch.getRuleMatch().getRule().getDescription() + "\n"); for (RuleMatchApplication ruleMatchApplication : appliedMatch.getRuleMatchApplications()) { System.out.println("Original : " + ruleMatchApplication.getOriginalErrorContext(10).replace("\n", " ")); if (ruleMatchApplication.hasRealReplacement()) { System.out.println("Corrected: " + ruleMatchApplication.getCorrectedErrorContext(10).replace("\n", " ")); } System.out.println(); } } } @Test public void testCheckWikipediaMarkup() throws IOException { WikipediaQuickCheck check = new WikipediaQuickCheck(); String markup = "== Beispiele ==\n\n" + "Eine kleine Auswahl von Fehlern.\n\n" + "Das Komma ist richtig, wegen dem Leerzeichen."; MediaWikiContent wikiContent = new MediaWikiContent(markup, "2012-11-11T20:00:00"); ErrorMarker errorMarker = new ErrorMarker("<err>", "</err>"); MarkupAwareWikipediaResult result = check.checkWikipediaMarkup(new URL("http://fake-url.org"), wikiContent, new German(), errorMarker); assertThat(result.getLastEditTimestamp(), is("2012-11-11T20:00:00")); List<AppliedRuleMatch> appliedMatches = result.getAppliedRuleMatches(); // even though this error has no suggestion, there's a (pseudo) correction: assertThat(appliedMatches.size(), is(1)); AppliedRuleMatch firstAppliedMatch = appliedMatches.get(0); assertThat(firstAppliedMatch.getRuleMatchApplications().size(), is(1)); RuleMatchApplication ruleMatchApplication = firstAppliedMatch.getRuleMatchApplications().get(0); assertTrue("Got: " + ruleMatchApplication.getTextWithCorrection(), ruleMatchApplication.getTextWithCorrection().contains("<err>wegen dem</err> Leerzeichen.")); assertThat(ruleMatchApplication.getOriginalErrorContext(12), is("st richtig, <err>wegen dem</err> Leerz")); assertThat(ruleMatchApplication.getCorrectedErrorContext(12), is("st richtig, <err>wegen dem</err> Leerz")); } @Test public void testGetPlainText() { WikipediaQuickCheck check = new WikipediaQuickCheck(); String filteredContent = check.getPlainText( "<?xml version=\"1.0\"?><api><query><normalized><n from=\"Benutzer_Diskussion:Dnaber\" to=\"Benutzer Diskussion:Dnaber\" />" + "</normalized><pages><page pageid=\"143424\" ns=\"3\" title=\"Benutzer Diskussion:Dnaber\"><revisions><rev xml:space=\"preserve\">\n" + "Test [[Link]] Foo&nbsp;bar.\n" + "</rev></revisions></page></pages></query></api>"); assertEquals("Test Link Foo\u00A0bar.", filteredContent); } @Test public void testGetPlainTextMapping() { WikipediaQuickCheck check = new WikipediaQuickCheck(); String text = "Test [[Link]] und [[AnotherLink|noch einer]] und [http://test.org external link] Foo&nbsp;bar.\n"; PlainTextMapping filteredContent = check.getPlainTextMapping( "<?xml version=\"1.0\"?><api><query><normalized><n from=\"Benutzer_Diskussion:Dnaber\" to=\"Benutzer Diskussion:Dnaber\" />" + "</normalized><pages><page pageid=\"143424\" ns=\"3\" title=\"Benutzer Diskussion:Dnaber\"><revisions><rev xml:space=\"preserve\">" + text + "</rev></revisions></page></pages></query></api>"); assertEquals("Test Link und noch einer und external link Foo\u00A0bar.", filteredContent.getPlainText()); assertEquals(1, filteredContent.getOriginalTextPositionFor(1).line); assertEquals(1, filteredContent.getOriginalTextPositionFor(1).column); assertEquals(filteredContent.getPlainText().charAt(0), text.charAt(0)); assertEquals('u', text.charAt(14)); // note that these are zero-based, the others are not assertEquals('u', filteredContent.getPlainText().charAt(10)); assertEquals(1, filteredContent.getOriginalTextPositionFor(11).line); assertEquals(15, filteredContent.getOriginalTextPositionFor(11).column); } @Test public void testGetPlainTextMappingMultiLine1() { WikipediaQuickCheck check = new WikipediaQuickCheck(); String text = "Test [[Link]] und [[AnotherLink|noch einer]].\nUnd [[NextLink]] Foobar.\n"; PlainTextMapping filteredContent = check.getPlainTextMapping( "<?xml version=\"1.0\"?><api><query><normalized><n from=\"Benutzer_Diskussion:Dnaber\" to=\"Benutzer Diskussion:Dnaber\" />" + "</normalized><pages><page pageid=\"143424\" ns=\"3\" title=\"Benutzer Diskussion:Dnaber\"><revisions><rev xml:space=\"preserve\">" + text + "</rev></revisions></page></pages></query></api>"); assertEquals("Test Link und noch einer. Und NextLink Foobar.", filteredContent.getPlainText()); assertEquals(1, filteredContent.getOriginalTextPositionFor(1).line); assertEquals(1, filteredContent.getOriginalTextPositionFor(1).column); assertEquals(filteredContent.getPlainText().charAt(0), text.charAt(0)); assertEquals('U', text.charAt(46)); // note that these are zero-based, the others are not assertEquals(' ', filteredContent.getPlainText().charAt(25)); assertEquals('U', filteredContent.getPlainText().charAt(26)); assertEquals(2, filteredContent.getOriginalTextPositionFor(27).line); assertEquals(45, filteredContent.getOriginalTextPositionFor(25).column); assertEquals(1, filteredContent.getOriginalTextPositionFor(26).column); assertEquals(2, filteredContent.getOriginalTextPositionFor(27).column); } @Test public void testGetPlainTextMappingMultiLine2() { WikipediaQuickCheck check = new WikipediaQuickCheck(); String text = "Test [[Link]] und [[AnotherLink|noch einer]].\n\nUnd [[NextLink]] Foobar.\n"; PlainTextMapping filteredContent = check.getPlainTextMapping( "<?xml version=\"1.0\"?><api><query><normalized><n from=\"Benutzer_Diskussion:Dnaber\" to=\"Benutzer Diskussion:Dnaber\" />" + "</normalized><pages><page pageid=\"143424\" ns=\"3\" title=\"Benutzer Diskussion:Dnaber\"><revisions><rev xml:space=\"preserve\">" + text + "</rev></revisions></page></pages></query></api>"); assertEquals("Test Link und noch einer.\n\nUnd NextLink Foobar.", filteredContent.getPlainText()); assertEquals(1, filteredContent.getOriginalTextPositionFor(1).line); assertEquals(1, filteredContent.getOriginalTextPositionFor(1).column); assertEquals(filteredContent.getPlainText().charAt(0), text.charAt(0)); assertEquals('U', text.charAt(47)); // note that these are zero-based, the others are not assertEquals('U', filteredContent.getPlainText().charAt(27)); assertEquals(3, filteredContent.getOriginalTextPositionFor(28).line); assertEquals(45, filteredContent.getOriginalTextPositionFor(25).column); assertEquals(46, filteredContent.getOriginalTextPositionFor(26).column); assertEquals(47, filteredContent.getOriginalTextPositionFor(27).column); assertEquals(1, filteredContent.getOriginalTextPositionFor(28).column); } @Test public void testRemoveInterLanguageLinks() { WikipediaQuickCheck check = new WikipediaQuickCheck(); assertEquals("foo bar", check.removeWikipediaLinks("foo [[pt:Some Article]] bar")); assertEquals("foo [[some link]] bar", check.removeWikipediaLinks("foo [[some link]] bar")); assertEquals("foo [[Some Link]] bar ", check.removeWikipediaLinks("foo [[Some Link]] bar [[pt:Some Article]]")); assertEquals("foo [[zh-min-nan:Linux]] bar", check.removeWikipediaLinks("foo [[zh-min-nan:Linux]] bar")); // known limitation assertEquals("[[Scultura bronzea di Gaudí mentre osserva il suo ''[[Il Capriccio|Capriccio]]'']]", check.removeWikipediaLinks("[[File:Gaudì-capriccio.JPG|thumb|left|Scultura bronzea di Gaudí mentre osserva il suo ''[[Il Capriccio|Capriccio]]'']]")); assertEquals("[[[[Palau de la Música Catalana]], entrada]]", check.removeWikipediaLinks("[[Fitxer:Palau_de_musica_2.JPG|thumb|[[Palau de la Música Catalana]], entrada]]")); assertEquals("foo bar", check.removeWikipediaLinks("foo [[Kategorie:Kurgebäude]] bar")); assertEquals("foo [[''Kursaal Palace'' in San Sebastián]] bar", check.removeWikipediaLinks("foo [[Datei:FestivalSS.jpg|miniatur|''Kursaal Palace'' in San Sebastián]] bar")); assertEquals("[[Yupana, emprat pels [[Inques]].]]", check.removeWikipediaLinks("[[Fitxer:Yupana 1.GIF|thumb|Yupana, emprat pels [[Inques]].]]")); } }