/*
* Copyright 2016 Studentmediene i Trondheim AS
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package no.dusken.momus.service.drive;
import no.dusken.momus.test.AbstractTestRunner;
import org.junit.Test;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.transaction.annotation.Transactional;
import static org.junit.Assert.assertEquals;
@Transactional
public class GoogleDocsTextConverterTest extends AbstractTestRunner {
@Autowired
GoogleDocsTextConverter googleDocsTextConverter;
@Test
public void testDefaultConverting() {
String in = "<html><head><title>testdrive2 - Momus</title><meta content=\"text/html; charset=UTF-8\" http-equiv=\"content-type\"><style type=\"text/css\">ol{margin:0;padding:0}.c2{max-width:468pt;background-color:#ffffff;padding:72pt 72pt 72pt 72pt}.c0{widows:2;orphans:2;direction:ltr}.c1{page-break-after:avoid}.c3{height:11pt}.title{widows:2;padding-top:0pt;line-height:1.15;orphans:2;text-align:left;color:#000000;font-size:21pt;font-family:\"Trebuchet MS\";padding-bottom:0pt;page-break-after:avoid}.subtitle{widows:2;padding-top:0pt;line-height:1.15;orphans:2;text-align:left;color:#666666;font-style:italic;font-size:13pt;font-family:\"Trebuchet MS\";padding-bottom:10pt;page-break-after:avoid}li{color:#000000;font-size:11pt;font-family:\"Arial\"}p{color:#000000;font-size:11pt;margin:0;font-family:\"Arial\"}h1{widows:2;padding-top:10pt;line-height:1.15;orphans:2;text-align:left;color:#000000;font-size:16pt;font-family:\"Trebuchet MS\";padding-bottom:0pt;page-break-after:avoid}h2{widows:2;padding-top:10pt;line-height:1.15;orphans:2;text-align:left;color:#000000;font-size:13pt;font-family:\"Trebuchet MS\";font-weight:bold;padding-bottom:0pt;page-break-after:avoid}h3{widows:2;padding-top:8pt;line-height:1.15;orphans:2;text-align:left;color:#666666;font-size:12pt;font-family:\"Trebuchet MS\";font-weight:bold;padding-bottom:0pt;page-break-after:avoid}h4{widows:2;padding-top:8pt;line-height:1.15;orphans:2;text-align:left;color:#666666;font-size:11pt;text-decoration:underline;font-family:\"Trebuchet MS\";padding-bottom:0pt;page-break-after:avoid}h5{widows:2;padding-top:8pt;line-height:1.15;orphans:2;text-align:left;color:#666666;font-size:11pt;font-family:\"Trebuchet MS\";padding-bottom:0pt;page-break-after:avoid}h6{widows:2;padding-top:8pt;line-height:1.15;orphans:2;text-align:left;color:#666666;font-style:italic;font-size:11pt;font-family:\"Trebuchet MS\";padding-bottom:0pt;page-break-after:avoid}</style></head><body class=\"c2\"><h1 class=\"c0 c1\"><a name=\"h.klimruoas4tf\"></a><span>Tittel</span></h1><h2 class=\"c0 c1\"><a name=\"h.wk14mrzgbisr\"></a><span>Stikktittel</span></h2><h4 class=\"c0 c1\"><a name=\"h.wlk3cuxmle39\"></a><span>Ingress</span></h4><p class=\"c0\"><span>tekst</span></p><h3 class=\"c0 c1\"><a name=\"h.i0585wht33oz\"></a><span>mellomtittel</span></h3><p class=\"c0\"><span>mere tekst</span></p><h3 class=\"c0 c1\"><a name=\"h.m6op3h3ghry3\"></a><span>ny mellomtittel</span></h3><p class=\"c0\"><span>enda mere tekst!</span></p><p class=\"c0 c3\"><span></span></p><p class=\"c0\"><span>pluss et nytt avsnitt her nede!</span></p></body></html>";
String expectedOut = "<h1>Tittel</h1><h2>Stikktittel</h2><h4>Ingress</h4><p>tekst</p><h3>mellomtittel</h3><p>mere tekst</p><h3>ny mellomtittel</h3><p>enda mere tekst!</p><p>pluss et nytt avsnitt her nede!</p>";
assertEquals(expectedOut, googleDocsTextConverter.convert(in));
}
@Test
public void ignoreComments() {
String in = "<html><head><title>testdrive2 - Momus</title><meta content=\"text/html; charset=UTF-8\" http-equiv=\"content-type\"><style type=\"text/css\">ol{margin:0;padding:0}.c4{vertical-align:baseline;color:#000000;font-size:11pt;font-style:normal;font-family:\"Arial\";text-decoration:none;font-weight:normal}.c0{line-height:1.0;padding-top:0pt;text-align:left;direction:ltr;padding-bottom:0pt}.c3{max-width:468pt;background-color:#ffffff;padding:72pt 72pt 72pt 72pt}.c1{widows:2;orphans:2;direction:ltr}.c5{margin:5px;border:1px solid black}.c2{page-break-after:avoid}.c6{height:11pt}.title{widows:2;padding-top:0pt;line-height:1.15;orphans:2;text-align:left;color:#000000;font-size:21pt;font-family:\"Trebuchet MS\";padding-bottom:0pt;page-break-after:avoid}.subtitle{widows:2;padding-top:0pt;line-height:1.15;orphans:2;text-align:left;color:#666666;font-style:italic;font-size:13pt;font-family:\"Trebuchet MS\";padding-bottom:10pt;page-break-after:avoid}li{color:#000000;font-size:11pt;font-family:\"Arial\"}p{color:#000000;font-size:11pt;margin:0;font-family:\"Arial\"}h1{widows:2;padding-top:10pt;line-height:1.15;orphans:2;text-align:left;color:#000000;font-size:16pt;font-family:\"Trebuchet MS\";padding-bottom:0pt;page-break-after:avoid}h2{widows:2;padding-top:10pt;line-height:1.15;orphans:2;text-align:left;color:#000000;font-size:13pt;font-family:\"Trebuchet MS\";font-weight:bold;padding-bottom:0pt;page-break-after:avoid}h3{widows:2;padding-top:8pt;line-height:1.15;orphans:2;text-align:left;color:#666666;font-size:12pt;font-family:\"Trebuchet MS\";font-weight:bold;padding-bottom:0pt;page-break-after:avoid}h4{widows:2;padding-top:8pt;line-height:1.15;orphans:2;text-align:left;color:#666666;font-size:11pt;text-decoration:underline;font-family:\"Trebuchet MS\";padding-bottom:0pt;page-break-after:avoid}h5{widows:2;padding-top:8pt;line-height:1.15;orphans:2;text-align:left;color:#666666;font-size:11pt;font-family:\"Trebuchet MS\";padding-bottom:0pt;page-break-after:avoid}h6{widows:2;padding-top:8pt;line-height:1.15;orphans:2;text-align:left;color:#666666;font-style:italic;font-size:11pt;font-family:\"Trebuchet MS\";padding-bottom:0pt;page-break-after:avoid}</style></head><body class=\"c3\"><h1 class=\"c1 c2\"><a name=\"h.klimruoas4tf\"></a><span>Tittel</span></h1><p class=\"c1\"><span>tekst</span></p><h3 class=\"c1 c2\"><a name=\"h.i0585wht33oz\"></a><span>mellomtittel</span></h3><p class=\"c1\"><span>m</span><span>ere</span><sup><a href=\"#cmnt1\" name=\"cmnt_ref1\">[a]</a></sup><sup><a href=\"#cmnt2\" name=\"cmnt_ref2\">[b]</a></sup><span> tekst</span></p><p class=\"c1 c6\"><span></span></p><p class=\"c1\"><span>pluss et </span><span>nytt avsnitt</span><sup><a href=\"#cmnt3\" name=\"cmnt_ref3\">[c]</a></sup><span> her</span></p><div class=\"c5\"><p class=\"c0\"><a href=\"#cmnt_ref1\" name=\"cmnt1\">[a]</a><span class=\"c4\">bra kommentar</span></p></div><div class=\"c5\"><p class=\"c0\"><a href=\"#cmnt_ref2\" name=\"cmnt2\">[b]</a><span class=\"c4\">enig</span></p></div><div class=\"c5\"><p class=\"c0\"><a href=\"#cmnt_ref3\" name=\"cmnt3\">[c]</a><span class=\"c4\">enda en kommentar</span></p></div></body></html>";
String expectedOut = "<h1>Tittel</h1><p>tekst</p><h3>mellomtittel</h3><p>mere tekst</p><p>pluss et nytt avsnitt her</p>";
assertEquals(expectedOut, googleDocsTextConverter.convert(in));
}
@Test
public void handleSpecialCharacters() {
String in = "<html><head><title>testdrive2 - Momus</title><meta content=\"text/html; charset=UTF-8\" http-equiv=\"content-type\"><style type=\"text/css\">ol{margin:0;padding:0}.c1{widows:2;orphans:2;direction:ltr;page-break-after:avoid}.c2{widows:2;orphans:2;height:11pt;direction:ltr}.c3{widows:2;orphans:2;direction:ltr}.c0{max-width:468pt;background-color:#ffffff;padding:72pt 72pt 72pt 72pt}.title{widows:2;padding-top:0pt;line-height:1.15;orphans:2;text-align:left;color:#000000;font-size:21pt;font-family:\"Trebuchet MS\";padding-bottom:0pt;page-break-after:avoid}.subtitle{widows:2;padding-top:0pt;line-height:1.15;orphans:2;text-align:left;color:#666666;font-style:italic;font-size:13pt;font-family:\"Trebuchet MS\";padding-bottom:10pt;page-break-after:avoid}li{color:#000000;font-size:11pt;font-family:\"Arial\"}p{color:#000000;font-size:11pt;margin:0;font-family:\"Arial\"}h1{widows:2;padding-top:10pt;line-height:1.15;orphans:2;text-align:left;color:#000000;font-size:16pt;font-family:\"Trebuchet MS\";padding-bottom:0pt;page-break-after:avoid}h2{widows:2;padding-top:10pt;line-height:1.15;orphans:2;text-align:left;color:#000000;font-size:13pt;font-family:\"Trebuchet MS\";font-weight:bold;padding-bottom:0pt;page-break-after:avoid}h3{widows:2;padding-top:8pt;line-height:1.15;orphans:2;text-align:left;color:#666666;font-size:12pt;font-family:\"Trebuchet MS\";font-weight:bold;padding-bottom:0pt;page-break-after:avoid}h4{widows:2;padding-top:8pt;line-height:1.15;orphans:2;text-align:left;color:#666666;font-size:11pt;text-decoration:underline;font-family:\"Trebuchet MS\";padding-bottom:0pt;page-break-after:avoid}h5{widows:2;padding-top:8pt;line-height:1.15;orphans:2;text-align:left;color:#666666;font-size:11pt;font-family:\"Trebuchet MS\";padding-bottom:0pt;page-break-after:avoid}h6{widows:2;padding-top:8pt;line-height:1.15;orphans:2;text-align:left;color:#666666;font-style:italic;font-size:11pt;font-family:\"Trebuchet MS\";padding-bottom:0pt;page-break-after:avoid}</style></head><body class=\"c0\"><h1 class=\"c1\"><a name=\"h.klimruoas4tf\"></a><span>Tittel</span></h1><p class=\"c3\"><span>tekst med æøå</span></p><p class=\"c2\"><span></span></p><p class=\"c3\"><span>og så ü og én</span></p><p class=\"c3\"><span>“Dette er quotes!”</span></p></body></html>";
String expectedOut = "<h1>Tittel</h1><p>tekst med æøå</p><p>og så ü og én</p><p>«Dette er quotes!»</p>";
assertEquals(expectedOut, googleDocsTextConverter.convert(in));
}
@Test
public void ignoreInvalidContent() {
String in = "<html><head><title>testdrive2 - Momus</title><meta content=\"text/html; charset=UTF-8\" http-equiv=\"content-type\"><style type=\"text/css\">ol{margin:0;padding:0}.c6{border-bottom-width:1pt;border-top-style:solid;width:234pt;border-right-style:solid;padding:5pt 5pt 5pt 5pt;border-bottom-color:#000000;border-top-width:1pt;border-bottom-style:solid;vertical-align:top;border-top-color:#000000;border-left-color:#000000;border-right-color:#000000;border-left-style:solid;border-right-width:1pt;border-left-width:1pt}.c3{vertical-align:baseline;color:#000000;font-size:11pt;font-style:normal;font-family:\"Arial\";text-decoration:none;font-weight:normal}.c0{line-height:1.0;padding-top:0pt;text-align:left;direction:ltr;padding-bottom:0pt}.c4{widows:2;orphans:2;direction:ltr;page-break-after:avoid}.c2{widows:2;orphans:2;height:11pt;direction:ltr}.c5{widows:2;orphans:2;direction:ltr}.c7{max-width:468pt;background-color:#ffffff;padding:72pt 72pt 72pt 72pt}.c8{margin-right:auto;border-collapse:collapse}.c1{height:0pt}.title{widows:2;padding-top:0pt;line-height:1.15;orphans:2;text-align:left;color:#000000;font-size:21pt;font-family:\"Trebuchet MS\";padding-bottom:0pt;page-break-after:avoid}.subtitle{widows:2;padding-top:0pt;line-height:1.15;orphans:2;text-align:left;color:#666666;font-style:italic;font-size:13pt;font-family:\"Trebuchet MS\";padding-bottom:10pt;page-break-after:avoid}li{color:#000000;font-size:11pt;font-family:\"Arial\"}p{color:#000000;font-size:11pt;margin:0;font-family:\"Arial\"}h1{widows:2;padding-top:10pt;line-height:1.15;orphans:2;text-align:left;color:#000000;font-size:16pt;font-family:\"Trebuchet MS\";padding-bottom:0pt;page-break-after:avoid}h2{widows:2;padding-top:10pt;line-height:1.15;orphans:2;text-align:left;color:#000000;font-size:13pt;font-family:\"Trebuchet MS\";font-weight:bold;padding-bottom:0pt;page-break-after:avoid}h3{widows:2;padding-top:8pt;line-height:1.15;orphans:2;text-align:left;color:#666666;font-size:12pt;font-family:\"Trebuchet MS\";font-weight:bold;padding-bottom:0pt;page-break-after:avoid}h4{widows:2;padding-top:8pt;line-height:1.15;orphans:2;text-align:left;color:#666666;font-size:11pt;text-decoration:underline;font-family:\"Trebuchet MS\";padding-bottom:0pt;page-break-after:avoid}h5{widows:2;padding-top:8pt;line-height:1.15;orphans:2;text-align:left;color:#666666;font-size:11pt;font-family:\"Trebuchet MS\";padding-bottom:0pt;page-break-after:avoid}h6{widows:2;padding-top:8pt;line-height:1.15;orphans:2;text-align:left;color:#666666;font-style:italic;font-size:11pt;font-family:\"Trebuchet MS\";padding-bottom:0pt;page-break-after:avoid}</style></head><body class=\"c7\"><p class=\"c4 title\"><a name=\"h.5mgvuy87q6ds\"></a><span>Supertittel</span></p><h1 class=\"c4\"><a name=\"h.t33j45bsjg6t\"></a><span>Tittel</span></h1><p class=\"c4 subtitle\"><a name=\"h.ue67lsjbbitu\"></a><span>subtitle</span></p><p class=\"c2\"><span></span></p><a href=\"#\" name=\"097dc335d7721ecbc3f98b8b9481c555cb82eec5\"></a><a href=\"#\" name=\"0\"></a><table cellpadding=\"0\" cellspacing=\"0\" class=\"c8\"><tbody><tr class=\"c1\"><td class=\"c6\" colspan=\"1\" rowspan=\"1\"><p class=\"c0\"><span class=\"c3\">table</span></p></td><td class=\"c6\" colspan=\"1\" rowspan=\"1\"><p class=\"c0\"><span class=\"c3\">table</span></p></td></tr><tr class=\"c1\"><td class=\"c6\" colspan=\"1\" rowspan=\"1\"><p class=\"c0\"><span class=\"c3\">table</span></p></td><td class=\"c6\" colspan=\"1\" rowspan=\"1\"><p class=\"c0\"><span class=\"c3\">tablelow</span></p></td></tr></tbody></table><p class=\"c2\"><span></span></p><p class=\"c5\"><span>bilde:</span></p><p class=\"c5\"><span style=\"overflow: hidden; display: inline-block; margin: 0.00px 0.00px; border: 0.00px solid #000000; transform: rotate(0.00rad) translateZ(0px); -webkit-transform: rotate(0.00rad) translateZ(0px); width: 624.00px; height: 350.67px;\"><img alt=\"File:Burosch Blue-Only Test\" src=\"https://lh5.googleusercontent.com/WVLeYr01PvwOhymQxbgdGY79IFp8amzGexKcnzSkQisHvb_brIzix6SEr-mvqF9JeaZ6h0Y7PvVffh4xBFFruM5k_6kUhbBA-1EiOeYVkQq6_adfVMxWU6rDyN84UqD_FA\" style=\"width: 624.00px; height: 350.67px; margin-left: 0.00px; margin-top: 0.00px; transform: rotate(0.00rad) translateZ(0px); -webkit-transform: rotate(0.00rad) translateZ(0px);\" title=\"\"></span></p></body></html>";
String expectedOut = "<p>Supertittel</p><h1>Tittel</h1><p>subtitle</p><p>bilde:</p>";
assertEquals(expectedOut, googleDocsTextConverter.convert(in));
}
@Test
public void lists() {
String in = "<html><head><title>testdrive2 - Momus</title><meta content=\"text/html; charset=UTF-8\" http-equiv=\"content-type\"><style type=\"text/css\">.lst-kix_9ltdnuo7r9ax-4>li{counter-increment:lst-ctn-kix_9ltdnuo7r9ax-4}.lst-kix_9ltdnuo7r9ax-3>li{counter-increment:lst-ctn-kix_9ltdnuo7r9ax-3}.lst-kix_9ltdnuo7r9ax-7>li{counter-increment:lst-ctn-kix_9ltdnuo7r9ax-7}.lst-kix_5lnkg6g390bd-3>li:before{content:\"\\0025cf \"}.lst-kix_9ltdnuo7r9ax-5>li:before{content:\"\" counter(lst-ctn-kix_9ltdnuo7r9ax-5,lower-roman) \". \"}ol.lst-kix_9ltdnuo7r9ax-8.start{counter-reset:lst-ctn-kix_9ltdnuo7r9ax-8 0}.lst-kix_5lnkg6g390bd-5>li:before{content:\"\\0027a2 \"}.lst-kix_9ltdnuo7r9ax-5>li{counter-increment:lst-ctn-kix_9ltdnuo7r9ax-5}.lst-kix_9ltdnuo7r9ax-6>li{counter-increment:lst-ctn-kix_9ltdnuo7r9ax-6}.lst-kix_5lnkg6g390bd-7>li:before{content:\"\\0025cf \"}.lst-kix_9ltdnuo7r9ax-0>li{counter-increment:lst-ctn-kix_9ltdnuo7r9ax-0}ol.lst-kix_9ltdnuo7r9ax-7.start{counter-reset:lst-ctn-kix_9ltdnuo7r9ax-7 0}.lst-kix_5lnkg6g390bd-0>li:before{content:\"\\002756 \"}ol.lst-kix_9ltdnuo7r9ax-4.start{counter-reset:lst-ctn-kix_9ltdnuo7r9ax-4 0}.lst-kix_9ltdnuo7r9ax-2>li{counter-increment:lst-ctn-kix_9ltdnuo7r9ax-2}.lst-kix_9ltdnuo7r9ax-1>li{counter-increment:lst-ctn-kix_9ltdnuo7r9ax-1}.lst-kix_5lnkg6g390bd-1>li:before{content:\"\\0027a2 \"}.lst-kix_9ltdnuo7r9ax-2>li:before{content:\"\" counter(lst-ctn-kix_9ltdnuo7r9ax-2,lower-roman) \". \"}ol.lst-kix_9ltdnuo7r9ax-0.start{counter-reset:lst-ctn-kix_9ltdnuo7r9ax-0 0}.lst-kix_5lnkg6g390bd-6>li:before{content:\"\\0025a0 \"}.lst-kix_9ltdnuo7r9ax-7>li:before{content:\"\" counter(lst-ctn-kix_9ltdnuo7r9ax-7,lower-latin) \". \"}.lst-kix_9ltdnuo7r9ax-8>li:before{content:\"\" counter(lst-ctn-kix_9ltdnuo7r9ax-8,lower-roman) \". \"}.lst-kix_5lnkg6g390bd-4>li:before{content:\"\\0025c6 \"}.lst-kix_9ltdnuo7r9ax-3>li:before{content:\"\" counter(lst-ctn-kix_9ltdnuo7r9ax-3,decimal) \". \"}.lst-kix_9ltdnuo7r9ax-8>li{counter-increment:lst-ctn-kix_9ltdnuo7r9ax-8}.lst-kix_5lnkg6g390bd-2>li:before{content:\"\\0025a0 \"}ol.lst-kix_9ltdnuo7r9ax-5.start{counter-reset:lst-ctn-kix_9ltdnuo7r9ax-5 0}ol.lst-kix_9ltdnuo7r9ax-8{list-style-type:none}ol.lst-kix_9ltdnuo7r9ax-7{list-style-type:none}ol.lst-kix_9ltdnuo7r9ax-1.start{counter-reset:lst-ctn-kix_9ltdnuo7r9ax-1 0}.lst-kix_9ltdnuo7r9ax-6>li:before{content:\"\" counter(lst-ctn-kix_9ltdnuo7r9ax-6,decimal) \". \"}.lst-kix_9ltdnuo7r9ax-1>li:before{content:\"\" counter(lst-ctn-kix_9ltdnuo7r9ax-1,lower-latin) \". \"}.lst-kix_9ltdnuo7r9ax-0>li:before{content:\"\" counter(lst-ctn-kix_9ltdnuo7r9ax-0,decimal) \". \"}.lst-kix_9ltdnuo7r9ax-4>li:before{content:\"\" counter(lst-ctn-kix_9ltdnuo7r9ax-4,lower-latin) \". \"}ol.lst-kix_9ltdnuo7r9ax-1{list-style-type:none}ol.lst-kix_9ltdnuo7r9ax-2{list-style-type:none}ol.lst-kix_9ltdnuo7r9ax-0{list-style-type:none}ol.lst-kix_9ltdnuo7r9ax-5{list-style-type:none}ol.lst-kix_9ltdnuo7r9ax-6{list-style-type:none}.lst-kix_5lnkg6g390bd-8>li:before{content:\"\\0025c6 \"}ol.lst-kix_9ltdnuo7r9ax-3{list-style-type:none}ol.lst-kix_9ltdnuo7r9ax-4{list-style-type:none}ol.lst-kix_9ltdnuo7r9ax-2.start{counter-reset:lst-ctn-kix_9ltdnuo7r9ax-2 0}ul.lst-kix_5lnkg6g390bd-0{list-style-type:none}ol.lst-kix_9ltdnuo7r9ax-6.start{counter-reset:lst-ctn-kix_9ltdnuo7r9ax-6 0}ul.lst-kix_5lnkg6g390bd-5{list-style-type:none}ul.lst-kix_5lnkg6g390bd-6{list-style-type:none}ul.lst-kix_5lnkg6g390bd-7{list-style-type:none}ul.lst-kix_5lnkg6g390bd-8{list-style-type:none}ul.lst-kix_5lnkg6g390bd-1{list-style-type:none}ul.lst-kix_5lnkg6g390bd-2{list-style-type:none}ul.lst-kix_5lnkg6g390bd-3{list-style-type:none}ul.lst-kix_5lnkg6g390bd-4{list-style-type:none}ol.lst-kix_9ltdnuo7r9ax-3.start{counter-reset:lst-ctn-kix_9ltdnuo7r9ax-3 0}ol{margin:0;padding:0}.c0{padding-left:0pt;widows:2;orphans:2;direction:ltr;margin-left:36pt}.c3{widows:2;orphans:2;direction:ltr}.c2{max-width:468pt;background-color:#ffffff;padding:72pt 72pt 72pt 72pt}.c1{margin:0;padding:0}.title{widows:2;padding-top:0pt;line-height:1.15;orphans:2;text-align:left;color:#000000;font-size:21pt;font-family:\"Trebuchet MS\";padding-bottom:0pt;page-break-after:avoid}.subtitle{widows:2;padding-top:0pt;line-height:1.15;orphans:2;text-align:left;color:#666666;font-style:italic;font-size:13pt;font-family:\"Trebuchet MS\";padding-bottom:10pt;page-break-after:avoid}li{color:#000000;font-size:11pt;font-family:\"Arial\"}p{color:#000000;font-size:11pt;margin:0;font-family:\"Arial\"}h1{widows:2;padding-top:10pt;line-height:1.15;orphans:2;text-align:left;color:#000000;font-size:16pt;font-family:\"Trebuchet MS\";padding-bottom:0pt;page-break-after:avoid}h2{widows:2;padding-top:10pt;line-height:1.15;orphans:2;text-align:left;color:#000000;font-size:13pt;font-family:\"Trebuchet MS\";font-weight:bold;padding-bottom:0pt;page-break-after:avoid}h3{widows:2;padding-top:8pt;line-height:1.15;orphans:2;text-align:left;color:#666666;font-size:12pt;font-family:\"Trebuchet MS\";font-weight:bold;padding-bottom:0pt;page-break-after:avoid}h4{widows:2;padding-top:8pt;line-height:1.15;orphans:2;text-align:left;color:#666666;font-size:11pt;text-decoration:underline;font-family:\"Trebuchet MS\";padding-bottom:0pt;page-break-after:avoid}h5{widows:2;padding-top:8pt;line-height:1.15;orphans:2;text-align:left;color:#666666;font-size:11pt;font-family:\"Trebuchet MS\";padding-bottom:0pt;page-break-after:avoid}h6{widows:2;padding-top:8pt;line-height:1.15;orphans:2;text-align:left;color:#666666;font-style:italic;font-size:11pt;font-family:\"Trebuchet MS\";padding-bottom:0pt;page-break-after:avoid}</style></head><body class=\"c2\"><p class=\"c3\"><span>noe tekst</span></p><ol class=\"c1 lst-kix_9ltdnuo7r9ax-0 start\" start=\"1\"><li class=\"c0\"><span>liste 1</span></li><li class=\"c0\"><span>liste 2</span></li></ol><p class=\"c3\"><span>mere tekst</span></p><ul class=\"c1 lst-kix_5lnkg6g390bd-0 start\"><li class=\"c0\"><span>punkt 1</span></li><li class=\"c0\"><span>punkt 2</span></li><li class=\"c0\"><span>punkt 3</span></li></ul><p class=\"c3\"><span>slutt</span></p></body></html>";
String expectedOut = "<p>noe tekst</p><ol><li>liste 1</li><li>liste 2</li></ol><p>mere tekst</p><ul><li>punkt 1</li><li>punkt 2</li><li>punkt 3</li></ul><p>slutt</p>";
assertEquals(expectedOut, googleDocsTextConverter.convert(in));
}
@Test
public void escapesHtmlTags() {
String in = "<html><head><title>driiiive - Momus</title><meta content=\"text/html; charset=UTF-8\" http-equiv=\"content-type\"><style type=\"text/css\">ol{margin:0;padding:0}.c1{widows:2;orphans:2;direction:ltr;page-break-after:avoid}.c0{widows:2;orphans:2;height:11pt;direction:ltr}.c2{max-width:468pt;background-color:#ffffff;padding:72pt 72pt 72pt 72pt}.c3{widows:2;orphans:2;direction:ltr}.title{widows:2;padding-top:0pt;line-height:1.15;orphans:2;text-align:left;color:#000000;font-size:21pt;font-family:\"Trebuchet MS\";padding-bottom:0pt;page-break-after:avoid}.subtitle{widows:2;padding-top:0pt;line-height:1.15;orphans:2;text-align:left;color:#666666;font-style:italic;font-size:13pt;font-family:\"Trebuchet MS\";padding-bottom:10pt;page-break-after:avoid}li{color:#000000;font-size:11pt;font-family:\"Arial\"}p{color:#000000;font-size:11pt;margin:0;font-family:\"Arial\"}h1{widows:2;padding-top:10pt;line-height:1.15;orphans:2;text-align:left;color:#000000;font-size:16pt;font-family:\"Trebuchet MS\";padding-bottom:0pt;page-break-after:avoid}h2{widows:2;padding-top:10pt;line-height:1.15;orphans:2;text-align:left;color:#000000;font-size:13pt;font-family:\"Trebuchet MS\";font-weight:bold;padding-bottom:0pt;page-break-after:avoid}h3{widows:2;padding-top:8pt;line-height:1.15;orphans:2;text-align:left;color:#666666;font-size:12pt;font-family:\"Trebuchet MS\";font-weight:bold;padding-bottom:0pt;page-break-after:avoid}h4{widows:2;padding-top:8pt;line-height:1.15;orphans:2;text-align:left;color:#666666;font-size:11pt;text-decoration:underline;font-family:\"Trebuchet MS\";padding-bottom:0pt;page-break-after:avoid}h5{widows:2;padding-top:8pt;line-height:1.15;orphans:2;text-align:left;color:#666666;font-size:11pt;font-family:\"Trebuchet MS\";padding-bottom:0pt;page-break-after:avoid}h6{widows:2;padding-top:8pt;line-height:1.15;orphans:2;text-align:left;color:#666666;font-style:italic;font-size:11pt;font-family:\"Trebuchet MS\";padding-bottom:0pt;page-break-after:avoid}</style></head><body class=\"c2\"><h1 class=\"c1\"><a name=\"h.a3lf94q9n7x7\"></a><span>Min artikkel, yay</span></h1><p class=\"c0\"><span></span></p><h3 class=\"c1\"><a name=\"h.pm5s5pceyhc9\"></a><span>hehehe <b>fet</b></span></h3><p class=\"c3\"><span>vanlig tekst < og >!</span></p><p class=\"c0\"><span></span></p></body></html>";
String expectedOut = "<h1>Min artikkel, yay</h1><h3>hehehe <b>fet</b></h3><p>vanlig tekst < og >!</p>";
assertEquals(expectedOut, googleDocsTextConverter.convert(in));
}
@Test
public void ignoreComments2() {
String in = "<html><head><title>testlive - Momus</title><meta content=\"text/html; charset=UTF-8\" http-equiv=\"content-type\"><style type=\"text/css\">ol{margin:0;padding:0}.c3{vertical-align:baseline;color:#000000;font-size:11pt;font-style:normal;font-family:\"Arial\";text-decoration:none;font-weight:normal}.c5{line-height:1.0;padding-top:0pt;text-align:left;direction:ltr;padding-bottom:0pt}.c1{max-width:468pt;background-color:#ffffff;padding:72pt 72pt 72pt 72pt}.c0{widows:2;orphans:2;direction:ltr}.c4{margin:5px;border:1px solid black}.c2{page-break-after:avoid}.c6{height:11pt}.title{widows:2;padding-top:0pt;line-height:1.15;orphans:2;text-align:left;color:#000000;font-size:21pt;font-family:\"Trebuchet MS\";padding-bottom:0pt;page-break-after:avoid}.subtitle{widows:2;padding-top:0pt;line-height:1.15;orphans:2;text-align:left;color:#666666;font-style:italic;font-size:13pt;font-family:\"Trebuchet MS\";padding-bottom:10pt;page-break-after:avoid}li{color:#000000;font-size:11pt;font-family:\"Arial\"}p{color:#000000;font-size:11pt;margin:0;font-family:\"Arial\"}h1{widows:2;padding-top:10pt;line-height:1.15;orphans:2;text-align:left;color:#000000;font-size:16pt;font-family:\"Trebuchet MS\";padding-bottom:0pt;page-break-after:avoid}h2{widows:2;padding-top:10pt;line-height:1.15;orphans:2;text-align:left;color:#000000;font-size:13pt;font-family:\"Trebuchet MS\";font-weight:bold;padding-bottom:0pt;page-break-after:avoid}h3{widows:2;padding-top:8pt;line-height:1.15;orphans:2;text-align:left;color:#666666;font-size:12pt;font-family:\"Trebuchet MS\";font-weight:bold;padding-bottom:0pt;page-break-after:avoid}h4{widows:2;padding-top:8pt;line-height:1.15;orphans:2;text-align:left;color:#666666;font-size:11pt;text-decoration:underline;font-family:\"Trebuchet MS\";padding-bottom:0pt;page-break-after:avoid}h5{widows:2;padding-top:8pt;line-height:1.15;orphans:2;text-align:left;color:#666666;font-size:11pt;font-family:\"Trebuchet MS\";padding-bottom:0pt;page-break-after:avoid}h6{widows:2;padding-top:8pt;line-height:1.15;orphans:2;text-align:left;color:#666666;font-style:italic;font-size:11pt;font-family:\"Trebuchet MS\";padding-bottom:0pt;page-break-after:avoid}</style></head><body class=\"c1\"><h1 class=\"c0 c2\"><a name=\"h.v73hro9b6ve\"></a><span>Live edit, funke</span><span>r du </span><span>bra</span><sup><a href=\"#cmnt1\" name=\"cmnt_ref1\">[a]</a></sup><span>?</span></h1><p class=\"c0\"><span>MEd formattering a?</span></p><p class=\"c0\"><span>yesh</span></p><p class=\"c0 c6\"><span></span></p><h2 class=\"c0 c2\"><a name=\"h.q7ppih1lv9lb\"></a><span>Noe greiieieer</span></h2><p class=\"c0\"><span>dette bør bli en ny historikk</span></p><h2 class=\"c0 c2\"><a name=\"h.2yvhwryft3t7\"></a><span>mer greir</span></h2><p class=\"c0\"><span>lalalaoooooo23</span></p><div class=\"c4\"><p class=\"c5\"><a href=\"#cmnt_ref1\" name=\"cmnt1\">[a]</a><span class=\"c3\">kommentar</span></p></div></body></html>";
String expectedOut = "<h1>Live edit, funker du bra?</h1><p>MEd formattering a?</p><p>yesh</p><h2>Noe greiieieer</h2><p>dette bør bli en ny historikk</p><h2>mer greir</h2><p>lalalaoooooo23</p>";
assertEquals(expectedOut, googleDocsTextConverter.convert(in));
}
@Test
public void keepItalicsAndBold() {
String in = "<html><head><title>Live - Momus</title><meta content=\"text/html; charset=UTF-8\" http-equiv=\"content-type\"><style type=\"text/css\">ol{margin:0;padding:0}.c2{widows:2;orphans:2;direction:ltr;page-break-after:avoid}.c1{widows:2;orphans:2;height:11pt;direction:ltr}.c3{widows:2;orphans:2;direction:ltr}.c4{max-width:468pt;background-color:#ffffff;padding:72pt 72pt 72pt 72pt}.c5{font-style:italic}.c0{font-weight:bold}.title{widows:2;padding-top:0pt;line-height:1.15;orphans:2;text-align:left;color:#000000;font-size:21pt;font-family:\"Trebuchet MS\";padding-bottom:0pt;page-break-after:avoid}.subtitle{widows:2;padding-top:0pt;line-height:1.15;orphans:2;text-align:left;color:#666666;font-style:italic;font-size:13pt;font-family:\"Trebuchet MS\";padding-bottom:10pt;page-break-after:avoid}li{color:#000000;font-size:11pt;font-family:\"Arial\"}p{color:#000000;font-size:11pt;margin:0;font-family:\"Arial\"}h1{widows:2;padding-top:10pt;line-height:1.15;orphans:2;text-align:left;color:#000000;font-size:16pt;font-family:\"Trebuchet MS\";padding-bottom:0pt;page-break-after:avoid}h2{widows:2;padding-top:10pt;line-height:1.15;orphans:2;text-align:left;color:#000000;font-size:13pt;font-family:\"Trebuchet MS\";font-weight:bold;padding-bottom:0pt;page-break-after:avoid}h3{widows:2;padding-top:8pt;line-height:1.15;orphans:2;text-align:left;color:#666666;font-size:12pt;font-family:\"Trebuchet MS\";font-weight:bold;padding-bottom:0pt;page-break-after:avoid}h4{widows:2;padding-top:8pt;line-height:1.15;orphans:2;text-align:left;color:#666666;font-size:11pt;text-decoration:underline;font-family:\"Trebuchet MS\";padding-bottom:0pt;page-break-after:avoid}h5{widows:2;padding-top:8pt;line-height:1.15;orphans:2;text-align:left;color:#666666;font-size:11pt;font-family:\"Trebuchet MS\";padding-bottom:0pt;page-break-after:avoid}h6{widows:2;padding-top:8pt;line-height:1.15;orphans:2;text-align:left;color:#666666;font-style:italic;font-size:11pt;font-family:\"Trebuchet MS\";padding-bottom:0pt;page-break-after:avoid}</style></head><body class=\"c4\"><h1 class=\"c2\"><a name=\"h.gft7o754ulsm\"></a><span>Overskrift</span></h1><h4 class=\"c2\"><a name=\"h.hurs0dinyy0h\"></a><span>Kul ingress</span></h4><p class=\"c1\"><span></span></p><p class=\"c3\"><span>En stilig tekst med noe </span><span class=\"c0\">fet</span><span> skrift, noe </span><span class=\"c5\">kursiv</span><span> og mer </span><span class=\"c0\">fet</span><span> skrift!</span></p><p class=\"c1\"><span></span></p><p class=\"c1\"><span></span></p></body></html>";
String expectedOut = "<h1>Overskrift</h1><h4>Kul ingress</h4><p>En stilig tekst med noe <b>fet</b> skrift, noe <i>kursiv</i> og mer <b>fet</b> skrift!</p>";
assertEquals(expectedOut, googleDocsTextConverter.convert(in));
}
}