/*
 * ---- 
 *
 * $Id: JUnitRSSParseInternalTests.java,v 1.15 2002/09/22 09:26:42 burton Exp $
 * $Project: http://reptile.openprivacy.org $
 * $CVSROOT: :pserver:anoncvs@sierra.openprivacy.org:/usr/local/cvs/public $
 * $WebCVS: http://www.openprivacy.org/cgi-bin/cvsweb/cvsweb.cgi/sierra/ $
 * $Mailing-List: http://www.openprivacy.org/lists/ $
 * $Bugzilla: http://bugzilla.openprivacy.org/ $
 *
 * ----
 *
 * Copyright 2001 OpenPrivacy.org  All rights reserved.
 *
 * This program is free software; you can redistribute it and/or modify it under
 * the terms of the LICENSE which you should have obtained with this package.
 *
 * This program is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
 * FOR A PARTICULAR PURPOSE.
 */

package org.openprivacy.reptile.tests;

import java.io.*;
import java.net.*;
import java.util.*;

import org.openprivacy.reptile.*;
import org.openprivacy.reptile.actions.*;
import org.openprivacy.reptile.extensions.*;
import org.openprivacy.reptile.init.*;
import org.openprivacy.reptile.search.*;
import org.openprivacy.reptile.util.*;
import org.openprivacy.reptile.xml.*;
import org.openprivacy.reptile.xslt.*;

import talon.*;
import talon.components.*;
import talon.resources.*;
import talon.util.*;
import talon.util.net.*;

import junit.framework.*;

import org.jdom.*;

/**
 * JUnit tests for the string and link handling helpers exposed by
 * <code>RSSContentSerializer</code>: <code>strip</code>,
 * <code>isJunkContent</code>, <code>expand</code>, <code>cleanseHTML</code>,
 * <code>cleansePCDATA</code>, <code>cleanseTitle</code> and
 * <code>relativize</code>.
 *
 * <p>NOTE(review): the tests which call <code>init()</code> or
 * <code>parse()</code> after <code>setResource()</code> presumably fetch the
 * named resource, so they would need the <code>projects.localhost</code> test
 * site (and network access) to be available -- confirm before running them in
 * an isolated environment.</p>
 *
 * @author <a href="mailto:burton@openprivacy.org">Kevin A. Burton</a>
 * @version $Id: JUnitRSSParseInternalTests.java,v 1.15 2002/09/22 09:26:42 burton Exp $
 */
public class JUnitRSSParseInternalTests extends TestCase {

    /** Local page used as the fixture by the tests that call <code>init()</code>. */
    private static final String LOCAL_TEST_PAGE = "http://projects.localhost/tests/test2.html";

    /**
     * Create a new <code>JUnitRSSParseInternalTests</code> instance.
     *
     * @param name name of the test method this instance runs
     * @author <a href="mailto:burton@openprivacy.org">Kevin A. Burton</a>
     */
    public JUnitRSSParseInternalTests( String name ) {

        super( name );

    }

    /**
     * Builds a fresh serializer whose resource is set to the given URL.  Each
     * fixture gets its own instance so no state leaks from one to the next.
     *
     * @param resource URL handed to <code>setResource</code>
     * @return the newly created serializer
     */
    private RSSContentSerializer serializerFor( String resource ) throws Exception {

        RSSContentSerializer serializer = new RSSContentSerializer();
        serializer.setResource( resource );

        return serializer;

    }

    /**
     * Asserts that <code>link</code> expands to <code>expected</code>, naming
     * the offending link in the failure message.
     */
    private void assertExpands( RSSContentSerializer serializer,
                                String expected,
                                String link ) throws Exception {

        assertEquals( "expand( \"" + link + "\" )", expected, serializer.expand( link ) );

    }

    /**
     * Asserts that the serializer classifies <code>content</code> as junk.
     */
    private void assertJunk( RSSContentSerializer serializer, String content ) throws Exception {

        assertTrue( "expected junk content: " + content,
                    serializer.isJunkContent( content ) );

    }

    /**
     * Asserts that the serializer does NOT classify <code>content</code> as
     * junk.
     */
    private void assertNotJunk( RSSContentSerializer serializer, String content ) throws Exception {

        assertTrue( "expected real content: " + content,
                    ! serializer.isJunkContent( content ) );

    }

    /**
     * Tests that <code>strip</code> removes leading and trailing whitespace
     * and collapses the inner run of spaces to a single space.
     */
    public void testStrip() throws Exception {

        RSSContentSerializer test = new RSSContentSerializer();

        assertEquals( "foo bar", test.strip( "    foo     bar   " ) );

    }

    /**
     * Tests junk detection: fragments that consist only of links, images and
     * navigation markup are expected to be junk, while fragments that carry
     * real text next to their links are not.
     */
    public void testIsJunkContent() throws Exception {

        RSSContentSerializer test = new RSSContentSerializer();

        assertJunk( test, "<a href=\"asdf\">foo</a>" );

        assertNotJunk( test, "<p>Hello World</p>" );

        assertJunk( test, "<li><a href=\"http://linuxcentral.com/catalog/index.php3?prod_code=B000-297\">Cutomizing and Upgrading Linux</a>" );

        // a link followed by prose is real content
        assertNotJunk( test, "<a href=\"asdf\">foo</a> This is a real paragraph." );

        // prose with several embedded links is still real content
        assertNotJunk( test, "<a href=\"http://www.tuxedo.org/~esr/\" target=\"_top\">Eric Raymond</a> has released version 1.0.0 of <a href=\"http://www.tuxedo.org/~esr/doclifter/\" target=\"_top\">doclifter</a>, a Python 2.2 utility that converts man pages and other troff/nroff/groff documents to <a href=\"http://docbook.org/wiki/moin.cgi/DocBook\" target=\"_top\">DocBook</a> XML and SGML." );

        // wrapping a lone link in another element does not make it content
        assertJunk( test, "<span><a href=\"asdf\">foo</a></span>" );

        String content = "<span class=\"secondary\"> <a href=\"http://www.xml.com/cs/user/login\">Manage Your Account</a><br /> <a href=\"http://www.xml.com/cs/user/lostpw\">Forgot Your Password?</a></span>";

        assertJunk( test, content );

        assertJunk( test, "<img src=\"/images/blue-arrow.gif\" alt=\" \" width=\"10\" height=\"12\" border=\"0\" align=\"top\"> <img src=\"/universal/images/find.gif\" width=\"36\" height=\"11\" alt=\"Find\" border=\"0\"><br /> <a href=\"http://www.xml.com/search/\">Search</a><br /> <a href=\"/ArticlesbyTopic/\">Article Archive</a><br /> <a href=\"http://www.oreillynet.com/faqs/list.csp?id_subject=23\">FAQs</a><br /><br />" );

        // navigation bar: links separated by "|" plus a short label
        assertJunk( test, "<b class=\"a2\">\n<a href=\"/2020-1069-0.html\">News.context:</a>\n<a href=\"/2018-1070-0.html\">Special Reports</a>\n | \n<a href=\"/2005-1082-0.html\">Newsmakers</a>\n | \n<span class=\"g3\">Perspectives</span>\n </b>" );

        // upper-case tag and attribute names
        assertJunk( test, "<A HREF=\"http://www.washingtonpost.com/ac2/wp-dyn?node=admin/registration/manage&destination=emailPreferences&nextstep=gather\">E-MAIL NEWSLETTERS</A> |<A HREF=\"http://www.washingtonpost.com/wp-adv/archives/front.htm\">ARCHIVES</A>" );

    }

    /**
     * Tests link expansion: relative links, fragment-only links, site-absolute
     * links and parent-directory links are resolved against the serializer's
     * resource URL.
     *
     * @author <a href="mailto:burton@openprivacy.org">Kevin A. Burton</a>
     */
    public void testExpand() throws Exception {

        //FIXME: how do I expand with directories that autoserve index.html and
        //the base site (http://www.foo.com)

        //test off a file deep within the site
        RSSContentSerializer test = serializerFor( "http://www.cnn.com/foo/bar/index.html" );

        assertExpands( test, "http://www.cnn.com/foo/bar/foo.html", "foo.html" );
        assertExpands( test, "http://www.cnn.com/foo/bar/index.html#name", "#name" );
        assertExpands( test, "http://www.cnn.com/index.html", "/index.html" );
        assertExpands( test, "http://www.cnn.com/foo/bar/cat/index.html", "cat/index.html" );

        //test off the main site
        test = serializerFor( "http://www.cnn.com/" );

        assertExpands( test, "http://www.cnn.com/foo.html", "foo.html" );
        assertExpands( test, "http://www.cnn.com/#name", "#name" );
        assertExpands( test, "http://www.cnn.com/index.html", "/index.html" );
        assertExpands( test, "http://www.cnn.com/cat/index.html", "cat/index.html" );

        //test off a base file.
        test = serializerFor( "http://www.cnn.com/index.html" );

        assertExpands( test, "http://www.cnn.com/foo.html", "foo.html" );
        assertExpands( test, "http://www.cnn.com/index.html#name", "#name" );
        assertExpands( test, "http://www.cnn.com/index.html", "/index.html" );
        assertExpands( test, "http://www.cnn.com/cat/index.html", "cat/index.html" );

        //test off a deep directory (resource ends with a slash)
        test = serializerFor( "http://www.cnn.com/foo/bar/" );

        assertExpands( test, "http://www.cnn.com/foo/bar/foo.html", "foo.html" );
        assertExpands( test, "http://www.cnn.com/foo/bar/#name", "#name" );
        assertExpands( test, "http://www.cnn.com/index.html", "/index.html" );
        assertExpands( test, "http://www.cnn.com/foo/bar/cat/index.html", "cat/index.html" );

        //parent directory references, one level up
        assertExpands( test, "http://www.cnn.com/foo/", "../" );
        assertExpands( test, "http://www.cnn.com/foo/SRDF/index.html", "../SRDF/index.html" );
        assertExpands( test, "http://www.cnn.com/foo/", ".." );

        //parent directory references, two levels up
        assertExpands( test, "http://www.cnn.com/", "../.." );
        assertExpands( test, "http://www.cnn.com/", "../../" );

        //the site is the scheme and host without a trailing slash
        test = serializerFor( "http://www.theregister.co.uk/content/54/26810.html" );

        assertEquals( "http://www.theregister.co.uk", test.getSite() );

        assertExpands( test,
                       "http://www.theregister.co.uk/images/reg_bullet.gif",
                       "/images/reg_bullet.gif" );

        //test off a bare host name with no trailing slash
        test = serializerFor( "http://www.politechbot.com" );

        assertExpands( test,
                       "http://www.politechbot.com/p-03950.html",
                       "p-03950.html" );

    }

    /**
     * Tests that <code>cleanseHTML</code> leaves clean markup alone and removes
     * HTML comments (including comments that contain markup), and that the
     * HTML of an initialized serializer contains no <code>&lt;script</code>
     * tag.
     */
    public void testCleanseHTML() throws Exception {

        RSSContentSerializer test = new RSSContentSerializer();

        assertEquals( "<p></p>", test.cleanseHTML( "<p></p>" ) );

        assertEquals( "<p></p>", test.cleanseHTML( "<p><!-- hello world --></p>" ) );

        assertEquals( "<p></p>", test.cleanseHTML( "<p><!-- <p>hello world</p> --></p>" ) );

        // NOTE(review): presumably init() loads the page; needs the local test site.
        test = serializerFor( LOCAL_TEST_PAGE );
        test.init();
        assertTrue( "HTML must not contain a <script tag after init()",
                    test.getHTML().indexOf( "<script" ) == -1 );

    }

    /**
     * Tests that <code>cleansePCDATA</code> drops the surrounding
     * <code>font</code> element and keeps only its text.
     */
    public void testCleansePCDATA() throws Exception {

        RSSContentSerializer test = new RSSContentSerializer();

        assertEquals( "Hello World",
                      test.cleansePCDATA( "<font FACE=\"verdana, arial, helvetica, sans-serif\" SIZE=2 COLOR=\"#333333\">Hello World</font>" ) );

    }

    /**
     * Tests that <code>cleanseTitle</code> removes a leading date
     * (<code>10-september-2002</code>) from a title.
     *
     * @author <a href="mailto:burton@openprivacy.org">Kevin A. Burton</a>
     */
    public void testCleanseTitle() throws Exception {

        RSSContentSerializer test = new RSSContentSerializer();

        assertEquals( "Wifi commons and Wifi castles I got to thinking about the",
                      test.cleanseTitle( "10-september-2002 Wifi commons and Wifi castles I got to thinking about the" ) );

    }

    /**
     * Tests that <code>relativize</code> rewrites the relative
     * <code>href</code> and <code>src</code> attribute values found in a
     * markup fragment into absolute URLs based on the resource, preserves the
     * case of attribute names and any other attributes, and leaves text
     * without links untouched.
     *
     * @author <a href="mailto:burton@openprivacy.org">Kevin A. Burton</a>
     */
    public void testRelativize() throws Exception {

        RSSContentSerializer test = serializerFor( "http://www.cnn.com/foo/bar/" );

        assertEquals( "<a href=\"http://www.cnn.com/foo/bar/foo.html\">foo</a>",
                      test.relativize( "<a href=\"foo.html\">foo</a>" ) );

        assertEquals( "<img src=\"http://www.cnn.com/foo/bar/bar.jpg\"></a>",
                      test.relativize( "<img src=\"bar.jpg\"></a>" ) );

        assertEquals( "<img SRC=\"http://www.cnn.com/foo/bar/bar.jpg\" BORDER=\"0\"></a>",
                      test.relativize( "<img SRC=\"bar.jpg\" BORDER=\"0\"></a>" ) );

        assertEquals( "<a href=\"http://www.cnn.com/foo/bar/foo.html\"><img src=\"http://www.cnn.com/foo/bar/bar.jpg\"></a>",
                      test.relativize( "<a href=\"foo.html\"><img src=\"bar.jpg\"></a>" ) );

        assertEquals( "foo bar",
                      test.relativize( "foo bar" ) );

    }

    /**
     * Smoke test: checks only that <code>getMinRepassContentLength</code> can
     * be called on an initialized serializer without throwing.  The returned
     * value is not verified.
     *
     * @author <a href="mailto:burton@openprivacy.org">Kevin A. Burton</a>
     */
    public void testGetMinRepassContentLength() throws Exception {

        // NOTE(review): presumably init() loads the page; needs the local test site.
        RSSContentSerializer test = serializerFor( LOCAL_TEST_PAGE );
        test.init();
        test.getMinRepassContentLength();

    }

    /**
     * Smoke test: checks only that a resource can be parsed and its
     * description read without throwing.
     *
     * <p>FIXME: nothing is asserted yet.  The intent appears to be deriving a
     * title from the description via <code>getTitle( description )</code>;
     * add an assertion once the expected title is known.</p>
     */
    public void testTitleFromDescription() throws Exception {

        // NOTE(review): presumably parse() fetches this live page -- confirm.
        RSSContentSerializer test = serializerFor( "http://www.foxnews.com/story/0,2933,63426,00.html" );
        test.parse();

        // result intentionally unused; the call itself is the check
        test.getDescription();

    }

}

