/*
 *
 * BEGIN HEADER
 *
 * ---- 
 *
 * $ID: PantherProxy.java,v 1.6 2001/06/12 20:58:43 burton Exp $
 * $Project: http://panther.openprivacy.org $
 * $CVSROOT: :pserver:anoncvs@sierra.openprivacy.org:/usr/local/cvs/public $
 * $WebCVS: http://www.openprivacy.org/cgi-bin/cvsweb/cvsweb.cgi/panther/ $
 * $Mailing-List: http://www.openprivacy.org/lists/ $
 * $Bugzilla: http://bugzilla.openprivacy.org/ $
 * Copyright 2001 OpenPrivacy.org.  All rights reserved.
 *
 * ---- 
 *
 * Copyright 2001 OpenPrivacy.org.  All rights reserved.
 *
 * This program is free software; you can redistribute it and/or modify it under
 * the terms of the LICENSE which you should have received with this package. 
 *
 * This program is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
 * FOR A PARTICULAR PURPOSE.
 *
 * END HEADER
 * 
 */

package org.openprivacy.reptile;

import java.io.*;
import java.net.*;
import java.util.*;

import javax.servlet.*;
import javax.servlet.http.*;

import org.openprivacy.reptile.actions.*;
import org.openprivacy.reptile.extensions.*;
import org.openprivacy.reptile.init.*;
import org.openprivacy.reptile.tasks.*;
import org.openprivacy.reptile.util.*;
import org.openprivacy.reptile.xslt.*;

import talon.*;
import talon.components.*;
import talon.resources.*;
import talon.util.*;
import talon.util.net.*;

import org.apache.regexp.*;

import org.jdom.*;
import org.jdom.output.*;

/*

FIXME:

- Support REGEXP for the names of the anchors...

- only include articles that are present on the current site.

- i.e., if we are on http://www.cnn.com, don't include links to msnbc.

- link to the database and only return URLs that are recent, i.e. in the last
  few weeks (this will get smarter over time)

- Support RSS autodiscovery so that I can pull out the latest discrete and then
  uplevel their feed to RSS 1.0 with mod_content
    
*/

/**
 * Serializes a single HTML resource as an RSS 1.0 document (with mod_content).
 *
 * <p>The resource is scanned for named anchors
 * (<code>&lt;a name="..."&gt;</code>); each anchor becomes a candidate item
 * whose link is <code>resource#name</code>.  At most <code>MAX_ITEMS</code>
 * anchors are considered.  Typical usage is to construct the serializer, call
 * {@link #parse()} and then {@link #getRSS()}.</p>
 *
 * @author <a href="mailto:burton@openprivacy.org">Kevin A. Burton</a>
 */
public class RSSANameChannelSerializer {

    /** When true, {@link #debug(String)} writes its messages to System.err. */
    private static final boolean DEBUG = true;

    /** Upper bound on the number of named anchors collected per resource. */
    private static final int MAX_ITEMS = 15;

    /** Namespace of the RSS 1.0 content module (content:encoded). */
    private static final Namespace CONTENT_NS =
        Namespace.getNamespace( "content", "http://purl.org/rss/1.0/modules/content/" );

    /** RDF syntax namespace (rdf:RDF, rdf:about, rdf:Seq, rdf:li). */
    private static final Namespace RDF_NS =
        Namespace.getNamespace( "rdf", "http://www.w3.org/1999/02/22-rdf-syntax-ns#" );

    /** RSS 1.0 core namespace (channel, item, title, link, description). */
    private static final Namespace RSS_NS =
        Namespace.getNamespace( "rss", "http://purl.org/rss/1.0/" );

    /** Serializer for the channel-level resource; supplies site, title and description. */
    private final RSSContentSerializer rcs;

    /** Anchor links (resource#name) found by {@link #parse()}; empty until then. */
    private String[] links = new String[0];

    /** Cleansed HTML of the resource, set by {@link #parse()}; null until then. */
    private String html = null;

    /**
     * Create a new <code>RSSANameChannelSerializer</code> instance.
     *
     * @param resource the resource (URL) to serialize; it is handed to the
     *                 underlying <code>RSSContentSerializer</code>
     */
    public RSSANameChannelSerializer( String resource ) {

        this.rcs = new RSSContentSerializer();
        this.rcs.setResource( resource );

    }

    /**
     * Parse this channel.  This should be called before any other methods that
     * return any data: it parses the underlying resource, stores its cleansed
     * HTML and collects the named-anchor links used by {@link #getRSS()}.
     *
     * @throws Exception if the underlying serializer fails to parse the
     *                   resource or the anchor scan fails
     */
    public void parse() throws Exception {

        this.rcs.parse();

        // keep the cleansed HTML around: every item serializer is fed this
        // same markup in getRSS()
        this.html = this.rcs.cleanseHTML( this.rcs.getResourceAsString() );

        this.links = getNamedAnchors( this.html );

    }

    /**
     * Get the named anchors of the given content as absolute links.  An anchor
     * is any <code>&lt;a name="..."&gt;</code> tag; each one is returned as
     * <code>resource + "#" + name</code>, in document order, and at most
     * <code>MAX_ITEMS</code> are returned.
     *
     * @param html the markup to scan; <code>null</code> yields an empty array
     * @return the anchor links, never <code>null</code>
     * @throws Exception if the regular expression cannot be compiled
     */
    public String[] getNamedAnchors( String html ) throws Exception {

        if ( html == null ) {

            return new String[0];

        }

        //FIXME: only matches when name is the first attribute, is separated
        //from the tag name by exactly one space and is double-quoted.
        RE regexp = new RE( "<a name=\"([^\"]+)\"", RE.MATCH_CASEINDEPENDENT | RE.MATCH_MULTILINE );

        Vector found = new Vector();

        int index = 0;

        // test the size limit first so no extra match is attempted once the
        // limit has been reached
        while ( found.size() < MAX_ITEMS && regexp.match( html, index ) ) {

            String link = this.rcs.getResource() + "#" + regexp.getParen( 1 );

            debug( link );

            found.addElement( link );

            // resume scanning right after the text matched by this anchor
            index = regexp.getParenEnd( 0 );

        }

        String[] result = new String[found.size()];
        found.copyInto( result );

        return result;

    }

    /**
     * Get the resource as an RSS stream with mod_content.
     *
     * <p>The document contains one channel describing the resource and one
     * item per anchor link that yields a non-empty description.  Anchors that
     * fail to serialize are reported on System.err and skipped, so a single
     * bad anchor does not prevent the feed from being produced.</p>
     *
     * @return the RSS 1.0 document as a string
     * @throws Exception if the channel-level data cannot be obtained or the
     *                   document cannot be written
     */
    public String getRSS() throws Exception {

        String site = this.rcs.getResource();
        String title = this.rcs.getTitle();
        String description = this.rcs.getDescription();

        //FIXME: pull out the site link for the item and use this for the about
        //and link for the channel.

        Element rdf = new Element( "RDF", RDF_NS );

        Element channel = createChannel( site, title, description );
        rdf.addContent( channel );

        Element itemseq = new Element( "Seq", RDF_NS );
        channel.addContent( new Element( "items", RSS_NS ).addContent( itemseq ) );

        for ( int i = 0; i < links.length; ++i ) {

            String link = links[i];

            try {

                Element item = createItem( link );

                if ( item != null ) {

                    rdf.addContent( item );

                    itemseq.addContent( new Element( "li", RDF_NS )
                                        .setAttribute( "resource", link, RDF_NS ) );

                }

            } catch ( Exception e ) {

                // best effort: a broken anchor must not abort the whole feed,
                // but say which one failed so the trace is actionable
                System.err.println( "Unable to serialize item: " + link );
                e.printStackTrace();

            }

        }

        String encoding = "UTF-8";

        XMLOutputter outputter = new XMLOutputter( "    ", true, encoding );

        outputter.setEncoding( encoding );
        outputter.setOmitDeclaration( false );
        outputter.setOmitEncoding( false );

        // NOTE(review): only the root element is handed to the outputter, not
        // a Document; confirm that the JDOM version in use still emits the XML
        // declaration in this case.
        return outputter.outputString( rdf );

    }

    /**
     * Build the channel element with its required title, link and description
     * children.  A missing or empty title/description is replaced by a default
     * derived from the site, mirroring how empty item values are handled.
     */
    private Element createChannel( String site, String title, String description ) {

        Element channel = new Element( "channel", RSS_NS );
        channel.setAttribute( "about", site, RDF_NS );

        if ( isEmpty( title ) ) {

            title = "content for: " + site;

        }

        if ( isEmpty( description ) ) {

            description = "Serialized content for the following URL: " + site;

        }

        channel.addContent( new Element( "title", RSS_NS ).setText( title ) );
        channel.addContent( new Element( "link", RSS_NS ).setText( site ) );
        channel.addContent( new Element( "description", RSS_NS ).setText( description ) );

        return channel;

    }

    /**
     * Build the item element for one anchor link, or return <code>null</code>
     * when the anchor yields no description and should be left out.
     */
    private Element createItem( String link ) throws Exception {

        RSSContentSerializer nrcs = new RSSContentSerializer();
        nrcs.setResource( link );

        // presumably lets the item serializer work on the markup parse()
        // already obtained instead of fetching the page again -- TODO confirm
        nrcs.setHTML( this.html );
        nrcs.setInitialized( true );
        nrcs.parse();

        String description = nrcs.getDescription();

        if ( isEmpty( description ) ) {

            return null;

        }

        String title = nrcs.getTitle();

        if ( isEmpty( title ) ) {

            // no title of its own: ask the serializer for one based on the description
            title = nrcs.getTitle( description );

        }

        Element item = new Element( "item", RSS_NS );
        item.setAttribute( "about", link, RDF_NS );
        item.addContent( new Element( "title", RSS_NS ).setText( title ) );
        item.addContent( new Element( "link", RSS_NS ).setText( link ) );
        item.addContent( new Element( "description", RSS_NS ).setText( description ) );
        item.addContent( new Element( "encoded", CONTENT_NS ).setText( nrcs.getContent() ) );

        return item;

    }

    /**
     * Tell whether a value is <code>null</code> or the empty string.
     */
    private static boolean isEmpty( String value ) {

        return value == null || value.length() == 0;

    }

    /**
     * Display syntax.
     */
    public static void syntax() {

        System.out.println( "SYNTAX: " + RSSANameChannelSerializer.class.getName() + " URL" );

    }

    /**
     * Handle operations from the command line.  Expects exactly one argument,
     * the URL to serialize; the resulting RSS is written to System.out.  With
     * any other argument count the syntax is printed instead.
     *
     * @param args command line arguments
     */
    public static void main( String[] args ) {

        if ( args.length != 1 ) {

            syntax();
            return;

        }

        RSSANameChannelSerializer serializer = new RSSANameChannelSerializer( args[0] );

        try {

            serializer.parse();

            System.out.println( serializer.getRSS() );

        } catch ( Exception e ) {

            e.printStackTrace();

        }

    }

    /**
     * Write a diagnostic message to System.err when <code>DEBUG</code> is
     * enabled.
     *
     * @param message the text to print
     */
    private void debug( String message ) {

        if ( DEBUG ) {

            System.err.println( message );

        }

    }

}
