/*
 *
 * BEGIN HEADER
 *
 * ---- 
 *
 * $ID: PantherProxy.java,v 1.6 2001/06/12 20:58:43 burton Exp $
 * $Project: http://panther.openprivacy.org $
 * $CVSROOT: :pserver:anoncvs@sierra.openprivacy.org:/usr/local/cvs/public $
 * $WebCVS: http://www.openprivacy.org/cgi-bin/cvsweb/cvsweb.cgi/panther/ $
 * $Mailing-List: http://www.openprivacy.org/lists/ $
 * $Bugzilla: http://bugzilla.openprivacy.org/ $
 * Copyright 2001 OpenPrivacy.org.  All rights reserved.
 *
 * ---- 
 *
 * Copyright 2001 OpenPrivacy.org.  All rights reserved.
 *
 * This program is free software; you can redistribute it and/or modify it under
 * the terms of the LICENSE which you should have received with this package. 
 *
 * This program is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
 * FOR A PARTICULAR PURPOSE.
 *
 * END HEADER
 * 
 */

package org.openprivacy.reptile.feeds.xml;

import java.io.*;
import java.util.*;

import org.xml.sax.*;
import org.xml.sax.helpers.*;

import org.openprivacy.panther.components.*;

import talon.*;
import talon.util.*;
import talon.util.net.*;

import org.openprivacy.reptile.*;
import org.openprivacy.reptile.tasks.*;
import org.openprivacy.reptile.util.*;
import org.openprivacy.reptile.xml.*;
import org.openprivacy.reptile.om.*;

/**
 * Handles parsing out content feeds and starting them up.
 * 
 * Required features:
 * 
 * - reset state when we encounter the end of the entry which had an rdf:about
 * element
 * 
 * - only register supported formats
 * 
 * - overwrite the registration if a preferred format is found for a given URL,
 * i.e. if we find an RSS 1.0 URL and there is already an RSS 0.9x URL then
 * overwrite it.
 * 
 * - it is possible that certain URLs have multiple formats... support this too.
 * 
 * - Have a reset method which can reset the state so that titles, etc are
 * reset.
 * 
 * @author <a href="mailto:burton@relativity.yi.org">burtonator</a>
 * @version $Id: OCSContentFeedsHandler.java,v 1.6 2002/02/26 06:48:04 burton Exp $
 */
public class OCSContentFeedsHandler extends DefaultHandler {

    private String baseLocation = ""; //current OCS feed URL (RSS) for the base ( level 1 nesting )

    private String formatLocation = ""; // current format URL (level 2 nesting )
    
    private int locationLevel = 0; //the rdf:description level we found this location on.

    private int level = 0; // the current level of rdf:description 
    
    private String title = ""; //current title for this entry

    private String description = ""; //description 

    private String format = ""; //contentType in some circles (or ocs:format)

    private String currentLocal = ""; //the current local element name.

    private String currentNS = ""; //the current namespace

    private ContentFeed feed = null;

    private ContentTransformerMap ctm = ContentTransformerMap.getInstance();

    private int totalFound = 0;

    private String feedLocation = "urn:undefined";
    
    /**
     * 
     *
     * @author <a href="mailto:burton@relativity.yi.org">burtonator</a>
     */
    public OCSContentFeedsHandler( InputStream is ) throws ReptileException {

        handle( new InputSource( is ) );

    }
    
    /**
     * 
     *
     * @author <a href="mailto:burton@relativity.yi.org">burtonator</a>
     */
    public OCSContentFeedsHandler( InputSource is ) throws ReptileException {

        handle( is );

    }

    /**
     * 
     * @author <a href="mailto:burton@relativity.yi.org">burtonator</a>
     */
    public OCSContentFeedsHandler( String feedLocation ) throws ReptileException {

        this.feedLocation = feedLocation;
        
        try {

            Proxy proxy = ReptileResources.getInstance().getProxy();
            
            InputSource is = new InputSource( proxy.get( feedLocation ).getInputStream() );

            handle( is );
            
        } catch ( ProxyException pe ) {

            throw new ReptileException( pe );
            
        } catch ( Throwable t ) {

            Log.error( t );
            
            throw new ReptileException( t );
            
        } 
        
    }

    /**
     * Used for testing purposes.  This allows us to give it a file and parse it
     * out manually.
     *
     * @author <a href="mailto:burton@openprivacy.org">Kevin A. Burton</a>
     */
    public OCSContentFeedsHandler( File file ) throws Exception {

        InputSource is = new InputSource( new FileInputStream( file ) );
            
        handle( is );

    }
    
    /**
     * Run this handler.
     *
     * @author <a href="mailto:burton@relativity.yi.org">burtonator</a>
     */
    private void handle( InputSource is ) throws ReptileException {

        try { 

            XMLReader xmlr = ReptileParserFactory.getXMLReader();

            xmlr.setContentHandler( this );

            xmlr.parse( is );

            Log.message( "OCS feed found " + totalFound + " channels. "  );
            
        } catch ( Throwable t ) {

            Log.error( t );
            
            throw new ReptileException( t );
            
        }

    }

    /**
     * 
     *
     * @author <a href="mailto:burton@relativity.yi.org">burtonator</a>
     */
    public void startElement( String ns_uri,
                              String local,
                              String raw,
                              Attributes attrs ) {

        //we need to update these for use by characters()
        this.currentLocal = local;
        this.currentNS = ns_uri;
        
        //if this has an about attribute we are describing a URL.  
        if ( ns_uri.equals( ReptileResources.RDF_NAMESPACE ) && local.toLowerCase().equals( "description" ) ) {

            ++level;

            //try to guess the location... level has to be > 0 because this is the OCS feed information

            String attr_about = attrs.getValue( "about" );

            if ( attr_about != null ) {

                if ( level == 2 ) {

                    baseLocation = attr_about;

                } else if ( level == 3 ) {

                    formatLocation = attr_about;

                }
                
            }
            
        }
        
    } 

    /**
     * 
     *
     * @author <a href="mailto:burton@relativity.yi.org">burtonator</a>
     */
    public void endElement( String ns_uri,
                            String local,
                            String qName) {

        //we need to update these for use by characters()
        this.currentLocal = local;
        this.currentNS = qName;
        
        //if we and on an rdf:description and we have all required items... this
        //should be a valid entry.

        if ( ns_uri.equals( ReptileResources.RDF_NAMESPACE ) &&
             local.toLowerCase().equals( "description" ) ) {

            //base level
            if ( level == 2 ) {

                //we should have found a valid entry by now...

                if ( this.feed != null ) {

                    ++totalFound; //increment the total number of found feeds...
                    
                    //register this channel with the DB.
                    try {

                        ChannelPeer.registerChannel( feed );
                         
                    } catch ( Exception e ) {

                        Log.error( e );
                        
                    } 

                    reset();

                } 

            }

            //end of an rdf:description.  find out if we have valid information...
            if ( level == 3 ) {

                //determine if we should use the given feed. 
                if ( feed == null && ctm.isSupportedContentType( format ) ) {

                    this.feed = getContentFeed();

                } else if ( feed != null && ctm.isPreferredContentType( format ) ) {

                    //if it is a prefered content type... and we already have a
                    //feed, override it!
                    
                    this.feed = getContentFeed();
                    
                } else if ( feed == null && format != null && format.equals( "" ) == false ) {

                    //ok... failover.  if we still haven't upgraded the format,
                    //and it isn't null, them this is an unsupported format.
                    //This is fine we just need to log it so that we can support
                    //it in the future.
                    
                    Log.debug( "Format is neither supported nor preferred: '" + format + "'" );
                    
                }
                
                
            }

            //always reduce the level on rdf:description elements.
            --level;

        }

    }

    /**
     * 
     *
     * @author <a href="mailto:burton@relativity.yi.org">burtonator</a>
     */
    public void characters( char[] ch,
                            int start,
                            int length ) {

        //FIXME: we are assuming here that characters will only be called once.
        //We need to fix this so that it can be called more than once for each
        //element.
        
        if ( isDCNamespace( currentNS ) && currentLocal.equals( "title" ) )
            this.title= new String( ch, start, length );

        //find the OCS format element.
        if ( isOCSNamespace( currentNS ) && currentLocal.equals( "format" ) )
            this.format= new String( ch, start, length );

        //get the channel description
        if ( isDCNamespace( currentNS ) && currentLocal.equals( "description" ) ) 
            this.description= new String( ch, start, length );
        
    }
    
    /**
     * Reset all parser state variables...
     *
     * @author <a href="mailto:burton@relativity.yi.org">burtonator</a>
     */
    private void reset() {

        title = "";
        description = "";
        baseLocation = "";
        formatLocation = "";
        format = "";

        feed = null;
        
    }

    /**
     * Get a ContentFeed from the current state.
     *
     * @author <a href="mailto:burton@relativity.yi.org">burtonator</a>
     */
    private ContentFeed getContentFeed() {

        return new ContentFeed( formatLocation,
                                format,
                                title, 
                                description,
                                feedLocation );

    }

    /**
     * Return true if the given namespace is an OCS namespace.
     *
     * @author <a href="mailto:burton@relativity.yi.org">Kevin A. Burton</a>
     */
    public static boolean isOCSNamespace( String namespace ) {

        return namespace.equals( ReptileResources.OCS_NAMESPACE_DIRECTORY ) ||
               namespace.equals( ReptileResources.OCS_NAMESPACE_SYNTAX );
        
    }

    /**
     * Return true if the given namespace is an OCS namespace.
     *
     * @author <a href="mailto:burton@relativity.yi.org">Kevin A. Burton<k/a>
     */
    public static boolean isDCNamespace( String namespace ) {

        return namespace.equals( ReptileResources.DC_NAMESPACE ) ||
               namespace.equals( ReptileResources.DC_OCLC_NAMESPACE );
        
    }

    public static void main( String[] args ) {

        if ( args.length != 1 ) {

            System.out.println( "SYNTAX: " + OCSContentFeedsHandler.class.getName() + " filename" );

        } else {

            String filename = args[0];

            try {
                
                new OCSContentFeedsHandler( new File( filename ) );
                 
            } catch ( Exception e ) {

                e.printStackTrace();
                
            }
            
        }
    
    }

}

