/*
 *
 * BEGIN HEADER
 *
 * ---- 
 *
 * $ID: PantherProxy.java,v 1.6 2001/06/12 20:58:43 burton Exp $
 * $Project: http://panther.openprivacy.org $
 * $CVSROOT: :pserver:anoncvs@sierra.openprivacy.org:/usr/local/cvs/public $
 * $WebCVS: http://www.openprivacy.org/cgi-bin/cvsweb/cvsweb.cgi/panther/ $
 * $Mailing-List: http://www.openprivacy.org/lists/ $
 * $Bugzilla: http://bugzilla.openprivacy.org/ $
 * Copyright 2001 OpenPrivacy.org.  All rights reserved.
 *
 * ---- 
 *
 * Copyright 2001 OpenPrivacy.org.  All rights reserved.
 *
 * This program is free software; you can redistribute it and/or modify it under
 * the terms of the LICENSE which you should have received with this package. 
 *
 * This program is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
 * FOR A PARTICULAR PURPOSE.
 *
 * END HEADER
 * 
 */

package org.openprivacy.reptile.xml;

/**
 * Class that can cleanse a string so that nothing can be present to break an
 * XML parser.  This is a VERY non-portable class as it is meant to work just
 * with Xalan/Xerces and may remove more text than the XML specification
 * strictly requires (see {@link #isXMLCharacter(char)}).
 *
 * <p>All methods are static and keep no state between calls.</p>
 *
 * @author <a href="mailto:burton@peerfear.org">Kevin A. Burton</a>
 * @version $Id: XMLStringCleanser.java,v 1.1 2003/02/03 13:11:21 burton Exp $
 */
public class XMLStringCleanser {

    /** First UTF-16 code unit of the high (leading) surrogate range. */
    private static final char HIGH_SURROGATE_FIRST = 0xD800;

    /** Last UTF-16 code unit of the high (leading) surrogate range. */
    private static final char HIGH_SURROGATE_LAST = 0xDBFF;

    /** First UTF-16 code unit of the low (trailing) surrogate range. */
    private static final char LOW_SURROGATE_FIRST = 0xDC00;

    /** Last UTF-16 code unit of the low (trailing) surrogate range. */
    private static final char LOW_SURROGATE_LAST = 0xDFFF;

    /**
     * Returns a copy of the given string with every unacceptable character
     * removed.
     *
     * <p>A character is kept when {@link #isXMLCharacter(char)} accepts it, or
     * when it is part of a well-formed surrogate pair (a high surrogate
     * immediately followed by a low surrogate).  Such a pair encodes a code
     * point in the range #x10000-#x10FFFF, which the XML 1.0 <code>Char</code>
     * production allows.  Surrogates that are not part of such a pair are
     * removed.</p>
     *
     * @param content the text to cleanse; may be <code>null</code>.
     * @return the cleansed text, or <code>null</code> if <code>content</code>
     *         was <code>null</code>.
     */
    public static String cleanse( String content ) {

        if ( content == null ) {
            return null;
        }

        int length = content.length();

        // The result can never be longer than the input, so size it up front.
        StringBuffer buff = new StringBuffer( length );

        for ( int i = 0; i < length; ++i ) {

            char c = content.charAt( i );

            if ( isHighSurrogate( c ) && i + 1 < length
                 && isLowSurrogate( content.charAt( i + 1 ) ) ) {

                // Keep both halves of the pair together and skip the second
                // half so it is not re-examined (and rejected) on its own.
                buff.append( c );
                buff.append( content.charAt( i + 1 ) );
                ++i;

            } else if ( isXMLCharacter( c ) ) {

                buff.append( c );

            }

        }

        return buff.toString();

    }

    /**
     * Determines whether a single UTF-16 code unit is accepted by this
     * cleanser.  The check is modelled on production 2 (<code>Char</code>) of
     * the XML 1.0 specification, with one deviation.
     *
     * <p>Accepted: tab, line feed, carriage return, 0x20-0x80, 0x100-0xD7FF
     * and 0xE000-0xFFFD.</p>
     *
     * <p>Rejected: all other values below 0x20, the range 0x81-0xFF, the
     * surrogate range 0xD800-0xDFFF, and 0xFFFE/0xFFFF.</p>
     *
     * <p>NOTE: rejecting 0x81-0xFF is stricter than the XML specification,
     * which allows those characters.  This behaviour is kept as-is because
     * the class is documented as possibly removing more than required.</p>
     *
     * <p>Because a <code>char</code> cannot hold a value above 0xFFFF, this
     * method cannot recognise characters in #x10000-#x10FFFF; each half of a
     * surrogate pair is rejected here.  {@link #cleanse(String)} handles
     * pairs itself.</p>
     *
     * @param c <code>char</code> to check for XML compliance.
     * @return <code>boolean</code> - true if it's an accepted character,
     *                                false otherwise.
     */
    public static boolean isXMLCharacter(char c) {

        if (c == '\n' || c == '\r' || c == '\t') {
            return true;
        }

        // Remaining control characters below space are never allowed.
        if (c < 0x20) {
            return false;
        }

        if (c <= 0x80) {
            return true;
        }

        // Deliberately stricter than the XML spec: 0x81-0xFF is dropped.
        if (c <= 0xFF) {
            return false;
        }

        if (c <= 0xD7FF) {
            return true;
        }

        // Surrogate code units are not characters on their own.
        if (c < 0xE000) {
            return false;
        }

        // 0xFFFE and 0xFFFF are the only remaining char values and are not
        // XML characters.
        return c <= 0xFFFD;
    }

    /** Tells whether the code unit is the leading half of a surrogate pair. */
    private static boolean isHighSurrogate(char c) {
        return c >= HIGH_SURROGATE_FIRST && c <= HIGH_SURROGATE_LAST;
    }

    /** Tells whether the code unit is the trailing half of a surrogate pair. */
    private static boolean isLowSurrogate(char c) {
        return c >= LOW_SURROGATE_FIRST && c <= LOW_SURROGATE_LAST;
    }

}
