<?php
/**
 * Takes HTML and converts it to formatted, plain text.
 *
 * $Horde: framework/Text_Filter/Filter/html2text.php,v 1.4.10.1 2005/01/03 12:19:16 jan Exp $
 *
 * Copyright 2003-2005 Jon Abernathy <jon@chuggnutt.com>
 * Original source: http://www.chuggnutt.com/html2text.php
 * Copyright 2004-2005 Jan Schneider <jan@horde.org>
 *
 * See the enclosed file COPYING for license information (LGPL). If you did
 * not receive this file, see http://www.fsf.org/copyleft/lgpl.html.
 *
 * @author  Jon Abernathy <jon@chuggnutt.com>
 * @author  Jan Schneider <jan@horde.org>
 * @version $Revision: 1.4.10.1 $
 * @since   Horde 3.0
 * @package Horde_Text
 */
class Text_Filter_html2text extends Text_Filter {

    /**
     * Filter parameters.
     *
     * 'width' is the column at which postProcess() wraps the final text.
     *
     * @var array $_params
     */
    var $_params = array('width' => 70);

    /**
     * Executes any code necessary before applying the filter patterns.
     *
     * Resets the global link bookkeeping in $_html2text_state and strips
     * leading and trailing whitespace from the input.
     *
     * @param string $text  The text before the filtering.
     *
     * @return string  The modified text.
     */
    function preProcess($text)
    {
        global $_html2text_state;

        $_html2text_state['linkList'] = '';
        /* Number that the next collected link will be given. */
        $_html2text_state['linkCount'] = 1;

        return trim($text);
    }

    /**
     * Returns a hash with replace patterns.
     *
     * Only the whitespace normalisation and the <script>/<style> removal
     * are returned here. Every later conversion step is applied, in order,
     * by postProcess(): several of those steps need a callback, and the
     * former /e (eval) replacement strings executed message content as PHP
     * code.
     *
     * @return array  Patterns hash.
     */
    function getPatterns()
    {
        $regexp = array(
            // Non-legal carriage return.
            "/\r/" => '',

            // Leading and trailing whitespace.
            '/^\s*(.*?)\s*$/m' => '\1',

            // Normalize <br>.
            "/<br[^>]*>([^\n])/i" => "<br>\n\\1",

            // Newlines and tabs.
            "/[\n\t]+/" => ' ',

            // <script>s -- which strip_tags() supposedly has problems
            // with.
            '/<script[^>]*>.*?<\/script>/i' => '',

            // <style>s -- which strip_tags() supposedly has problems
            // with.
            '/<style[^>]*>.*?<\/style>/i' => ''
        );

        return array('regexp' => $regexp);
    }

    /**
     * Executes any code necessary after applying the filter
     * patterns.
     *
     * Converts the supported markup to plain text, strips the remaining
     * tags, collapses blank lines, appends the collected link list and
     * wraps the result at $_params['width'] columns.
     *
     * @param string $text  The text after the filtering.
     *
     * @return string  The modified text.
     */
    function postProcess($text)
    {
        global $_html2text_state;

        /* Translate the markup we know how to render as text. */
        $text = $this->_convertMarkup($text);

        /* Strip any other HTML tags. */
        $text = strip_tags($text);

        /* Collapse whitespace-only lines and runs of three or more
         * newlines into a single blank line. */
        $text = preg_replace("/\n[[:space:]]+\n/", "\n\n", $text);
        $text = preg_replace("/[\n]{3,}/", "\n\n", $text);

        /* Add link list. */
        if (!empty($_html2text_state['linkList'])) {
            $text .= "\n\n" . _("Links") . ":\n------\n" . $_html2text_state['linkList'];
        }

        /* Wrap the text to a readable format. */
        return wordwrap($text, $this->_params['width']);
    }

    /**
     * Applies the tag and entity conversions, one step after the other.
     *
     * Each step is a (pattern, replacement) pair; an array replacement is
     * a callback for preg_replace_callback(), a string replacement goes to
     * preg_replace(). The order of the steps matters: later steps see the
     * output of earlier ones.
     *
     * @access private
     *
     * @param string $text  Text that already went through getPatterns().
     *
     * @return string  The converted text.
     */
    function _convertMarkup($text)
    {
        $upper = array($this, '_upperCallback');

        $steps = array(
            // h1 - h3
            array('/<h[123][^>]*>(.+?)<\/h[123]>/i', array($this, '_majorHeadingCallback')),

            // h4 - h6
            array('/<h[456][^>]*>(.+?)<\/h[456]>/i', array($this, '_minorHeadingCallback')),

            // <p> -- the loose prefix also matches other tags starting
            // with "p" (e.g. <pre>); kept so those still yield a
            // paragraph break.
            array('/<p[^>]*>/i', "\n\n  "),

            // <br>
            array('/<br[^>]*> /i', "\n"),

            // <b> -- \b keeps <body>, <big>, <blockquote>, ... from being
            // taken for the opening tag.
            array('/<b\b[^>]*>(.+?)<\/b>/i', $upper),

            // <strong>
            array('/<strong[^>]*>(.+?)<\/strong>/i', $upper),

            // Bold <span>. The style value is bounded by its closing
            // quote so the match cannot run across several tags.
            array('/<span\s+style="font-weight:\s*bold[^"]*"[^>]*>(.+?)<\/span>/i', $upper),

            // <i> -- \b keeps <img>, <input>, <iframe>, ... out.
            array('/<i\b[^>]*>(.+?)<\/i>/i', '_\\1_'),

            // <em> -- \b keeps <embed> out.
            array('/<em\b[^>]*>(.+?)<\/em>/i', '_\\1_'),

            // <ul>/<ol> and </ul>/</ol>
            array('/(<(u|o)l[^>]*>| ?<\/(u|o)l>)/i', "\n\n"),

            // <li> -- \b keeps <link> out.
            array('/ ?<li\b[^>]*>/i', "\n  * "),

            // <a href=""> -- the space in front of the link is left in
            // place so the link text does not get glued to the
            // preceding word.
            array('/<a href="([^"]+)"[^>]*>(.+?)<\/a>/i', array($this, '_linkCallback')),

            // <hr>
            array('/<hr[^>]*>/i', "\n-------------------------\n"),

            // <table> and </table>
            array('/(<table[^>]*>| ?<\/table>)/i', "\n\n"),

            // <tr>
            array('/ ?<tr[^>]*>/i', "\n\t"),

            // <td> and </td>
            array('/ ?<td[^>]*>(.+?)<\/td> ?/i', "\\1\t\t"),
            array('/\t\t<\/tr>/i', ''),

            // Entities. The patterns are case-insensitive because the
            // upper-casing steps above may already have changed their
            // case. &amp; is decoded last so that an escaped entity such
            // as "&amp;copy;" is not decoded twice.
            array('/&nbsp;/i', ' '),
            array('/&quot;/i', '"'),
            array('/&gt;/i', '>'),
            array('/&lt;/i', '<'),
            array('/&copy;/i', '(c)'),
            array('/&trade;/i', '(tm)'),
            array('/&amp;/i', '&')
        );

        foreach ($steps as $step) {
            list($pattern, $replacement) = $step;

            if (is_array($replacement)) {
                $result = preg_replace_callback($pattern, $replacement, $text);
            } else {
                $result = preg_replace($pattern, $replacement, $text);
            }

            /* The preg functions return null on a PCRE error; keep the
             * text from the previous step rather than losing it all. */
            if ($result !== null) {
                $text = $result;
            }
        }

        return $text;
    }

    /**
     * preg_replace_callback() helper: upper-cases the tag content.
     *
     * @access private
     *
     * @param array $matches  Match data; index 1 is the tag content.
     *
     * @return string  The upper-cased content.
     */
    function _upperCallback($matches)
    {
        return strtoupper($matches[1]);
    }

    /**
     * preg_replace_callback() helper for h1 - h3: upper-cases the heading
     * and surrounds it with blank lines.
     *
     * @access private
     *
     * @param array $matches  Match data; index 1 is the heading text.
     *
     * @return string  The heading replacement.
     */
    function _majorHeadingCallback($matches)
    {
        return "\n\n" . strtoupper($matches[1]) . "\n\n";
    }

    /**
     * preg_replace_callback() helper for h4 - h6: capitalizes each word
     * of the heading and surrounds it with blank lines.
     *
     * @access private
     *
     * @param array $matches  Match data; index 1 is the heading text.
     *
     * @return string  The heading replacement.
     */
    function _minorHeadingCallback($matches)
    {
        return "\n\n" . ucwords($matches[1]) . "\n\n";
    }

    /**
     * preg_replace_callback() helper for links: hands the URL and the
     * link text to _buildLinkList().
     *
     * @access private
     *
     * @param array $matches  Match data; index 1 is the href value,
     *                        index 2 the link text.
     *
     * @return string  The link replacement.
     */
    function _linkCallback($matches)
    {
        global $_html2text_state;

        return $this->_buildLinkList($_html2text_state['linkCount'], $matches[1], $matches[2]);
    }

    /**
     * Helper function called on link replacement.
     *
     * Maintains an internal list of links to be displayed at the end
     * of the text, with numeric indices to the original point in the
     * text they appeared. Links are numbered starting at 1.
     *
     * @access private
     *
     * @param integer $link_count  Counter tracking current link number.
     *                             Not read: the number is taken from
     *                             $_html2text_state['linkCount']. Kept so
     *                             the signature stays unchanged.
     * @param string  $link        URL of the link.
     * @param string  $display     Part of the text to associate number with.
     *
     * @return string  The link replacement.
     */
    function _buildLinkList($link_count, $link, $display)
    {
        global $_html2text_state;

        /* Use the current number for this link, then advance the counter
         * for the next one. */
        $number = $_html2text_state['linkCount']++;
        $_html2text_state['linkList'] .= '[' . $number . "] $link\n";

        return $display . '[' . $number . ']';
    }

}
