Results 1 to 2 of 2

Thread: BBCode Parser

  1. #1
    Join Date
    Aug 2007
    Location
    Ohio
    Posts
    79
    Thanks
    0
    Thanked 15 Times in 15 Posts

    Default BBCode Parser

    I've been working on building a custom forum software from the ground up the past few months. When I got to parsing messages, I wrote a TextParser class to handle bbcode, smilies and the like. It worked well until I realized that embedded bbcodes of the same type weren't being parsed.

    I found some discussions on the topic and they pointed to using preg_replace_callback to recursively replace the bbcodes. I had some trouble implementing it and got some strange results. In search for more answers I came across a blog post where someone said to avoid regular expressions to parse bbcode structures. How would I not use regular expressions to parse bbcode? Are there any other alternatives?

    For the curious, here's my class. My attempt at recursion is commented out and the original attempt isn't.

    PHP Code:
    <?php

    class Penelope_TextParser {

        private 
    $BBCodes = array();
        private 
    $smilies = array();
        private 
    $currentReplacement;
        private 
    $currentBBCode;
        
        public function 
    __construct() {
        
            
    $this->loadCodes();
            
    $this->loadSmilies();
            
        }
        
        private function 
    loadCodes() {
        
            
    // this function loads all the bbcodes in the database into the object for use later when parsing is needed.
            
            // get the database connection
            
    $db Penelope_DB::getInstance();
            
            
    $getCodes $db->prepare("SELECT * FROM bbcodes");
            
    $getCodes->execute();
            
    $this->BBCodes $getCodes->fetchAll(PDO::FETCH_ASSOC);
            
            if(
    $getCodes->errorCode() == "0000") {
                return 
    true;
            }
            else {
                return 
    false;
            }
        
        }
        
        private function 
    loadSmilies() {
        
            
    // this function loads all the smilies in the database into the object for use later when parsing
            
            
    $db Penelope_DB::getInstance();
            
            
    $getSmilies $db->prepare("SELECT * FROM smilies");
            
    $getSmilies->execute();
            
    $this->smilies $getSmilies->fetchAll(PDO::FETCH_ASSOC);
            
            if(
    $getSmilies->errorCode() == "0000")
            {
                return 
    true;
            }
            else {
                return 
    false;
            }
            
        }
        
        private function 
    prepareReplacement($replacement$use_option) {
        
            if(
    $use_option) {
                
    $replacement preg_replace('/{option}/''\$1'$replacement);
            
                
    $replacement preg_replace('/{param}/''\$2'$replacement);
            }
            else {
                
    $replacement preg_replace('/{param}/''\$1'$replacement);
            }
            
            return 
    $replacement;
        
        }
        
        public function 
    parse($text) {
            
            
    $text htmlentities($text);
            
    $text $this->parseBBCode($text);
            
    $text $this->parseSmilies($text);
            
            
    $text nl2br($text);
                    
            return 
    $text;
        
        }
        
        protected function 
    recursiveReplaceSimple($input) {
        
            
    //$arr = $this->currentBBCode;
        
            
    if(is_array($input)) {
                
                
    $replacement $this->currentReplacement;
                
                
    $input str_replace('{param}'$input[1], $replacement);
            }
        
            return 
    preg_replace_callback('/\['.$arr['tag'].'\](.+)\[\/'.$arr['tag'].'\]/is', array(&$this'recursiveReplaceSimple'), $input5);
        
        }
        
        protected function 
    recursiveReplaceAdvanced($input) {
        
            
    $arr $this->currentBBCode;
            
            if(
    is_array($input)) {
                
                
    $replacement $this->currentReplacement;
                
                
    $option $input[1];
                
    $param $input[2];
                
                
    $input str_replace('{param}'$param$replacement);
                
    $input str_replace('{option}'$option$input);
            }
        
            return 
    preg_replace_callback('/\['.$arr['tag'].'\](.+)\[\/'.$arr['tag'].'\]/is', array(&$this'recursiveReplaceAdvanced'), $input5);
        
        }
        
        public function 
    parseBBCode($text) {
        
            foreach(
    $this->BBCodes as $key => $arr)
            {
                
                if(!
    $arr['use_option']) {
                    
                    
    /*
                    $this->currentReplacement = $arr['replacement'];
                    $this->currentBBcode = $arr;
                    
                    // RECURSIVE REPLACEMENT
                    
                    $text = preg_replace_callback('/\['.$arr['tag'].'\](.+)\[\/'.$arr['tag'].'\]/is', array(&$this, 'recursiveReplaceSimple'), $text, 5);
                    */
                    
                    //ORIGINAL REPLACEMENT:
                    
                    
    $replacement $this->prepareReplacement($arr['replacement'], 0);
                    
                    
    $text preg_replace('/\['.$arr['tag'].'\](.+)\[\/'.$arr['tag'].'\]/isU'$replacement$text);
                    
                    
                    
                }
                
                else {
                    
    /*            
                    $this->currentReplacement = $arr['replacement'];
                    $this->currentBBCode = $arr;
                    
                    // RECURSIVE REPLACEMENT
                    $text = preg_replace_callback('/\['.$arr['tag'].'=([^\]]+)\](.+)\[\/'.$arr['tag'].'\]/is', array(&$this, 'recursiveReplaceAdvanced'), $text, 5);
                    */
                    
                    //ORIGINAL REPLACEMENT:
                    
                    
    $replacement $this->prepareReplacement($arr['replacement'], 1);
                    
                    
    $text preg_replace('/\['.$arr['tag'].'=([^\]]+)\](.+)\[\/'.$arr['tag'].'\]/isU'$replacement$text);
                    
                
                }
                
            }
            
            return 
    $text;
        
        }
        
        public function 
    parseSmilies($text) {
        
            
    $config Penelope_Config::getInstance();
            
    $website_url $config->getProp("website_url");
        
            foreach(
    $this->smilies as $key => $arr)
            {
                
    $text str_replace($arr['search'], '<img src="'.$website_url.'/imgs/smilies/'.$arr['filepath'].'" alt="'.$arr['name'].'" />'$text);
            }
            
            return 
    $text;
        
        }
        
        public function 
    addDefaultBBCode($text$font$color$size) {
            
            
    $tmp "";
            
            if(
    $font) {
                
    $tmp .= "[font=".$font."]";
            }
            if(
    $color) {
                
    $tmp .= "[color=".$color."]";
            }
            if(
    $size) {
                
    $tmp .= "[size=".$size."]";
            }
            
            
    $tmp .= $text;
            
            if(
    $size) {
                
    $tmp .= "[/size]";
            }
            if(
    $color) {
                
    $tmp .= "[/color]";
            }
            if(
    $font) {
                
    $tmp .= "[/font]";
            }
            
            return 
    $tmp;
            
        }

    }

    ?>

  2. #2
    Join Date
    Aug 2007
    Location
    Ohio
    Posts
    79
    Thanks
    0
    Thanked 15 Times in 15 Posts

    Default

    After many many hours of fighting with this damn problem, I think I've finally reached a solution. I ended up creating a parser that converts the bbcode into a tree structure, and then converts the tree structure into html. I really like this solution, because if I ever had to parse into a different format instead of html, all I need is a new conversion table. For those who are curious, here is my new class.

    PHP Code:
    <?php

    class Penelope_TextParser {

        private 
    $supportedTags = array();
        private 
    $smilies = array();
        
        private 
    $text "";
        private 
    $tree = array();
        
        
        public function 
    setInput($text) {
        
            
    $this->text htmlentities($text);
            
    $this->resetTree();
            
            return 
    true;
            
        }
        
        public function 
    resetTree() {
            
    $this->tree  = array("nodeid" => 0"parentNodeId" => -1"type" => "element""tagname" => "body""children" => array());
        }
        
        public function 
    __construct() {
        
            
    $this->resetTree();
        
            
    $this->loadCodes();
            
    $this->loadSmilies();
            
            
    // Initialize built in bbcodes
            /*
            $this->addBBCode("b", 0, "<strong>{param}</strong>", 1);
            $this->addBBCode("i", 0, "<em>{param}</em>", 1);
            $this->addBBCode("url", 1, "<a href=\"{option}\">{param}</a>", 0);
            $this->addBBCode("color", 1, "<span style=\"color: {option};\">{param}</span>", 1);
            $this->addBBCode("img", 0, "<img src=\"{param}\" />", 0);*/
            
            
        
    }
        
        private function 
    loadCodes() {
        
            
    // this function loads all the bbcodes in the database into the object for use later when parsing is needed.
            
            // get the database connection
            
    $db Penelope_DB::getInstance();
            
            
    $getCodes $db->prepare("SELECT * FROM bbcodes");
            
    $getCodes->execute();
            while(
    $code $getCodes->fetch(PDO::FETCH_ASSOC)) {
                
    $this->addBBCode($code['tag'], $code['use_option'], $code['replacement'], $code['parse_content']);
            }
            
            if(
    $getCodes->errorCode() == "0000") {
                return 
    true;
            }
            else {
                return 
    false;
            }
        
        }
        
        private function 
    loadSmilies() {
        
            
    // this function loads all the smilies in the database into the object for use later when parsing
            
            
    $db Penelope_DB::getInstance();
            
            
    $getSmilies $db->prepare("SELECT * FROM smilies");
            
    $getSmilies->execute();
            
    $this->smilies $getSmilies->fetchAll(PDO::FETCH_ASSOC);
            
            if(
    $getSmilies->errorCode() == "0000")
            {
                return 
    true;
            }
            else {
                return 
    false;
            }
            
        }
        
        public function 
    parse($text NULL) {
        
            if(
    $text != NULL) {
                
    $this->setInput($text);
            }
            
            
    // make the bbcode tree
            
    $this->constructTree();
            
    $result $this->parseTreeAsHTML();
            
            
    $result $this->parseSmilies($result);
            
            
    $result nl2br($result);
                    
            return 
    $result;
        
        }
        
        public function 
    parseSmilies($text) {
        
            
    $config Penelope_Config::getInstance();
            
    $website_url $config->getProp("website_url");
        
            foreach(
    $this->smilies as $key => $arr)
            {
                
    $text str_replace($arr['search'], '<img src="'.$website_url.'/imgs/smilies/'.$arr['filepath'].'" alt="'.$arr['name'].'" />'$text);
            }
            
            return 
    $text;
        
        }
        
        
    // Tree Parsing Functions
        
        
    public function parseTreeAsHTML() {
            
            
    $parsedText "";
            foreach(
    $this->tree['children'] as $key => $arr) {
                
    $parsedText .= $this->parseAsHTML($arr1);
            }
            
            return 
    $parsedText;
            
        }
        
        private function 
    parseAsHTML($el$parse_content) {
            
            
    $parsedText "";
        
            if(
    $el['type'] == 'text') {
                
    // regular text is easy, just print that baby!
                
    $parsedText .= $el['data'];
            }
            else if(
    $parse_content == 1) {
        
                if(
    $el['attribute']) {
                    
    // there's an attribute, so find the bbcode that matches both the tagname and the option
                    
    $replacement $this->getReplacement($el['tagname'], 1);
                        
                    if(
    $replacement) {
                        
                        
    // replace the {option} with the attribute we found
                        
    $use_option 1;
                        
    $replacement str_replace('{option}'$el['attribute'], $replacement);
                    }
                }
                else {
                    
    // find the bbcode with the right tagname and has no option                
                    
    $replacement $this->getReplacement($el['tagname'], 0);
                    
    $use_option 0;    
                    if(
    $replacement === false) {
                        
    // there's no bbcode for this. 
                        // Maybe they want to use the bbcode with an option, but shorthand
                            
                        
    if(count($el['children']) == AND $el['children'][0]['type'] == "text") {
                            
    // the element only has 1 child. Let's use the child as the option.
                            
    $replacement $this->getReplacement($el['tagname'], 1);
                            if(
    $replacement) {
                                
    $replacement str_replace('{option}'$el['children'][0]['data'], $replacement);
                                
    $use_option 1;
                            }
                        }
                    }
                }
                    
                if(
    $replacement) {
                    
    // get the two parts that are split by the {param}
                    
    $replacementParts explode('{param}'$replacement);
                    
                }
                else {
                    
    // okay, this tag isn't supported. Just show the raw tag.
                        
                    
    if($el['attribute']) {
                        
    $replacementParts[0] = "[".$el['tagname']."=".$el['attribute']."]";
                    }
                    else {
                        
    $replacementParts[0] = "[".$el['tagname']."]";
                    }
                        
                    
    $replacementParts[1] = "[/".$el['tagname']."]";
                        
                }
                
                
    // print the first part,
                
    $parsedText .= $replacementParts[0];
            
                
                
    // print any children
                
    if(count($el['children']) > 0) {
                    
    $parse_content $this->getParseContent($el['tagname'], $use_option);
                    
                    foreach(
    $el['children'] as $key => $arr) {
                        
    $parsedText .= $this->parseAsHTML($arr$parse_content);
                    }
                }
                
                if(
    $replacementParts[1]) {
                    
    // now print the second part (usually a closing tag)
                    
    $parsedText .= $replacementParts[1];
                }
            }
            else {
                
    // Don't parse any elements...
                
    if($el['attribute']) {
                    
    $replacementParts[0] = "[".$el['tagname']."=".$el['attribute']."]";
                }
                else {
                    
    $replacementParts[0] = "[".$el['tagname']."]";
                }
                        
                
    $replacementParts[1] = "[/".$el['tagname']."]";
                
                
    $parsedText .= $replacementParts[0];
                
                
    // print any children
                
    if(count($el['children']) > 0) {
                    foreach(
    $el['children'] as $key => $arr) {
                        
    $parsedText .= $this->parseAsHTML($arr$parse_content);
                    }
                }
                            
                
    $parsedText .= $replacementParts[1];
                
            }
            
            return 
    $parsedText;
        }
        
        
        private function 
    getReplacement($tagname$option 0) {
                    
            foreach(
    $this->supportedTags as $key => $arr) {
                if(
    $arr['tagname'] == $tagname && $arr['option'] == $option)
                {
                    
                    return 
    $arr['replacement'];
                }
            }
            
            return 
    false;
            
        }
        
        private function 
    getParseContent($tagname$option 0) {
            
            foreach(
    $this->supportedTags as $key => $arr) {
                if(
    $arr['tagname'] == $tagname && $arr['option'] == $option)
                {
                    
                    return 
    $arr['parse_content'];
                }
            }
            
            return 
    false;
            
        }
        
        
    // Tree Manipulation
        
        
    public function constructTree() {
        
            
    $text $this->text;
            
            
    $data preg_split('/([\[\]])/'$text, -1PREG_SPLIT_DELIM_CAPTURE);
            
            
    $inTag false;
            
    $endingTag false;
            
    $tagName null;
            
    $nodeid 0;
            
    $parentNodeId 0;
            
    $attribute null;
            
            foreach(
    $data as $key => $piece) {
            
                
    $nodeid++;
                
                if(
    $piece == '[') {
                    
    $inTag true;
                }
                else if(
    $piece == ']' && $inTag === true) {
                    
    $inTag false;
                    
                    if(!
    $endingTag) {
                        
    // add the element
                        
    $this->_treeAppendChild($parentNodeId, array(
                                
    "nodeid" => $nodeid,
                                
    "parentNodeId" => $parentNodeId,
                                
    "type" => "element",
                                
    "tagname" => $tagName,
                                
    "attribute" => $attribute,
                                
    "children" => array()));
                                    
                        
    $parentNodeId $nodeid;
                    }
                    else {
                        
    // we're ending this element
                        
    $endingTag false;
                        
                        
    $parent $this->_treeGetElementById($parentNodeId);
                        
    $parentNodeId $parent['parentNodeId'];
                        
                    }
                }
                
                else {
                    if(
    $inTag) {
                        
    // $piece is a tag name
                        
    if(substr($piece01) == '/') {
                            
    // ending tag
                            
                            
    $tagName null;
                            
    $endingTag true;
                            
                        }
                        else {
                            
    // beginning tag
                            
    if(stripos($piece'=')) {
                                
    // there's an attribute
                                
    $parts explode('='$piece);
                                
    $tagName $parts[0];
                                
    $attribute $parts[1];
                            }
                            else {
                                
    $attribute null;
                                
    $tagName $piece;
                            }
                        }
                    }
                    else {
                        
    // $piece is regular text, add it to it's parent
                        
    $this->_treeAppendChild($parentNodeId, array(
                                    
    "nodeid" => $nodeid,
                                    
    "parentNodeId" => $parentNodeId,
                                    
    "type" => "text",
                                    
    "data" => $piece));
                                    
                    }
                }
                
                
            }
        }
        
        private function 
    _treeAppendChild($parentId$elInfo) {
        
            
    $parent =& $this->_treeGetElementById($parentId);
            
            if(isset(
    $parent['children']) && is_array($parent['children'])) {
                
    array_push($parent['children'], $elInfo);
                return 
    true;
            }
            
            else {
                
    // nothing to add the element to
                
    return false;
            }
        
        }
        
        private function &
    _treeGetElementById($id) {
        
            return 
    $this->_treeGetElementByIdHelper($id, &$this->tree);
        
        }
        
        private function &
    _treeGetElementByIdHelper($id, &$arr) {
            if(
    $arr['nodeid'] == $id) {
                
    // this is the array we're looking for!
                // return a reference to the array!
                
    return $arr;
            }
            else {
                if(
    is_array($arr['children']) && count($arr['children']) > 0) {
                    
    // loop through the children
                    
    foreach($arr['children'] as $key => $child) {
                        
    $result = & $this->_treeGetElementByIdHelper($id, &$arr['children'][$key]);
                        
                        if(
    $result != false) {
                            
    // they found it
                            
    return $result;
                        }
                    }
                }
            }
            
            return 
    false;
        }
        
        
    // BBCodes List Manipulation
        
        
    public function addBBCode($tagname$use_option$replacement$parse_content) {
        
            
    $this->supportedTags[] = array(
                                    
    "tagname" => $tagname
                                    
    "option" => $use_option
                                    
    "replacement" => $replacement,
                                    
    "parse_content" => $parse_content);
        
        }
        
        public function 
    printSupportedBBCodes() {
            echo 
    "<p><strong>Supported BBCodes:</strong></p>";
            echo 
    "<pre>";
            foreach(
    $this->supportedTags as $key => $arr) {
                
    $text print_r($arrtrue);
                echo 
    htmlentities($text);
            }
            echo 
    "</pre>";
            
        }
        

    }

    ?>

Bookmarks

Posting Permissions

  • You may not post new threads
  • You may not post replies
  • You may not post attachments
  • You may not edit your posts
  •