jackbenimble4
06-05-2008, 08:17 PM
I've been working on building a custom forum software from the ground up the past few months. When I got to parsing messages, I wrote a TextParser class to handle bbcode, smilies and the like. It worked well until I realized that embedded bbcodes of the same type weren't being parsed.
I found some discussions on the topic and they pointed to using preg_replace_callback to recursively replace the bbcodes. I had some trouble implementing it and got some strange results. In search for more answers I came across a blog post where someone said to avoid regular expressions to parse bbcode structures. How would I not use regular expressions to parse bbcode? Are there any other alternatives?
For the curious, here's my class. My attempt at recursion is commented out and the original attempt isn't.
<?php
class Penelope_TextParser {
private $BBCodes = array();
private $smilies = array();
private $currentReplacement;
private $currentBBCode;
public function __construct() {
$this->loadCodes();
$this->loadSmilies();
}
private function loadCodes() {
// this function loads all the bbcodes in the database into the object for use later when parsing is needed.
// get the database connection
$db = Penelope_DB::getInstance();
$getCodes = $db->prepare("SELECT * FROM bbcodes");
$getCodes->execute();
$this->BBCodes = $getCodes->fetchAll(PDO::FETCH_ASSOC);
if($getCodes->errorCode() == "0000") {
return true;
}
else {
return false;
}
}
private function loadSmilies() {
// this function loads all the smilies in the database into the object for use later when parsing
$db = Penelope_DB::getInstance();
$getSmilies = $db->prepare("SELECT * FROM smilies");
$getSmilies->execute();
$this->smilies = $getSmilies->fetchAll(PDO::FETCH_ASSOC);
if($getSmilies->errorCode() == "0000")
{
return true;
}
else {
return false;
}
}
private function prepareReplacement($replacement, $use_option) {
if($use_option) {
$replacement = preg_replace('/{option}/', '\$1', $replacement);
$replacement = preg_replace('/{param}/', '\$2', $replacement);
}
else {
$replacement = preg_replace('/{param}/', '\$1', $replacement);
}
return $replacement;
}
public function parse($text) {
$text = htmlentities($text);
$text = $this->parseBBCode($text);
$text = $this->parseSmilies($text);
$text = nl2br($text);
return $text;
}
protected function recursiveReplaceSimple($input) {
//$arr = $this->currentBBCode;
if(is_array($input)) {
$replacement = $this->currentReplacement;
$input = str_replace('{param}', $input[1], $replacement);
}
return preg_replace_callback('/\['.$arr['tag'].'\](.+)\[\/'.$arr['tag'].'\]/is', array(&$this, 'recursiveReplaceSimple'), $input, 5);
}
protected function recursiveReplaceAdvanced($input) {
$arr = $this->currentBBCode;
if(is_array($input)) {
$replacement = $this->currentReplacement;
$option = $input[1];
$param = $input[2];
$input = str_replace('{param}', $param, $replacement);
$input = str_replace('{option}', $option, $input);
}
return preg_replace_callback('/\['.$arr['tag'].'\](.+)\[\/'.$arr['tag'].'\]/is', array(&$this, 'recursiveReplaceAdvanced'), $input, 5);
}
public function parseBBCode($text) {
foreach($this->BBCodes as $key => $arr)
{
if(!$arr['use_option']) {
/*
$this->currentReplacement = $arr['replacement'];
$this->currentBBcode = $arr;
// RECURSIVE REPLACEMENT
$text = preg_replace_callback('/\['.$arr['tag'].'\](.+)\[\/'.$arr['tag'].'\]/is', array(&$this, 'recursiveReplaceSimple'), $text, 5);
*/
//ORIGINAL REPLACEMENT:
$replacement = $this->prepareReplacement($arr['replacement'], 0);
$text = preg_replace('/\['.$arr['tag'].'\](.+)\[\/'.$arr['tag'].'\]/isU', $replacement, $text);
}
else {
/*
$this->currentReplacement = $arr['replacement'];
$this->currentBBCode = $arr;
// RECURSIVE REPLACEMENT
$text = preg_replace_callback('/\['.$arr['tag'].'=([^\]]+)\](.+)\[\/'.$arr['tag'].'\]/is', array(&$this, 'recursiveReplaceAdvanced'), $text, 5);
*/
//ORIGINAL REPLACEMENT:
$replacement = $this->prepareReplacement($arr['replacement'], 1);
$text = preg_replace('/\['.$arr['tag'].'=([^\]]+)\](.+)\[\/'.$arr['tag'].'\]/isU', $replacement, $text);
}
}
return $text;
}
public function parseSmilies($text) {
$config = Penelope_Config::getInstance();
$website_url = $config->getProp("website_url");
foreach($this->smilies as $key => $arr)
{
$text = str_replace($arr['search'], '<img src="'.$website_url.'/imgs/smilies/'.$arr['filepath'].'" alt="'.$arr['name'].'" />', $text);
}
return $text;
}
public function addDefaultBBCode($text, $font, $color, $size) {
$tmp = "";
if($font) {
$tmp .= "";
}
if($color) {
$tmp .= "";
}
if($size) {
$tmp .= "";
}
$tmp .= $text;
if($size) {
$tmp .= "";
}
if($color) {
$tmp .= "";
}
if($font) {
$tmp .= "";
}
return $tmp;
}
}
?>
I found some discussions on the topic and they pointed to using preg_replace_callback to recursively replace the bbcodes. I had some trouble implementing it and got some strange results. In search for more answers I came across a blog post where someone said to avoid regular expressions to parse bbcode structures. How would I not use regular expressions to parse bbcode? Are there any other alternatives?
For the curious, here's my class. My attempt at recursion is commented out and the original attempt isn't.
<?php
class Penelope_TextParser {
private $BBCodes = array();
private $smilies = array();
private $currentReplacement;
private $currentBBCode;
public function __construct() {
$this->loadCodes();
$this->loadSmilies();
}
private function loadCodes() {
// this function loads all the bbcodes in the database into the object for use later when parsing is needed.
// get the database connection
$db = Penelope_DB::getInstance();
$getCodes = $db->prepare("SELECT * FROM bbcodes");
$getCodes->execute();
$this->BBCodes = $getCodes->fetchAll(PDO::FETCH_ASSOC);
if($getCodes->errorCode() == "0000") {
return true;
}
else {
return false;
}
}
private function loadSmilies() {
// this function loads all the smilies in the database into the object for use later when parsing
$db = Penelope_DB::getInstance();
$getSmilies = $db->prepare("SELECT * FROM smilies");
$getSmilies->execute();
$this->smilies = $getSmilies->fetchAll(PDO::FETCH_ASSOC);
if($getSmilies->errorCode() == "0000")
{
return true;
}
else {
return false;
}
}
private function prepareReplacement($replacement, $use_option) {
if($use_option) {
$replacement = preg_replace('/{option}/', '\$1', $replacement);
$replacement = preg_replace('/{param}/', '\$2', $replacement);
}
else {
$replacement = preg_replace('/{param}/', '\$1', $replacement);
}
return $replacement;
}
public function parse($text) {
$text = htmlentities($text);
$text = $this->parseBBCode($text);
$text = $this->parseSmilies($text);
$text = nl2br($text);
return $text;
}
protected function recursiveReplaceSimple($input) {
//$arr = $this->currentBBCode;
if(is_array($input)) {
$replacement = $this->currentReplacement;
$input = str_replace('{param}', $input[1], $replacement);
}
return preg_replace_callback('/\['.$arr['tag'].'\](.+)\[\/'.$arr['tag'].'\]/is', array(&$this, 'recursiveReplaceSimple'), $input, 5);
}
protected function recursiveReplaceAdvanced($input) {
$arr = $this->currentBBCode;
if(is_array($input)) {
$replacement = $this->currentReplacement;
$option = $input[1];
$param = $input[2];
$input = str_replace('{param}', $param, $replacement);
$input = str_replace('{option}', $option, $input);
}
return preg_replace_callback('/\['.$arr['tag'].'\](.+)\[\/'.$arr['tag'].'\]/is', array(&$this, 'recursiveReplaceAdvanced'), $input, 5);
}
public function parseBBCode($text) {
foreach($this->BBCodes as $key => $arr)
{
if(!$arr['use_option']) {
/*
$this->currentReplacement = $arr['replacement'];
$this->currentBBcode = $arr;
// RECURSIVE REPLACEMENT
$text = preg_replace_callback('/\['.$arr['tag'].'\](.+)\[\/'.$arr['tag'].'\]/is', array(&$this, 'recursiveReplaceSimple'), $text, 5);
*/
//ORIGINAL REPLACEMENT:
$replacement = $this->prepareReplacement($arr['replacement'], 0);
$text = preg_replace('/\['.$arr['tag'].'\](.+)\[\/'.$arr['tag'].'\]/isU', $replacement, $text);
}
else {
/*
$this->currentReplacement = $arr['replacement'];
$this->currentBBCode = $arr;
// RECURSIVE REPLACEMENT
$text = preg_replace_callback('/\['.$arr['tag'].'=([^\]]+)\](.+)\[\/'.$arr['tag'].'\]/is', array(&$this, 'recursiveReplaceAdvanced'), $text, 5);
*/
//ORIGINAL REPLACEMENT:
$replacement = $this->prepareReplacement($arr['replacement'], 1);
$text = preg_replace('/\['.$arr['tag'].'=([^\]]+)\](.+)\[\/'.$arr['tag'].'\]/isU', $replacement, $text);
}
}
return $text;
}
public function parseSmilies($text) {
$config = Penelope_Config::getInstance();
$website_url = $config->getProp("website_url");
foreach($this->smilies as $key => $arr)
{
$text = str_replace($arr['search'], '<img src="'.$website_url.'/imgs/smilies/'.$arr['filepath'].'" alt="'.$arr['name'].'" />', $text);
}
return $text;
}
public function addDefaultBBCode($text, $font, $color, $size) {
$tmp = "";
if($font) {
$tmp .= "";
}
if($color) {
$tmp .= "";
}
if($size) {
$tmp .= "";
}
$tmp .= $text;
if($size) {
$tmp .= "";
}
if($color) {
$tmp .= "";
}
if($font) {
$tmp .= "";
}
return $tmp;
}
}
?>