PHP Code:
<?php
//**BEGIN USER CONFIG**
//Page fetched when the request does not carry a 'url' parameter.
$default_url = "http://www.flamehtmlstudios.com";
//Text inserted directly after the first opening <title> tag of a fetched page.
$title_tag = "School Stuff PF --";
//When true, src="..." attributes are rewritten so media also loads through this script (EXPERIMENTAL).
$proxify_media = true;
//**END USER CONFIG**
//Request start as microtime()'s "msec sec" string; microtime_diff() consumes it for the timing comment printed at the end.
$start_time = microtime();
/**
 * Finds the offset of the nth occurrence of a substring.
 * (Adapted from the user notes at http://us3.php.net/strings.)
 *
 * @param string $haystack  String to search in.
 * @param string $needle    Substring to look for.
 * @param int    $occurance Which occurrence to locate, counting from 1.
 * @param int    $pos       Offset at which the search starts.
 * @return int|false Zero-based offset of the requested occurrence, or false when
 *                   $occurance is below 1 or the haystack holds fewer occurrences.
 */
function strnpos($haystack, $needle, $occurance, $pos = 0) {
    if ($occurance < 1) {
        return false;
    }
    for ($i = 1; $i <= $occurance; $i++) {
        $found = strpos($haystack, $needle, $pos);
        if ($found === false) {
            // A miss must end the search: false + 1 would evaluate to 1 and
            // quietly restart the scan near the beginning of the haystack.
            return false;
        }
        // Resume one position past this hit so the next pass finds the following occurrence.
        $pos = $found + 1;
    }
    return $pos - 1;
}
/**
 * Splits a URL into the pieces the proxy needs, using a hand-written regex
 * rather than PHP's built-in parse_url().
 *
 * @param string $url URL to split.
 * @return array Array with the keys "scheme", "host" and "path". A key holds ""
 *               when the URL lacks that part or does not match the pattern at all.
 */
function parseURL($url)
{
    //protocol(1), auth user(2), auth password(3), hostname(4), path(5), filename(6), file extension(7) and query(8)
    $pattern = "/^(?:(http[s]?):\/\/(?:(.*):(.*)@)?([^\/]+))?((?:[\/])?(?:[^\.]*?)?(?:[\/])?)?(?:([^\/^\.]+)\.([^\?]+))?(?:\?(.+))?$/i";
    $matches = array();
    preg_match($pattern, $url, $matches);
    // preg_match() leaves $matches empty on a failed match and drops trailing
    // unmatched groups, so every group is read with a fallback.
    $URI_PARTS = array();
    $URI_PARTS["scheme"] = isset($matches[1]) ? $matches[1] : "";
    $URI_PARTS["host"] = isset($matches[4]) ? $matches[4] : "";
    $URI_PARTS["path"] = isset($matches[5]) ? $matches[5] : "";
    return $URI_PARTS;
}
/**
 * Turns local (relative) URLs inside a fetched page into fully qualified URLs.
 *
 * References that start with "/" are resolved against the site root
 * (scheme://host/); other relative src/href/background/action values are
 * resolved against scheme://host/path/ of the page. Every <form> tag that has
 * an action is replaced by a form that posts back to this script, with the
 * real target kept in a hidden "original_url" field (the tag's other
 * attributes are not carried over).
 *
 * @param string $HTML Markup of the fetched page.
 * @param string $url  Address the page was fetched from.
 * @return string The rewritten markup.
 */
function completeURLs($HTML, $url)
{
    $URI_PARTS = parseURL($url);
    $path = trim($URI_PARTS["path"], "/");
    $host_url = trim($URI_PARTS["host"], "/");
    //Base for document-relative links: scheme://host/path/
    $host = $URI_PARTS["scheme"]."://".$host_url."/".$path."/";
    //Base for root-relative links: scheme://host/
    $host_no_path = $URI_PARTS["scheme"]."://".$host_url."/";
    //Proxifies local META redirects
    $HTML = preg_replace('@<META HTTP-EQUIV(.*)URL=/@', "<META HTTP-EQUIV\$1URL=".$_SERVER['PHP_SELF']."?url=".$host_no_path, $HTML);
    //Make sure the host doesn't end in '//' (which happens when the path is empty)
    $host = rtrim($host, '/')."/";
    //Replace '//' with 'http://'
    $pattern = "#(?<=\"|'|=)\/\/#"; //the '|=' is experimental as it's probably not necessary
    $HTML = preg_replace($pattern, "http://", $HTML);
    //Fully qualifies '"/'. A leading slash means "relative to the site root",
    //so the base is the host without the page's path.
    $HTML = preg_replace("#\"\/#", "\"".$host_no_path, $HTML);
    //Fully qualifies "'/". The replacement is a bare quote: "\'" inside a
    //double-quoted PHP string would emit a literal backslash into the page.
    $HTML = preg_replace("#\'\/#", "'".$host_no_path, $HTML);
    //Matches [src|href|background|action]="/ because in the following pattern the '/' shouldn't stay
    $HTML = preg_replace("#(src|href|background|action)(=\"|='|=(?!'|\"))\/#i", "\$1\$2".$host_no_path, $HTML);
    //Prefixes every remaining value that does not already start with a scheme
    $HTML = preg_replace("#(href|src|background|action)(=\"|=(?!'|\")|=')(?!http|ftp|https|\"|'|javascript:|mailto:)#i", "\$1\$2".$host, $HTML);
    //Points all form actions back to the proxy
    $HTML = preg_replace('/<form.+?action=\s*(["\']?)([^>\s"\']+)\\1[^>]*>/i', "<form action=\"{$_SERVER['PHP_SELF']}\"><input type=\"hidden\" name=\"original_url\" value=\"$2\">", $HTML);
    //Matches '/[any assortment of chars or nums]/../'
    $HTML = preg_replace("#\/(\w*?)\/\.\.\/(.*?)>#ims", "/\$2>", $HTML);
    //Matches '/./'
    $HTML = preg_replace("#\/\.\/(.*?)>#ims", "/\$1>", $HTML);
    //Handles CSS2 imports: only when the page contains no absolute import at all.
    //strpos() is compared with === because a hit at offset 0 is also "found".
    if (strpos($HTML, "import url(\"http") === false
        && strpos($HTML, "import \"http") === false
        && strpos($HTML, "import url(\"www") === false
        && strpos($HTML, "import \"www") === false) {
        $pattern = "#import .(.*?).;#ims";
        //Everything before the third '/' of the base, i.e. scheme://host
        $mainurl = substr($host, 0, strnpos($host, "/", 3));
        $replace = "import '".$mainurl."\$1';";
        $HTML = preg_replace($pattern, $replace, $HTML);
    }
    return $HTML;
}
/**
 * Redirects link targets through this proxy.
 *
 * Comments out any <base> tag, stamps <link> tags with an edited="PF" marker
 * so they are left alone, and prefixes the remaining href values with
 * "<this script>?url=". An href is only rewritten when a closing </a> appears
 * somewhere after it and its value does not begin with mailto:, ftp,
 * http://ftp, javascript: or a quote character.
 *
 * @param string $HTML Markup whose links should be rewritten.
 * @return string The markup with proxied hrefs.
 */
function proxyURLs($HTML)
{
    //Marker value used to check if a link has already been modified by the proxy
    $marker = "PF";
    $self = $_SERVER['PHP_SELF'];

    //BASE tag needs to be removed for sites like yahoo.com
    //(alternative: make the proxy insert the FULL URL to itself); it is commented out for now
    $HTML = preg_replace("#\<base(.*?)\>#ims", "<!-- <base\$1> -->", $HTML);

    //Place the marker attribute on <link> tags, just before their href
    $HTML = preg_replace(
        "#\<link(.*?)(\shref=)#ims",
        "<link\$1 edited=\"".$marker."\"\$2",
        $HTML
    );

    //Unmarked hrefs that are followed by a closing </a> later in the document
    $href_pattern = "#(?<!edited=\"".$marker."\"\s)(href='|href=\"|href=(?!'|\"))(?=(.+)\</a\>)(?!mailto:|http://ftp|ftp|javascript:|'|\")#ims";
    $href_replacement = "edited=\"".$marker."\" \$1".$self.'?url=';

    return preg_replace($href_pattern, $href_replacement, $HTML);
}
/**
 * Calculates the time elapsed between two microtime() captures.
 *
 * @param string $a Earlier capture in microtime()'s "msec sec" string form.
 * @param string $b Later capture in the same form.
 * @return int|float $b minus $a, in seconds.
 */
function microtime_diff($a, $b)
{
    //Index 0 is the fractional part, index 1 the whole seconds
    $earlier = explode(" ", $a);
    $later = explode(" ", $b);

    $elapsed = $later[1] - $earlier[1];
    $elapsed = $elapsed + $later[0];
    return $elapsed - $earlier[0];
}
/**
 * Retrieves a file from the web with cURL.
 *
 * Redirects are followed, response headers are left out of the result, and
 * CURLOPT_FAILONERROR is enabled for the transfer.
 *
 * @param string $fileLoc Address of the resource to download.
 * @return string|false Whatever curl_exec() returns for the transfer.
 */
function getFile($fileLoc)
{
    $settings = array(
        CURLOPT_HEADER => false,
        CURLOPT_FOLLOWLOCATION => true,
        CURLOPT_RETURNTRANSFER => true,
        CURLOPT_FAILONERROR => true,
    );

    $handle = curl_init($fileLoc);
    foreach ($settings as $option => $value) {
        curl_setopt($handle, $option, $value);
    }
    $contents = curl_exec($handle);
    curl_close($handle);

    return $contents;
}
//Get ready to display data...
//Read the target defensively: the parameter may be absent or not a string.
$url = (isset($_GET['url']) && is_string($_GET['url'])) ? $_GET['url'] : "";
if(empty($url)) $url = $default_url;
//Check the URL for protocol, etc....
//Add a scheme only when none is present, so https:// targets are not turned into http://https://...
if(!preg_match('#^https?://#i', $url))
{
    $url = "http://".$url;
}
//Checks if there was a form redirected to this proxy.
//Start from a known value so the check further down never reads an unset variable.
$form_submission = false;
if(!empty($_POST['original_url']))
{
    $form_submission = true;
}
else if(!empty($_GET['original_url']) && is_string($_GET['original_url']))
{
    //have to strip off any unwanted stuff from original_url
    $url = explode(" ", $_GET['original_url']);
    $url = $url[0];
    //Target = the form's real action plus the submitted query string, minus the original_url field itself
    $url = urldecode($url)."?".str_replace("original_url=".urlencode($_GET['original_url'])."&", "", $_SERVER['QUERY_STRING']);
}
if(!$form_submission) //OK, no redirected form so go ahead and fetch a page.
{
    $HTML = getFile($url);
    //Rewrite only when the fetch produced a string that mentions "html".
    //NOTE(review): the previous check read an undefined $html and therefore always passed;
    //confirm that leaving payloads without "html" (e.g. binary media) untouched is the intended behaviour.
    if (is_string($HTML) && stripos($HTML, "html") !== false) {
        $HTML = preg_replace("#\<(title|TITLE)\>#", "<\$1>".$title_tag, $HTML, 1);
        $HTML = completeURLs($HTML, $url); //Complete local links so that they are fully qualified URIs
        $HTML = proxyURLs($HTML); //Complete links so that they pass through this proxy
        //Point all media back to proxy--EXPERIMENTAL!
        if ($proxify_media) {
            $pattern = '/src=\s*(["\']?)([^>\s"\']+)\\1[^>]*>/i';
            $replace = "src=\"{$_SERVER['PHP_SELF']}?url=$2\">";
            $HTML = preg_replace($pattern, $replace, $HTML);
        }
    }
    print_r($HTML); //Output the page using print_r so that frames at least partially are written
    flush();
    //Calculate time and add HTML comment with that info
    $duration = microtime_diff($start_time, microtime());
    $duration = sprintf("%0.3f", $duration);
    echo ("\n<!-- PageForward v1.5b2 took $duration seconds to construct this page.-->");
}
?>
Bookmarks