Snoopy.class.inc

  1: <?php
  2: 
  3: /*************************************************
  4: 
  5: Snoopy - the PHP net client
  6: Author: Monte Ohrt <monte@ispi.net>
  7: Copyright (c): 1999-2000 ispi, all rights reserved
  8: Version: 1.0
  9: 
 10:  * This library is free software; you can redistribute it and/or
 11:  * modify it under the terms of the GNU Lesser General Public
 12:  * License as published by the Free Software Foundation; either
 13:  * version 2.1 of the License, or (at your option) any later version.
 14:  *
 15:  * This library is distributed in the hope that it will be useful,
 16:  * but WITHOUT ANY WARRANTY; without even the implied warranty of
 17:  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 18:  * Lesser General Public License for more details.
 19:  *
 20:  * You should have received a copy of the GNU Lesser General Public
 21:  * License along with this library; if not, write to the Free Software
 22:  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 23: 
 24: You may contact the author of Snoopy by e-mail at:
 25: monte@ispi.net
 26: 
 27: Or, write to:
 28: Monte Ohrt
 29: CTO, ispi
 30: 237 S. 70th suite 220
 31: Lincoln, NE 68510
 32: 
 33: The latest version of Snoopy can be obtained from:
 34: http://snoopy.sourceforge.com
 35: 
 36: *************************************************/
 37: 
 38: class Snoopy
 39: {
	/**** Public variables ****/

	/* user definable vars */

	// Request target. fetch() replaces $host with the host of the URL it is
	// given, and $port too when that URL names a port.
	var $host			=	"www.php.net";
	var $port			=	80;

	// Proxy to connect through. _connect() uses it only when BOTH values
	// are non-empty.
	var $proxy_host		=	"";
	var $proxy_port		=	"";

	// Sent as the User-Agent and Referer request headers when non-empty.
	var $agent			=	"Snoopy v1.0";
	var	$referer		=	"";

	// Cookies to send, name => value:  $cookies["username"]="joe";
	// Values are urlencode()d when the Cookie header is built.
	var $cookies		=	array();

	// Extra request headers, name => value:
	// $rawheaders["Content-type"]="text/html";
	var	$rawheaders		=	array();

	// Redirect handling: a redirect is followed only while $maxredirs is
	// greater than the number already followed (so 0 disallows them).
	var $maxredirs		=	5;
	// Address of the most recent redirect that fetch() followed.
	var $lastredirectaddr	=	"";
	// When false, only redirects that stay on the current host are followed.
	var	$offsiteok		=	true;

	// Frame handling: maximum number of frame documents to pull in
	// (0 disallows fetching frames).
	var $maxframes		=	0;

	// Expand links to fully qualified URLs. Documented as applying to
	// fetchlinks()/submitlinks(); NOTE(review): neither method is part of
	// this listing and no visible code reads this flag.
	var $expandlinks	=	true;

	// Hand cookies set by a response back when following its redirect.
	// NOTE: this currently does not respect dates, domains or paths.
	var $passcookies	=	true;

	// Credentials for HTTP authentication (sent as an Authorization header).
	var	$user			=	"";
	var	$pass			=	"";

	// http accept types
	var $accept			=	"image/gif, image/x-xbitmap, image/jpeg, image/pjpeg, */*";

	// Where the content is put: a string, or an array of documents once
	// framed content is involved.
	var $results		=	"";

	var $error			=	"";					// last error message
	var	$response_code	=	"";					// raw status line returned by the server
	var	$headers		=	array();			// raw header lines returned by the server
	var	$maxlength		=	500000;				// max return data length (body)

	// Timeout on read operations, in seconds; 0 disables it.
	// (supported only since PHP 4 Beta 4)
	var $read_timeout	=	0;
	var $timed_out		=	false;				// set to true when a read timed out

	// HTTP status of the last request. Also carries -100 after a read
	// timeout and the socket error number after a failed connect.
	var	$status			=	0;

	// Full system path to the cURL binary, used for fetching SSL content.
	// Set to false if you do not have cURL installed; see
	// http://curl.haxx.se for details on installing it. Snoopy does *not*
	// use the cURL library functions built into php, as these functions
	// were not stable as of this Snoopy release.
	var	$curl_path		=	"/usr/bin/curl";

	// send Accept-encoding: gzip?
	var $use_gzip		= true;

	/**** Private variables ****/

	var	$_maxlinelen	=	4096;				// longest header line read in one go

	var $_httpmethod	=	"GET";				// request method used by fetch()
	var $_httpversion	=	"HTTP/1.0";			// protocol version put on the request line
	var $_submit_method	=	"POST";				// default submit method
	var $_submit_type	=	"application/x-www-form-urlencoded";	// encoding used by _prepare_post_body()
	var $_mime_boundary	=   "";					// boundary generated for multipart/form-data bodies
	var $_redirectaddr	=	false;				// redirect target of the last response, or false
	var $_redirectdepth	=	0;					// number of redirects followed so far
	var $_frameurls		= 	array();			// frame src urls still to be fetched
	var $_framedepth	=	0;					// number of frame documents fetched so far

	var $_isproxy		=	false;				// set by _connect() when a proxy is in use
	var $_fp_timeout	=	30;					// timeout for opening the socket, in seconds
115: 
116: /*======================================================================*\
117: 	Function:	fetch
118: 	Purpose:	fetch the contents of a web page
119: 				(and possibly other protocols in the
120: 				future like ftp, nntp, gopher, etc.)
121: 	Input:		$URI	the location of the page to fetch
122: 	Output:		$this->results	the output text from the fetch
123: \*======================================================================*/
124: 
125: 	function fetch($URI)
126: 	{
127: 
128: 		//preg_match("|^([^:]+)://([^:/]+)(:[\d]+)*(.*)|",$URI,$URI_PARTS);
129: 		$URI_PARTS = parse_url($URI);
130: 		if (!empty($URI_PARTS["user"]))
131: 			$this->user = $URI_PARTS["user"];
132: 		if (!empty($URI_PARTS["pass"]))
133: 			$this->pass = $URI_PARTS["pass"];
134: 
135: 		switch($URI_PARTS["scheme"])
136: 		{
137: 			case "http":
138: 				$this->host = $URI_PARTS["host"];
139: 				if(!empty($URI_PARTS["port"]))
140: 					$this->port = $URI_PARTS["port"];
141: 				if($this->_connect($fp))
142: 				{
143: 					if($this->_isproxy)
144: 					{
145: 						// using proxy, send entire URI
146: 						$this->_httprequest($URI,$fp,$URI,$this->_httpmethod);
147: 					}
148: 					else
149: 					{
150: 						$path = $URI_PARTS["path"].(isset($URI_PARTS["query"]) ? "?".$URI_PARTS["query"] : "");
151: 						// no proxy, send only the path
152: 						$this->_httprequest($path, $fp, $URI, $this->_httpmethod);
153: 					}
154: 
155: 					$this->_disconnect($fp);
156: 
157: 					if($this->_redirectaddr)
158: 					{
159: 						/* url was redirected, check if we've hit the max depth */
160: 						if($this->maxredirs > $this->_redirectdepth)
161: 						{
162: 							// only follow redirect if it's on this site, or offsiteok is true
163: 							if(preg_match("|^http://".preg_quote($this->host)."|i",$this->_redirectaddr) || $this->offsiteok)
164: 							{
165: 								/* follow the redirect */
166: 								$this->_redirectdepth++;
167: 								$this->lastredirectaddr=$this->_redirectaddr;
168: 								$this->fetch($this->_redirectaddr);
169: 							}
170: 						}
171: 					}
172: 
173: 					if($this->_framedepth < $this->maxframes && count($this->_frameurls) > 0)
174: 					{
175: 						$frameurls = $this->_frameurls;
176: 						$this->_frameurls = array();
177: 
178: 						while(list(,$frameurl) = each($frameurls))
179: 						{
180: 							if($this->_framedepth < $this->maxframes)
181: 							{
182: 								$this->fetch($frameurl);
183: 								$this->_framedepth++;
184: 							}
185: 							else
186: 								break;
187: 						}
188: 					}
189: 				}
190: 				else
191: 				{
192: 					return false;
193: 				}
194: 				return true;
195: 				break;
196: 			case "https":
197: 				if(!$this->curl_path || (!is_executable($this->curl_path))) {
198: 					$this->error = "Bad curl ($this->curl_path), can't fetch HTTPS \n";
199: 					return false;
200: 				}
201: 				$this->host = $URI_PARTS["host"];
202: 				if(!empty($URI_PARTS["port"]))
203: 					$this->port = $URI_PARTS["port"];
204: 				if($this->_isproxy)
205: 				{
206: 					// using proxy, send entire URI
207: 					$this->_httpsrequest($URI,$URI,$this->_httpmethod);
208: 				}
209: 				else
210: 				{
211: 					$path = $URI_PARTS["path"].($URI_PARTS["query"] ? "?".$URI_PARTS["query"] : "");
212: 					// no proxy, send only the path
213: 					$this->_httpsrequest($path, $URI, $this->_httpmethod);
214: 				}
215: 
216: 				if($this->_redirectaddr)
217: 				{
218: 					/* url was redirected, check if we've hit the max depth */
219: 					if($this->maxredirs > $this->_redirectdepth)
220: 					{
221: 						// only follow redirect if it's on this site, or offsiteok is true
222: 						if(preg_match("|^http://".preg_quote($this->host)."|i",$this->_redirectaddr) || $this->offsiteok)
223: 						{
224: 							/* follow the redirect */
225: 							$this->_redirectdepth++;
226: 							$this->lastredirectaddr=$this->_redirectaddr;
227: 							$this->fetch($this->_redirectaddr);
228: 						}
229: 					}
230: 				}
231: 
232: 				if($this->_framedepth < $this->maxframes && count($this->_frameurls) > 0)
233: 				{
234: 					$frameurls = $this->_frameurls;
235: 					$this->_frameurls = array();
236: 
237: 					while(list(,$frameurl) = each($frameurls))
238: 					{
239: 						if($this->_framedepth < $this->maxframes)
240: 						{
241: 							$this->fetch($frameurl);
242: 							$this->_framedepth++;
243: 						}
244: 						else
245: 							break;
246: 					}
247: 				}
248: 				return true;
249: 				break;
250: 			default:
251: 				// not a valid protocol
252: 				$this->error	=	'Invalid protocol "'.$URI_PARTS["scheme"].'"\n';
253: 				return false;
254: 				break;
255: 		}
256: 		return true;
257: 	}
258: 
259: 
260: 
261: /*======================================================================*\
262: 	Private functions
263: \*======================================================================*/
264: 
265: 
266: /*======================================================================*\
267: 	Function:	_striplinks
268: 	Purpose:	strip the hyperlinks from an html document
269: 	Input:		$document	document to strip.
270: 	Output:		$match		an array of the links
271: \*======================================================================*/
272: 
273: 	function _striplinks($document)
274: 	{
275: 		preg_match_all("'<\s*a\s+.*href\s*=\s*			# find <a href=
276: 						([\"\'])?					# find single or double quote
277: 						(?(1) (.*?)\\1 | ([^\s\>]+))		# if quote found, match up to next matching
278: 													# quote, otherwise match up to next space
279: 						'isx",$document,$links);
280: 
281: 
282: 		// catenate the non-empty matches from the conditional subpattern
283: 
284: 		while(list($key,$val) = each($links[2]))
285: 		{
286: 			if(!empty($val))
287: 				$match[] = $val;
288: 		}
289: 
290: 		while(list($key,$val) = each($links[3]))
291: 		{
292: 			if(!empty($val))
293: 				$match[] = $val;
294: 		}
295: 
296: 		// return the links
297: 		return $match;
298: 	}
299: 
300: /*======================================================================*\
301: 	Function:	_stripform
302: 	Purpose:	strip the form elements from an html document
303: 	Input:		$document	document to strip.
 304: 	Output:		$match		the matched form tags, joined with CRLF into one string
305: \*======================================================================*/
306: 
307: 	function _stripform($document)
308: 	{
309: 		preg_match_all("'<\/?(FORM|INPUT|SELECT|TEXTAREA|(OPTION))[^<>]*>(?(2)(.*(?=<\/?(option|select)[^<>]*>[\r\n]*)|(?=[\r\n]*))|(?=[\r\n]*))'Usi",$document,$elements);
310: 
311: 		// catenate the matches
312: 		$match = implode("\r\n",$elements[0]);
313: 
314: 		// return the links
315: 		return $match;
316: 	}
317: 
318: 
319: 
320: /*======================================================================*\
321: 	Function:	_striptext
322: 	Purpose:	strip the text from an html document
323: 	Input:		$document	document to strip.
324: 	Output:		$text		the resulting text
325: \*======================================================================*/
326: 
327: 	function _striptext($document)
328: 	{
329: 
330: 		// I didn't use preg eval (//e) since that is only available in PHP 4.0.
331: 		// so, list your entities one by one here. I included some of the
332: 		// more common ones.
333: 
334: 		$search = array("'<script[^>]*?>.*?</script>'si",	// strip out javascript
335: 						"'<[\/\!]*?[^<>]*?>'si",			// strip out html tags
336: 						"'([\r\n])[\s]+'",					// strip out white space
337: 						"'&(quote|#34);'i",					// replace html entities
338: 						"'&(amp|#38);'i",
339: 						"'&(lt|#60);'i",
340: 						"'&(gt|#62);'i",
341: 						"'&(nbsp|#160);'i",
342: 						"'&(iexcl|#161);'i",
343: 						"'&(cent|#162);'i",
344: 						"'&(pound|#163);'i",
345: 						"'&(copy|#169);'i"
346: 						);
347: 		$replace = array(	"",
348: 							"",
349: 							"\\1",
350: 							"\"",
351: 							"&",
352: 							"<",
353: 							">",
354: 							" ",
355: 							chr(161),
356: 							chr(162),
357: 							chr(163),
358: 							chr(169));
359: 
360: 		$text = preg_replace($search,$replace,$document);
361: 
362: 		return $text;
363: 	}
364: 
365: /*======================================================================*\
366: 	Function:	_expandlinks
367: 	Purpose:	expand each link into a fully qualified URL
368: 	Input:		$links			the links to qualify
369: 				$URI			the full URI to get the base from
370: 	Output:		$expandedLinks	the expanded links
371: \*======================================================================*/
372: 
373: 	function _expandlinks($links,$URI)
374: 	{
375: 
376: 		preg_match("/^[^\?]+/",$URI,$match);
377: 
378: 		$match = preg_replace("|/[^\/\.]+\.[^\/\.]+$|","",$match[0]);
379: 
380: 		$search = array( 	"|^http://".preg_quote($this->host)."|i",
381: 							"|^(?!http://)(\/)?(?!mailto:)|i",
382: 							"|/\./|",
383: 							"|/[^\/]+/\.\./|"
384: 						);
385: 
386: 		$replace = array(	"",
387: 							$match."/",
388: 							"/",
389: 							"/"
390: 						);
391: 
392: 		$expandedLinks = preg_replace($search,$replace,$links);
393: 
394: 		return $expandedLinks;
395: 	}
396: 
397: /*======================================================================*\
398: 	Function:	_httprequest
399: 	Purpose:	go get the http data from the server
400: 	Input:		$url		the url to fetch
401: 				$fp			the current open file pointer
402: 				$URI		the full URI
403: 				$body		body contents to send if any (POST)
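 404: 	Output:		true, or false on a read timeout; the response is stored in $this->results, $this->headers and $this->status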
405: \*======================================================================*/
406: 
407: 	function _httprequest($url,$fp,$URI,$http_method,$content_type="",$body="")
408: 	{
409: 		if($this->passcookies && $this->_redirectaddr)
410: 			$this->setcookies();
411: 
412: 		$URI_PARTS = parse_url($URI);
413: 		if(empty($url))
414: 			$url = "/";
415: 		$headers = $http_method." ".$url." ".$this->_httpversion."\r\n";
416: 		if(!empty($this->agent))
417: 			$headers .= "User-Agent: ".$this->agent."\r\n";
418: 		if(!empty($this->host) && !isset($this->rawheaders['Host']))
419: 			$headers .= "Host: ".$this->host."\r\n";
420: 		if(!empty($this->accept))
421: 			$headers .= "Accept: ".$this->accept."\r\n";
422: 
423: 		if($this->use_gzip) {
424: 			// make sure PHP was built with --with-zlib
425: 			// and we can handle gzipp'ed data
426: 			if ( function_exists(gzinflate) ) {
427: 			   $headers .= "Accept-encoding: gzip\r\n";
428: 			}
429: 			else {
430: 			   trigger_error(
431: 			   	"use_gzip is on, but PHP was built without zlib support.".
432: 				"  Requesting file(s) without gzip encoding.",
433: 				E_USER_NOTICE);
434: 			}
435: 		}
436: 
437: 		if(!empty($this->referer))
438: 			$headers .= "Referer: ".$this->referer."\r\n";
439: 		if(!empty($this->cookies))
440: 		{
441: 			if(!is_array($this->cookies))
442: 				$this->cookies = (array)$this->cookies;
443: 
444: 			reset($this->cookies);
445: 			if ( count($this->cookies) > 0 ) {
446: 				$cookie_headers .= 'Cookie: ';
447: 				foreach ( $this->cookies as $cookieKey => $cookieVal ) {
448: 				$cookie_headers .= $cookieKey."=".urlencode($cookieVal)."; ";
449: 				}
450: 				$headers .= substr($cookie_headers,0,-2) . "\r\n";
451: 			}
452: 		}
453: 		if(!empty($this->rawheaders))
454: 		{
455: 			if(!is_array($this->rawheaders))
456: 				$this->rawheaders = (array)$this->rawheaders;
457: 			while(list($headerKey,$headerVal) = each($this->rawheaders))
458: 				$headers .= $headerKey.": ".$headerVal."\r\n";
459: 		}
460: 		if(!empty($content_type)) {
461: 			$headers .= "Content-type: $content_type";
462: 			if ($content_type == "multipart/form-data")
463: 				$headers .= "; boundary=".$this->_mime_boundary;
464: 			$headers .= "\r\n";
465: 		}
466: 		if(!empty($body))
467: 			$headers .= "Content-length: ".strlen($body)."\r\n";
468: 		if(!empty($this->user) || !empty($this->pass))
469: 			$headers .= "Authorization: BASIC ".base64_encode($this->user.":".$this->pass)."\r\n";
470: 
471: 		$headers .= "\r\n";
472: 
473: 		// set the read timeout if needed
474: 		if ($this->read_timeout > 0)
475: 			socket_set_timeout($fp, $this->read_timeout);
476: 		$this->timed_out = false;
477: 
478: 		fwrite($fp,$headers.$body,strlen($headers.$body));
479: 
480: 		$this->_redirectaddr = false;
481: 		unset($this->headers);
482: 
483: 		// content was returned gzip encoded?
484: 		$is_gzipped = false;
485: 
486: 		while($currentHeader = fgets($fp,$this->_maxlinelen))
487: 		{
488: 			if ($this->read_timeout > 0 && $this->_check_timeout($fp))
489: 			{
490: 				$this->status=-100;
491: 				return false;
492: 			}
493: 
494: 		//	if($currentHeader == "\r\n")
495: 			if(preg_match("/^\r?\n$/", $currentHeader) )
496: 			      break;
497: 
498: 			// if a header begins with Location: or URI:, set the redirect
499: 			if(preg_match("/^(Location:|URI:)/i",$currentHeader))
500: 			{
501: 				// get URL portion of the redirect
502: 				preg_match("/^(Location:|URI:)\s+(.*)/",chop($currentHeader),$matches);
503: 				// look for :// in the Location header to see if hostname is included
504: 				if(!preg_match("|\:\/\/|",$matches[2]))
505: 				{
506: 					// no host in the path, so prepend
507: 					$this->_redirectaddr = $URI_PARTS["scheme"]."://".$this->host.":".$this->port;
508: 					// eliminate double slash
509: 					if(!preg_match("|^/|",$matches[2]))
510: 							$this->_redirectaddr .= "/".$matches[2];
511: 					else
512: 							$this->_redirectaddr .= $matches[2];
513: 				}
514: 				else
515: 					$this->_redirectaddr = $matches[2];
516: 			}
517: 
518: 			if(preg_match("|^HTTP/|",$currentHeader))
519: 			{
520:                 if(preg_match("|^HTTP/[^\s]*\s(.*?)\s|",$currentHeader, $status))
521: 				{
522: 					$this->status= $status[1];
523:                 }
524: 				$this->response_code = $currentHeader;
525: 			}
526: 
527: 			if (preg_match("/Content-Encoding: gzip/", $currentHeader) ) {
528: 				$is_gzipped = true;
529: 			}
530: 
531: 			$this->headers[] = $currentHeader;
532: 		}
533: 
534: 		# $results = fread($fp, $this->maxlength);
535: 		$results = "";
536: 		while ( $data = fread($fp, $this->maxlength) ) {
537: 		    $results .= $data;
538: 		    if (
539: 		        strlen($results) > $this->maxlength ) {
540: 		        break;
541: 		    }
542: 		}
543: 
544: 		// gunzip
545: 		if ( $is_gzipped ) {
546: 			// per http://www.php.net/manual/en/function.gzencode.php
547: 			$results = substr($results, 10);
548: 			$results = gzinflate($results);
549: 		}
550: 
551: 		if ($this->read_timeout > 0 && $this->_check_timeout($fp))
552: 		{
553: 			$this->status=-100;
554: 			return false;
555: 		}
556: 
557: 		// check if there is a a redirect meta tag
558: 
559: 		if(preg_match("'<meta[\s]*http-equiv[^>]*?content[\s]*=[\s]*[\"\']?\d+;[\s]+URL[\s]*=[\s]*([^\"\']*?)[\"\']?>'i",$results,$match))
560: 		{
561: 			$this->_redirectaddr = $this->_expandlinks($match[1],$URI);
562: 		}
563: 
564: 		// have we hit our frame depth and is there frame src to fetch?
565: 		if(($this->_framedepth < $this->maxframes) && preg_match_all("'<frame\s+.*src[\s]*=[\'\"]?([^\'\"\>]+)'i",$results,$match))
566: 		{
567: 			$this->results[] = $results;
568: 			for($x=0; $x<count($match[1]); $x++)
569: 				$this->_frameurls[] = $this->_expandlinks($match[1][$x],$URI_PARTS["scheme"]."://".$this->host);
570: 		}
571: 		// have we already fetched framed content?
572: 		elseif(is_array($this->results))
573: 			$this->results[] = $results;
574: 		// no framed content
575: 		else
576: 			$this->results = $results;
577: 
578: 		return true;
579: 	}
580: 
581: /*======================================================================*\
582: 	Function:	_httpsrequest
583: 	Purpose:	go get the https data from the server using curl
584: 	Input:		$url		the url to fetch
585: 				$URI		the full URI
586: 				$body		body contents to send if any (POST)
587: 	Output:
588: \*======================================================================*/
589: 
590: 	function _httpsrequest($url,$URI,$http_method,$content_type="",$body="")
591: 	{
592: 		if($this->passcookies && $this->_redirectaddr)
593: 			$this->setcookies();
594: 
595: 		$headers = array();
596: 
597: 		$URI_PARTS = parse_url($URI);
598: 		if(empty($url))
599: 			$url = "/";
600: 		// GET ... header not needed for curl
601: 		//$headers[] = $http_method." ".$url." ".$this->_httpversion;
602: 		if(!empty($this->agent))
603: 			$headers[] = "User-Agent: ".$this->agent;
604: 		if(!empty($this->host))
605: 			$headers[] = "Host: ".$this->host;
606: 		if(!empty($this->accept))
607: 			$headers[] = "Accept: ".$this->accept;
608: 		if(!empty($this->referer))
609: 			$headers[] = "Referer: ".$this->referer;
610: 		if(!empty($this->cookies))
611: 		{
612: 			if(!is_array($this->cookies))
613: 				$this->cookies = (array)$this->cookies;
614: 
615: 			reset($this->cookies);
616: 			if ( count($this->cookies) > 0 ) {
617: 				$cookie_str = 'Cookie: ';
618: 				foreach ( $this->cookies as $cookieKey => $cookieVal ) {
619: 				$cookie_str .= $cookieKey."=".urlencode($cookieVal)."; ";
620: 				}
621: 				$headers[] = substr($cookie_str,0,-2);
622: 			}
623: 		}
624: 		if(!empty($this->rawheaders))
625: 		{
626: 			if(!is_array($this->rawheaders))
627: 				$this->rawheaders = (array)$this->rawheaders;
628: 			while(list($headerKey,$headerVal) = each($this->rawheaders))
629: 				$headers[] = $headerKey.": ".$headerVal;
630: 		}
631: 		if(!empty($content_type)) {
632: 			if ($content_type == "multipart/form-data")
633: 				$headers[] = "Content-type: $content_type; boundary=".$this->_mime_boundary;
634: 			else
635: 				$headers[] = "Content-type: $content_type";
636: 		}
637: 		if(!empty($body))
638: 			$headers[] = "Content-length: ".strlen($body);
639: 		if(!empty($this->user) || !empty($this->pass))
640: 			$headers[] = "Authorization: BASIC ".base64_encode($this->user.":".$this->pass);
641: 
642: 		for($curr_header = 0; $curr_header < count($headers); $curr_header++) {
643: 			$cmdline_params .= " -H \"".$headers[$curr_header]."\"";
644: 		}
645: 
646: 		if(!empty($body))
647: 			$cmdline_params .= " -d \"$body\"";
648: 
649: 		if($this->read_timeout > 0)
650: 			$cmdline_params .= " -m ".$this->read_timeout;
651: 
652: 		$headerfile = uniqid(time());
653: 
654: 		# accept self-signed certs
655: 		$cmdline_params .= " -k";
656: 		exec($this->curl_path." -D \"/tmp/$headerfile\"".escapeshellcmd($cmdline_params)." ".escapeshellcmd($URI),$results,$return);
657: 
658: 		if($return)
659: 		{
660: 			$this->error = "Error: cURL could not retrieve the document, error $return.";
661: 			return false;
662: 		}
663: 
664: 
665: 		$results = implode("\r\n",$results);
666: 
667: 		$result_headers = file("/tmp/$headerfile");
668: 
669: 		$this->_redirectaddr = false;
670: 		unset($this->headers);
671: 
672: 		for($currentHeader = 0; $currentHeader < count($result_headers); $currentHeader++)
673: 		{
674: 
675: 			// if a header begins with Location: or URI:, set the redirect
676: 			if(preg_match("/^(Location: |URI: )/i",$result_headers[$currentHeader]))
677: 			{
678: 				// get URL portion of the redirect
679: 				preg_match("/^(Location: |URI:)(.*)/",chop($result_headers[$currentHeader]),$matches);
680: 				// look for :// in the Location header to see if hostname is included
681: 				if(!preg_match("|\:\/\/|",$matches[2]))
682: 				{
683: 					// no host in the path, so prepend
684: 					$this->_redirectaddr = $URI_PARTS["scheme"]."://".$this->host.":".$this->port;
685: 					// eliminate double slash
686: 					if(!preg_match("|^/|",$matches[2]))
687: 							$this->_redirectaddr .= "/".$matches[2];
688: 					else
689: 							$this->_redirectaddr .= $matches[2];
690: 				}
691: 				else
692: 					$this->_redirectaddr = $matches[2];
693: 			}
694: 
695: 			if(preg_match("|^HTTP/|",$result_headers[$currentHeader]))
696: 			{
697: 			    $this->response_code = $result_headers[$currentHeader];
698: 			    if(preg_match("|^HTTP/[^\s]*\s(.*?)\s|",$this->response_code, $match))
699: 			    {
700: 				$this->status= $match[1];
701:                 	    }
702: 			}
703: 			$this->headers[] = $result_headers[$currentHeader];
704: 		}
705: 
706: 		// check if there is a a redirect meta tag
707: 
708: 		if(preg_match("'<meta[\s]*http-equiv[^>]*?content[\s]*=[\s]*[\"\']?\d+;[\s]+URL[\s]*=[\s]*([^\"\']*?)[\"\']?>'i",$results,$match))
709: 		{
710: 			$this->_redirectaddr = $this->_expandlinks($match[1],$URI);
711: 		}
712: 
713: 		// have we hit our frame depth and is there frame src to fetch?
714: 		if(($this->_framedepth < $this->maxframes) && preg_match_all("'<frame\s+.*src[\s]*=[\'\"]?([^\'\"\>]+)'i",$results,$match))
715: 		{
716: 			$this->results[] = $results;
717: 			for($x=0; $x<count($match[1]); $x++)
718: 				$this->_frameurls[] = $this->_expandlinks($match[1][$x],$URI_PARTS["scheme"]."://".$this->host);
719: 		}
720: 		// have we already fetched framed content?
721: 		elseif(is_array($this->results))
722: 			$this->results[] = $results;
723: 		// no framed content
724: 		else
725: 			$this->results = $results;
726: 
727: 		unlink("/tmp/$headerfile");
728: 
729: 		return true;
730: 	}
731: 
732: /*======================================================================*\
733: 	Function:	setcookies()
734: 	Purpose:	set cookies for a redirection
735: \*======================================================================*/
736: 
737: 	function setcookies()
738: 	{
739: 		for($x=0; $x<count($this->headers); $x++)
740: 		{
741: 		if(preg_match("/^set-cookie:[\s]+([^=]+)=([^;]+)/i", $this->headers[$x],$match))
742: 			$this->cookies[$match[1]] = $match[2];
743: 		}
744: 	}
745: 
746: 
747: /*======================================================================*\
748: 	Function:	_check_timeout
749: 	Purpose:	checks whether timeout has occurred
750: 	Input:		$fp	file pointer
751: \*======================================================================*/
752: 
753: 	function _check_timeout($fp)
754: 	{
755: 		if ($this->read_timeout > 0) {
756: 			$fp_status = socket_get_status($fp);
757: 			if ($fp_status["timed_out"]) {
758: 				$this->timed_out = true;
759: 				return true;
760: 			}
761: 		}
762: 		return false;
763: 	}
764: 
765: /*======================================================================*\
766: 	Function:	_connect
767: 	Purpose:	make a socket connection
768: 	Input:		$fp	file pointer
769: \*======================================================================*/
770: 
771: 	function _connect(&$fp)
772: 	{
773: 		if(!empty($this->proxy_host) && !empty($this->proxy_port))
774: 			{
775: 				$this->_isproxy = true;
776: 				$host = $this->proxy_host;
777: 				$port = $this->proxy_port;
778: 			}
779: 		else
780: 		{
781: 			$host = $this->host;
782: 			$port = $this->port;
783: 		}
784: 
785: 		$this->status = 0;
786: 
787: 		if($fp = fsockopen(
788: 					$host,
789: 					$port,
790: 					$errno,
791: 					$errstr,
792: 					$this->_fp_timeout
793: 					))
794: 		{
795: 			// socket connection succeeded
796: 
797: 			return true;
798: 		}
799: 		else
800: 		{
801: 			// socket connection failed
802: 			$this->status = $errno;
803: 			switch($errno)
804: 			{
805: 				case -3:
806: 					$this->error="socket creation failed (-3)";
807: 				case -4:
808: 					$this->error="dns lookup failure (-4)";
809: 				case -5:
810: 					$this->error="connection refused or timed out (-5)";
811: 				default:
812: 					$this->error="connection failed (".$errno.")";
813: 			}
814: 			return false;
815: 		}
816: 	}
817: /*======================================================================*\
818: 	Function:	_disconnect
819: 	Purpose:	disconnect a socket connection
820: 	Input:		$fp	file pointer
821: \*======================================================================*/
822: 
823: 	function _disconnect($fp)
824: 	{
825: 		return(fclose($fp));
826: 	}
827: 
828: 
829: /*======================================================================*\
830: 	Function:	_prepare_post_body
831: 	Purpose:	Prepare post body according to encoding type
832: 	Input:		$formvars  - form variables
833: 				$formfiles - form upload files
834: 	Output:		post body
835: \*======================================================================*/
836: 
837: 	function _prepare_post_body($formvars, $formfiles)
838: 	{
839: 		settype($formvars, "array");
840: 		settype($formfiles, "array");
841: 
842: 		if (count($formvars) == 0 && count($formfiles) == 0)
843: 			return;
844: 
845: 		switch ($this->_submit_type) {
846: 			case "application/x-www-form-urlencoded":
847: 				reset($formvars);
848: 				while(list($key,$val) = each($formvars)) {
849: 					if (is_array($val) || is_object($val)) {
850: 						while (list($cur_key, $cur_val) = each($val)) {
851: 							$postdata .= urlencode($key)."[]=".urlencode($cur_val)."&";
852: 						}
853: 					} else
854: 						$postdata .= urlencode($key)."=".urlencode($val)."&";
855: 				}
856: 				break;
857: 
858: 			case "multipart/form-data":
859: 				$this->_mime_boundary = "Snoopy".md5(uniqid(microtime()));
860: 
861: 				reset($formvars);
862: 				while(list($key,$val) = each($formvars)) {
863: 					if (is_array($val) || is_object($val)) {
864: 						while (list($cur_key, $cur_val) = each($val)) {
865: 							$postdata .= "--".$this->_mime_boundary."\r\n";
866: 							$postdata .= "Content-Disposition: form-data; name=\"$key\[\]\"\r\n\r\n";
867: 							$postdata .= "$cur_val\r\n";
868: 						}
869: 					} else {
870: 						$postdata .= "--".$this->_mime_boundary."\r\n";
871: 						$postdata .= "Content-Disposition: form-data; name=\"$key\"\r\n\r\n";
872: 						$postdata .= "$val\r\n";
873: 					}
874: 				}
875: 
876: 				reset($formfiles);
877: 				while (list($field_name, $file_names) = each($formfiles)) {
878: 					settype($file_names, "array");
879: 					while (list(, $file_name) = each($file_names)) {
880: 						if (!is_readable($file_name)) continue;
881: 
882: 						$fp = fopen($file_name, "r");
883: 						$file_content = fread($fp, filesize($file_name));
884: 						fclose($fp);
885: 						$base_name = basename($file_name);
886: 
887: 						$postdata .= "--".$this->_mime_boundary."\r\n";
888: 						$postdata .= "Content-Disposition: form-data; name=\"$field_name\"; filename=\"$base_name\"\r\n\r\n";
889: 						$postdata .= "$file_content\r\n";
890: 					}
891: 				}
892: 				$postdata .= "--".$this->_mime_boundary."--\r\n";
893: 				break;
894: 		}
895: 
896: 		return $postdata;
897: 	}
898: }
899: 
900: ?>