We frequently need to grab data from a remote site, and there are several ways to do that. I like the approach where we grab it using the XML Path Language (XPath). XPath is used to navigate through the elements and attributes of an XML document. An XPath query returns a set of matching nodes to the calling method or application. A node is a single item in the document tree, such as a complete element, an attribute or a piece of text.

Here is a way to grab data from an HTML/XHTML source using XPath and cURL.

The basic XPath class:

/**
 * Runs one XPath expression against a parsed document and collects the text
 * (or one attribute) of every matching node, either concatenated into a
 * string or as a list.
 *
 * Typical use: construct, set the public options, call execute(), then get().
 */
class xpath{
	/** DOMDocument the expression is evaluated against. */
	public $html;
	/** XPath expression (property name kept as-is for existing callers). */
	public $patten;
	/** When truthy, collect the children of each match instead of the match itself. */
	public $childnodes = 0;
	/** Attribute name to read instead of the text content, or 0 for text content. */
	public $attribute = 0;
	/** List of values that must never be collected, or 0 for no filter. */
	public $forbidden = 0;
	/** List of text contents a node must have to be collected, or 0 for no filter. */
	public $search = 0;
	/** When truthy, a <br> element is collected as a newline character. */
	public $br2nl = 1;
	/** "string" concatenates the collected values; anything else collects an array. */
	public $return = "string";
	/** "utf8" keeps values as delivered by DOM; anything else converts them to ISO-8859-1. */
	public $charset = "utf8";
	/** Collected result; stays null until at least one value has been accepted. */
	private $returnd;

	/**
	 * @param DOMDocument $html   Parsed document to query.
	 * @param string      $patten XPath expression.
	 */
	public function __construct($html, $patten){
		$this->html = $html;
		$this->patten = $patten;
	}

	/**
	 * Evaluates the expression and buffers every accepted value.
	 * Calling it again appends to the values already collected.
	 */
	public function execute(){
		$xpath = new DOMXPath($this->html);
		$basenodes = $xpath->query($this->patten);
		if($basenodes === false){
			// query() reports a malformed expression by returning false; there is nothing to walk.
			return;
		}
		foreach($basenodes as $basenode){
			if($this->childnodes){
				foreach($basenode->childNodes as $childnode){
					$this->buffer($childnode);
				}
			}else{
				$this->buffer($basenode);
			}
		}
	}

	/**
	 * Prepares one node and stores the outcome unless it was rejected or is empty.
	 */
	private function buffer($value){
		$preparedvalue = $this->prepare($value);
		// Explicit checks instead of a truthiness test, so that the text "0" is kept.
		if($preparedvalue === false || $preparedvalue === ''){
			return;
		}
		if($this->returnstring()){
			$this->returnd .= $preparedvalue;
		}else{
			$this->returnd[] = $preparedvalue;
		}
	}

	/** True when results are concatenated into a single string. */
	private function returnstring(){
		return $this->return == "string";
	}

	/** True when values are returned without charset conversion. */
	private function returnutf8(){
		return $this->charset == "utf8";
	}

	/**
	 * Turns a DOM node into the string to collect.
	 *
	 * @param DOMNode $before Node delivered by the query (element, text, comment, ...).
	 * @return string|false The value, or false when the node is filtered out.
	 */
	private function prepare($before){
		// Child node lists also contain text and comment nodes, which have
		// neither a tag name nor attributes.
		$iselement = $before instanceof DOMElement;

		if($iselement && $before->tagName === "br" && $this->br2nl){
			$value = "\n";
		}else{
			if($this->search && !$this->inlist($before->textContent, $this->search)){
				return false;
			}
			if($this->attribute){
				if(!$iselement){
					return false;
				}
				$value = $before->getAttribute($this->attribute);
			}else{
				$value = trim($before->textContent);
			}
		}

		if(!$this->returnutf8()){
			$value = $this->tolatin1($value);
		}
		if($this->forbidden && $this->inlist($value, $this->forbidden)){
			return false;
		}
		return $value;
	}

	/**
	 * Exact (string) membership test; avoids the loose comparisons of a
	 * non-strict in_array() and tolerates a single scalar instead of a list.
	 */
	private function inlist($value, $list){
		foreach((array) $list as $item){
			if((string) $item === (string) $value){
				return true;
			}
		}
		return false;
	}

	/**
	 * Converts UTF-8 to ISO-8859-1. utf8_decode() is deprecated as of PHP 8.2,
	 * so it is only the fallback when mbstring is not loaded.
	 */
	private function tolatin1($value){
		if(function_exists('mb_convert_encoding')){
			return mb_convert_encoding($value, 'ISO-8859-1', 'UTF-8');
		}
		return utf8_decode($value);
	}

	/**
	 * @return string|array|null Collected values; null when nothing was collected.
	 */
	public function get(){
		return $this->returnd;
	}
}

We have to create an xpath object to grab data, using HTML ids/classes to address the nodes. First, we need to fetch the page source, for example with PHP cURL. Once you have that source, you can easily filter the data by attributes/nodes.

Here is an example of fetching data by node and of reading HTML attributes.

	// Address of the page to scrape.
	$grab_path = trim("Your URL goes here");

	// A URL copied out of HTML source carries "&amp;" where the real address has "&".
	$grab_path = str_replace("&amp;","&",$grab_path);
	// The address is used as-is: urlencode() on a complete URL would also escape
	// "://" and "/", leaving something that can no longer be opened.
	$url = $grab_path;

	$html = new DOMDocument();
	// Real-world HTML is rarely valid; collect libxml's complaints internally
	// instead of printing a warning for each one, then restore the old setting.
	$previous_errors = libxml_use_internal_errors(true);
	$loaded = $html->loadHTMLFile($url);
	libxml_clear_errors();
	libxml_use_internal_errors($previous_errors);
	if(!$loaded){
		throw new RuntimeException("Could not load HTML from " . $url);
	}

	// Text of every child of the container div (selected by class), minus the word "Body".
	$description = new xpath($html, '//div[@class="body_container"]'); // div class name
	$description->return = "string";
	$description->childnodes = 1;
	$description->forbidden = array("Body");
	$description->execute();

	$title = new xpath($html, '//h1'); // Grab h1 data
	$title->execute();

	// Read the src attribute rather than the text of the matched element.
	$image = new xpath($html, '//*[@id="main_image"]'); // Grab images
	$image->attribute = "src";
	$image->execute();

	// Children of the first cell in the first row of the parameter table, as a list.
	$adparams = new xpath($html, '//table[@class="params UnderlinedLinks"]/tr[1]/td[1]'); // fetch tables
	$adparams->childnodes = 1;
	$adparams->return = "array";
	$adparams->execute();

	$info[] = array(
		'title_name' => $title->get(),
		'image' => $image->get(),
	);

It's a very effective way to retrieve data from an HTML source, and it's easy to adapt when the page's CSS and HTML change.

Advertisements