781
Sometimes it is necessary to verify that a given page really contains a specific link. This is usually done when checking for a reciprocal link in link exchange scripts and so on.
Several things need to be considered in this situation:
- Only actual links count. A plain-text URL should not be accepted.
- Links inside HTML comments (`<!-- ... -->`) are no good.
- Nofollow’ed links are out as well.
Here’s a PHP function that satisfies these requirements:
/**
 * Check whether a page contains a real, followed link to a given URL.
 *
 * The page is downloaded with cURL and then inspected:
 *  - HTML comments are removed first, so commented-out links do not count;
 *  - only <a ... href="..."> tags are considered, so a plain-text URL does
 *    not count;
 *  - links whose rel attribute contains the "nofollow" token are skipped.
 *
 * @param string $page_url URL of the page to download and inspect.
 * @param string $link_url URL the page is expected to link to. It is compared
 *                         exactly (case-sensitive, no normalisation) against
 *                         each href, both as written in the markup and with
 *                         HTML entities decoded (e.g. "&amp;" -> "&").
 *
 * @return bool True if a matching, followed link was found. False if there is
 *              no such link, or if the page could not be downloaded or parsed.
 */
function contains_link($page_url, $link_url) {
    /* Get the page at page_url */
    $ch = curl_init();
    if ($ch === false) {
        return false;
    }
    curl_setopt($ch, CURLOPT_URL, $page_url);
    curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
    curl_setopt($ch, CURLOPT_FOLLOWLOCATION, 1);
    curl_setopt($ch, CURLOPT_USERAGENT, 'Mozilla/4.0 (compatible; MSIE 5.01; Windows NT 5.0)');
    curl_setopt($ch, CURLOPT_CONNECTTIMEOUT, 30);
    curl_setopt($ch, CURLOPT_TIMEOUT, 60);
    /* Treat HTTP error responses (status >= 400) as a failed download */
    curl_setopt($ch, CURLOPT_FAILONERROR, true);
    $html = curl_exec($ch);
    curl_close($ch);

    if (!is_string($html) || $html === '') {
        return false;
    }

    /*
     * Remove HTML comments and their contents. The match is lazy and spans
     * newlines, so multi-line comments are removed and markup sitting between
     * two separate comments is kept. An unterminated comment swallows the
     * rest of the document.
     */
    $html = preg_replace('/<!--.*?(?:-->|\z)/s', '', $html);
    if ($html === null) {
        return false; /* PCRE failure: do not fall back to unstripped HTML */
    }

    /*
     * Extract all links.
     *   group 1: the start of the <a> tag up to and including href= (and quote)
     *   group 2: the URL itself
     *   group 3: the remainder of the opening tag
     * $match[0] is therefore the complete opening tag.
     */
    $regexp = '/(<a\s[^>]*?(?<=[\s"\'])href\s*=\s*["\']?)([^"\'\s>]+)(["\']?[^<>]*>)/i';
    if (!preg_match_all($regexp, $html, $matches, PREG_SET_ORDER)) {
        return false; /* No links on page */
    }

    /* rel may hold several space-separated tokens, e.g. rel="external nofollow" */
    $nofollow = '/(?<=[\s"\'])rel\s*=\s*(?:"[^"]*\bnofollow\b[^"]*"|\'[^\']*\bnofollow\b[^\']*\'|nofollow\b)/i';

    $link_url = (string) $link_url;

    /* Check each link */
    foreach ($matches as $match) {
        /* Skip links that contain rel=nofollow */
        if (preg_match($nofollow, $match[0])) {
            continue;
        }

        /* If URL = link_url, we've found the backlink */
        $href = $match[2];
        if ($href === $link_url) {
            return true;
        }
        /* href values are HTML-encoded in the markup ("&amp;" for "&") */
        if (html_entity_decode($href, ENT_QUOTES, 'UTF-8') === $link_url) {
            return true;
        }
    }

    return false;
}

/* Usage example (both URLs are placeholders - substitute your own) */
if (contains_link('http://example.com/links.html', 'http://www.example.org/')) {
    echo 'Reciprocal link found.';
} else {
    echo 'Reciprocal link not found.';
}