510
Sometimes it is necessary to verify that a given page really contains a specific link. This is usually done when checking for a reciprocal link in link exchange scripts and so on.
Several things need to be considered in this situation:
- Only actual links count. A plain-text URL should not be accepted.
- Links inside HTML comments (<!-- ... -->) are no good.
- Nofollow’ed links are out as well.
Here’s a PHP function that satisfies these requirements:
/**
 * Check whether the page at $page_url contains a real, followed link to $link_url.
 *
 * A link only counts when it is an <a href="..."> tag whose href equals
 * $link_url exactly, that is not inside an HTML comment and does not carry
 * rel="nofollow" (alone or among other rel tokens). Plain-text URLs never count.
 *
 * NOTE(review): the page is fetched with whatever protocols the local libcurl
 * allows; if $page_url comes from untrusted input, consider restricting
 * CURLOPT_PROTOCOLS to HTTP/HTTPS at the call site or here.
 *
 * @param string $page_url URL of the page to download and scan.
 * @param string $link_url URL that must appear as the href of a link.
 * @return bool True if a qualifying link is found; false if the page cannot
 *              be fetched, is empty, has no links, or has no qualifying link.
 */
function contains_link($page_url, $link_url) {
    $link_url = (string) $link_url;

    /* Get the page at page_url */
    $ch = curl_init();
    if ($ch === false) {
        return false;
    }
    curl_setopt($ch, CURLOPT_URL, $page_url);
    curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
    curl_setopt($ch, CURLOPT_FOLLOWLOCATION, 1);
    curl_setopt($ch, CURLOPT_USERAGENT, 'Mozilla/4.0 (compatible; MSIE 5.01; Windows NT 5.0)');
    curl_setopt($ch, CURLOPT_CONNECTTIMEOUT, 30);
    curl_setopt($ch, CURLOPT_TIMEOUT, 60);
    /* Treat HTTP error responses as a failed fetch instead of scanning the error page */
    curl_setopt($ch, CURLOPT_FAILONERROR, true);
    $html = curl_exec($ch);
    curl_close($ch);

    if (!is_string($html) || $html === '') {
        return false;
    }

    /* Remove HTML comments and their contents ("s" lets "." span line breaks) */
    $html = preg_replace('/<!--.*?-->/s', '', $html);
    if ($html === null) {
        /* PCRE failure (e.g. backtrack limit): the page cannot be verified */
        return false;
    }

    /*
     * Extract the opening tag of every link.
     * $match[0] = whole <a ...> tag, $match[1] = quote character (may be empty),
     * $match[2] = href value.
     */
    $link_pattern = '/<a\s(?:[^>]*?\s)?href\s*=\s*(["\']?)([^"\'\s>]*)\1[^>]*>/i';
    if (!preg_match_all($link_pattern, $html, $matches, PREG_SET_ORDER)) {
        /* No links on page */
        return false;
    }

    /* Matches rel=nofollow, including multi-token values such as rel="external nofollow" */
    $nofollow_pattern = '/\srel\s*=\s*(?:"[^"]*\bnofollow\b[^"]*"|\'[^\']*\bnofollow\b[^\']*\'|nofollow\b)/i';

    /* Check each link */
    foreach ($matches as $match) {
        /* Skip links that contain rel=nofollow */
        if (preg_match($nofollow_pattern, $match[0])) {
            continue;
        }
        /* If the href equals link_url, we've found the backlink */
        if ($match[2] === $link_url) {
            return true;
        }
    }

    return false;
}
/* Usage example: the two URLs below are placeholders; substitute the page to
   scan and the link you expect to find on it. */
if (contains_link('http://example.com/links.html', 'http://www.example.org/')) {
    echo 'Reciprocal link found.';
} else {
    echo 'Reciprocal link not found.';
}