/**
 * Resolve a (possibly relative) URL reference against an absolute base URL.
 *
 * Handles absolute references (returned unchanged), protocol-relative
 * references ("//host/x"), fragment-only ("#x") and query-only ("?x")
 * references, root-relative paths ("/x") and document-relative paths
 * ("x", "./x", "../x"). "." and ".." segments and duplicate slashes in the
 * resulting path are collapsed; the reference's query string and fragment
 * are carried over verbatim.
 *
 * @param string $relative_url The reference to resolve, e.g. an href value.
 * @param string $base_url     Absolute URL of the document containing the reference.
 *
 * @return string The absolute URL. If $base_url has no scheme or host the
 *                reference cannot be resolved and is returned unchanged.
 */
function relative_path_to_absolute_url($relative_url, $base_url) {
    $relative_url = (string) $relative_url;
    $base_url = (string) $base_url;

    // An empty reference points at the base document itself (without its fragment).
    if ($relative_url === '') {
        return (string) preg_replace('/#.*$/s', '', $base_url);
    }

    // A reference that carries its own scheme is already absolute.
    $own_scheme = parse_url($relative_url, PHP_URL_SCHEME);
    if (is_string($own_scheme) && $own_scheme !== '') {
        return $relative_url;
    }

    // Fragment-only reference: replaces the base fragment, keeps everything else.
    if ($relative_url[0] === '#') {
        return preg_replace('/#.*$/s', '', $base_url) . $relative_url;
    }

    // Query-only reference: replaces the base query and fragment.
    if ($relative_url[0] === '?') {
        return preg_replace('/[?#].*$/s', '', $base_url) . $relative_url;
    }

    // Read the parsed components explicitly instead of extract()-ing them, so
    // missing keys (e.g. no path in "https://example.com") are handled.
    $base = parse_url($base_url);
    if (!is_array($base) || !isset($base['scheme'], $base['host'])) {
        return $relative_url;
    }
    $scheme = $base['scheme'];

    // Protocol-relative reference: only the scheme is inherited from the base.
    if (strncmp($relative_url, '//', 2) === 0) {
        return $scheme . ':' . $relative_url;
    }

    // Rebuild the authority, keeping user info and port if the base has them.
    $authority = $base['host'];
    if (isset($base['user'])) {
        $credentials = $base['user'];
        if (isset($base['pass'])) {
            $credentials .= ':' . $base['pass'];
        }
        $authority = $credentials . '@' . $authority;
    }
    if (isset($base['port'])) {
        $authority .= ':' . $base['port'];
    }

    // Separate the path from the query/fragment so that normalisation never
    // touches slashes or dots that appear inside the query or fragment.
    $cut = strcspn($relative_url, '?#');
    $reference_path = (string) substr($relative_url, 0, $cut);
    $suffix = (string) substr($relative_url, $cut);

    if ($reference_path[0] === '/') {
        // Root-relative: the base path is ignored entirely.
        $merged = $reference_path;
    } else {
        // Document-relative: resolve against the directory of the base path.
        $base_path = isset($base['path']) ? $base['path'] : '';
        $directory = (string) preg_replace('#/[^/]*$#', '', $base_path);
        $merged = $directory . '/' . $reference_path;
    }

    // Collapse ".", ".." and empty segments. A ".." at the root is dropped.
    $segments = array();
    $parts = explode('/', $merged);
    $last = count($parts) - 1;
    $trailing_slash = false;
    foreach ($parts as $index => $part) {
        if ($part === '' || $part === '.' || $part === '..') {
            if ($part === '..') {
                array_pop($segments);
            }
            // A path ending in "/", "/." or "/.." denotes a directory.
            if ($index === $last) {
                $trailing_slash = true;
            }
            continue;
        }
        $segments[] = $part;
    }
    $path = '/' . implode('/', $segments);
    if ($trailing_slash && count($segments) > 0) {
        $path .= '/';
    }

    return $scheme . '://' . $authority . $path . $suffix;
}
/**
 * Download a URL with cURL (HTTP GET, following redirects) and return the
 * response body.
 *
 * Response headers are not requested: when redirects are followed cURL emits
 * one header block per hop, so cutting the output at the first blank line
 * would leave the headers of later responses at the start of the body.
 *
 * @param string $url The URL to fetch.
 *
 * @return string The body of the final response, or an empty string if the
 *                transfer failed.
 */
function get_html($url) {
    $handle = curl_init();
    if ($handle === false) {
        return '';
    }

    curl_setopt($handle, CURLOPT_HTTPGET, true);
    // Body only; see the note above about redirects.
    curl_setopt($handle, CURLOPT_HEADER, false);
    curl_setopt($handle, CURLOPT_URL, $url);
    curl_setopt($handle, CURLOPT_FOLLOWLOCATION, true);
    curl_setopt($handle, CURLOPT_RETURNTRANSFER, true);

    $output = curl_exec($handle);
    curl_close($handle);

    // curl_exec() returns false on failure; normalise that to an empty body.
    return is_string($output) ? $output : '';
}
// Page to fetch, and the base URL against which its relative links are resolved.
$url = "https://www.collectivesolver.com/";
$base_url = "https://www.collectivesolver.com/";

$html = get_html($url);

$doc = new DOMDocument();
// Real-world markup is rarely valid; collect parser warnings instead of printing them.
libxml_use_internal_errors(true);
// DOMDocument::loadHTML() rejects an empty string, which is what a failed download yields.
if (is_string($html) && $html !== '') {
    $doc->loadHTML($html);
}
// Release the parser errors buffered by libxml_use_internal_errors(true).
libxml_clear_errors();

foreach ($doc->getElementsByTagName('a') as $linktag) {
    // Compare against '' explicitly so that a valid href such as "0" is not skipped.
    $href = trim($linktag->getAttribute('href'));
    if ($href === '') {
        continue;
    }
    $link_absolute = relative_path_to_absolute_url($href, $base_url);
    // The href comes from a remote page: escape it before emitting it as HTML.
    echo htmlspecialchars($link_absolute, ENT_QUOTES, 'UTF-8') . "<br />";
}
/*
run:
https://www.collectivesolver.com/
https://www.collectivesolver.com/questions
https://www.collectivesolver.com/tags
https://www.collectivesolver.com/users
https://www.collectivesolver.com/tag/python
https://www.collectivesolver.com/tag/php
https://www.collectivesolver.com/tag/c%23
https://www.collectivesolver.com/tag/java
https://www.collectivesolver.com/tag/cpp
https://www.collectivesolver.com/tag/c
https://www.collectivesolver.com/tag/javascript
https://www.collectivesolver.com/tag/vb%23
...
*/