from scrapy.spiders import CrawlSpider, Rule
from scrapy.linkextractors import LinkExtractor
class Spider(CrawlSpider):
    """Spider that emits the absolute URL of every ``<a href>`` it sees.

    Each response handled by :meth:`parse` is recorded in ``self.links`` and
    produces one ``{'url': ...}`` item per anchor ``href`` found on the page.
    No crawl rules are defined, and the requests built in :meth:`parse` are
    not scheduled, so only the pages in ``start_urls`` are downloaded.
    """

    name = "test"
    # Scrapy expects bare domain names here, not URLs: entries carrying a
    # scheme ("https://...") are not valid for offsite filtering.
    allowed_domains = ['www.collectivesolver.com']
    start_urls = ['https://www.collectivesolver.com']

    def __init__(self, *args, **kwargs):
        """Initialise the base spider, then the list of parsed page URLs.

        Args:
            *args: Positional arguments forwarded to ``CrawlSpider.__init__``.
            **kwargs: Keyword arguments (e.g. ``-a key=value`` spider
                arguments) forwarded to ``CrawlSpider.__init__``.
        """
        # The base initialiser must run so CrawlSpider can set up its own
        # state (name/start_urls handling and rule compilation).
        super().__init__(*args, **kwargs)
        # URLs of the responses that have been passed to parse().
        self.links = []

    def parse_start_url(self, response, **kwargs):
        """Delegate start-URL responses to :meth:`parse`.

        ``parse_start_url`` is CrawlSpider's documented hook for responses
        to ``start_urls``; forwarding it keeps items flowing when the
        framework dispatches through this hook instead of calling
        :meth:`parse` directly.
        """
        return self.parse(response)

    def parse(self, response):
        """Record the page URL and yield one item per link on the page.

        Args:
            response: The downloaded page.

        Yields:
            dict: ``{'url': <absolute link URL>}`` for every ``a::attr(href)``
            match in ``response``.
        """
        self.links.append(response.url)
        for href in response.css('a::attr(href)'):
            # response.follow() is used only to resolve the href against the
            # page URL; the resulting Request is not yielded, so the linked
            # page is not fetched.
            request = response.follow(href, self.parse)
            yield {
                'url': request.url
            }
# Environment: Windows 10, Visual Studio Code (TERMINAL panel).
# Run from the Scrapy project directory:
#   scrapy crawl test -o data.json
'''
run:
[
{"url": "https://www.collectivesolver.com/"},
{"url": "https://www.collectivesolver.com/questions"},
{"url": "https://www.collectivesolver.com/tags"},
{"url": "https://www.collectivesolver.com/users"},
{"url": "https://www.collectivesolver.com/tag/python"},
{"url": "https://www.collectivesolver.com/tag/php"},
{"url": "https://www.collectivesolver.com/tag/java"},
{"url": "https://www.collectivesolver.com/tag/c%23"},
{"url": "https://www.collectivesolver.com/tag/javascript"},
{"url": "https://www.collectivesolver.com/tag/cpp"},
{"url": "https://www.collectivesolver.com/tag/c"},
{"url": "https://www.collectivesolver.com/tag/vb%23"},
{"url": "https://www.collectivesolver.com/tag/html"},
{"url": "https://www.collectivesolver.com/tag/bootstrap"},
{"url": "https://www.collectivesolver.com/tag/css"},
{"url": "https://www.collectivesolver.com/tag/sql"},
{"url": "https://www.collectivesolver.com/tag/go"},
{"url": "https://www.collectivesolver.com/tag/mysql"},
{"url": "https://www.collectivesolver.com/tag/jquery"},
{"url": "https://www.collectivesolver.com/tag/nodejs"},
{"url": "https://www.collectivesolver.com/tag/reactjs"},
{"url": "https://www.collectivesolver.com/tag/nodejs-express"},
{"url": "https://www.collectivesolver.com/tag/angularjs"},
{"url": "https://www.collectivesolver.com/tag/html5"},
{"url": "https://www.collectivesolver.com/tag/postgresql"},
{"url": "https://www.collectivesolver.com/tag/dom"},
{"url": "https://www.collectivesolver.com/tag/winapi"},
{"url": "https://www.collectivesolver.com/tag/win32"},
{"url": "https://www.collectivesolver.com/tag/android-java"},
{"url": "https://www.collectivesolver.com/tag/bootstrap4"},
{"url": "https://www.collectivesolver.com/tag/css3"},
{"url": "https://www.collectivesolver.com/tag/visual-studio-code"},
{"url": "https://www.collectivesolver.com/tag/software"},
{"url": "https://www.collectivesolver.com/tag/web-hosting"},
{"url": "https://www.collectivesolver.com/tag/binary"},
{"url": "https://www.collectivesolver.com/tag/wordpress"},
{"url": "https://www.collectivesolver.com/tag/phpmyadmin"},
{"url": "https://www.collectivesolver.com/tag/firefox"},
{"url": "https://www.collectivesolver.com/tag/wpf"},
{"url": "https://www.collectivesolver.com/tag/clisp"},
{"url": "https://www.collectivesolver.com/tag/laravel"},
{"url": "https://www.collectivesolver.com/tag/netbeans"},
{"url": "https://www.collectivesolver.com/tag/prototype"},
{"url": "https://www.collectivesolver.com/tag/ide"},
{"url": "https://www.collectivesolver.com/tag/dot-net-library"},
{"url": "https://www.collectivesolver.com/tag/opengl"},
{"url": "https://www.collectivesolver.com/tag/xampp"},
{"url": "https://www.collectivesolver.com/tag/windows"},
{"url": "https://www.collectivesolver.com/tag/download"},
{"url": "https://www.collectivesolver.com/tag/fontawesome5"},
{"url": "https://www.collectivesolver.com/tag/xdebug"},
{"url": "https://www.collectivesolver.com/tag/iphone"},
{"url": "https://www.collectivesolver.com/tag/hardware"},
{"url": "https://www.collectivesolver.com/tag/apache"},
{"url": "https://www.collectivesolver.com/tag/webgl"},
{"url": "https://www.collectivesolver.com/tag/xml"},
{"url": "https://www.collectivesolver.com/tag/xhtml"},
{"url": "https://www.collectivesolver.com/tag/browser"},
{"url": "https://www.collectivesolver.com/tag/programming"},
{"url": "https://www.collectivesolver.com/user/avibootz"},
{"url": "https://www.collectivesolver.com/tag/python"},
{"url": "https://www.collectivesolver.com/31926/how-to-use-yield-in-python"},
...
]
'''