c4pone / yolo_crawler
An event based domain crawler
dev-master
2015-03-02 04:05 UTC
Requires
- chuyskywalker/rolling-curl: 3.1.0
- guzzlehttp/guzzle: 5.0.0
- symfony/dom-crawler: 2.5.9
- symfony/event-dispatcher: 2.7.*@dev
- webignition/absolute-url-deriver: dev-master
Requires (Dev)
- mockery/mockery: dev-master
- phpunit/phpunit: 3.7.x-dev
- satooshi/php-coveralls: dev-master
- squizlabs/php_codesniffer: 1.5.1
This package is not auto-updated.
Last update: 2025-03-01 18:50:58 UTC
README
# yolo crawler
Status Label | Status Value |
---|---|
Build | |
Code Quality |
Example: finding broken links
// Example command-line script: crawl a domain and report broken links.
//
// Arguments, as shown by the usage message below:
//   $argv[1]  {domain}        required; handed to DomainCrawler::crawl()
//   $argv[2]  {time to wait}  optional; handed to DomainCrawler::setWaitTime()
//                             (unit is not visible in this example; confirm
//                             against DomainCrawler)
require 'bootstrap/autoload.php';

use Symfony\Component\EventDispatcher\EventDispatcher;
use WP\Crawler\DomainCrawler;
use WP\Crawler\Event\BrokenLinkFinderSubscriber;
use WP\Crawler\Event\LogSubscriber;
use WP\Crawler\LinkFinder;
use WP\Crawler\Queue\ArrayQueue;
use WP\Crawler\Queue\QueueManager;
use WP\Crawler\Queue\Store\ArrayStore;
use WP\Crawler\Queue\Validator\NoPseudoUrl;
use WP\Crawler\Queue\Validator\ValidFileExtension;

if (!isset($argv[1])) {
    // No domain supplied: print a blank line followed by the usage hint.
    echo "\n";
    echo "Usage " . $argv[0] . ' {domain} {time to wait}' . "\n";
} else {
    $targetDomain = $argv[1];

    // Queue manager built on the array-based queue and store, with the
    // two URL validators registered through the chained addValidator() calls.
    $urlQueue = new ArrayQueue();
    $urlStore = new ArrayStore();
    $queueManager = new QueueManager($urlQueue, $urlStore);
    $queueManager
        ->addValidator(new NoPseudoUrl())
        ->addValidator(new ValidFileExtension());

    $domainCrawler = new DomainCrawler($queueManager, new LinkFinder());

    // The wait time is only set when a second argument was given.
    if (isset($argv[2])) {
        $domainCrawler->setWaitTime($argv[2]);
    }

    // Attach the logging and broken-link subscribers before crawling.
    $events = $domainCrawler->getEventDispatcher();
    $events->addSubscriber(new LogSubscriber());
    $events->addSubscriber(new BrokenLinkFinderSubscriber());

    $domainCrawler->crawl($targetDomain);
}