maikelvanmaurik / schrapert
v0.0.3
2020-04-01 11:41 UTC
Requires
- php: >=5.4.0
- psr/http-message: ~1.0
- psr/log: ^1.0.2
- react/dns: ^0.4.2
- react/event-loop: ^0.4
- react/filesystem: dev-master
- react/http-client: ^0.4.10
- react/promise: ~2.0
- react/stream: ~0.4
Requires (Dev)
- clue/block-react: ^1.1
- phpunit/phpunit: 5.2.*
This package is auto-updated.
Last update: 2025-01-16 07:37:17 UTC
README
Schrapert is a scraping/crawling library inspired by Scrapy. It uses React for various operations, such as downloading requests and writing files.
Example of a simple spider:
namespace Crawl;

use Schrapert\Spider;
use Schrapert\Crawl\ResponseInterface;
use Schrapert\Http\ResponseInterface as HttpResponse;
use Schrapert\Http\Request as HttpRequest;
use DOMDocument;
use DOMXPath;
use DOMElement;

/**
 * Example spider that follows every link found on a downloaded HTML page.
 */
class BlogSpider extends Spider
{
    /**
     * Parses a crawl response and yields one HTTP request per anchor that has a non-empty href.
     *
     * Responses that are not HTTP responses, and HTTP responses with an empty body,
     * yield nothing.
     *
     * @param ResponseInterface $response The response handed to the spider.
     *
     * @return \Generator Yields HttpRequest instances, one per followed link.
     */
    public function parse(ResponseInterface $response)
    {
        if (!$response instanceof HttpResponse) {
            return;
        }

        $html = (string)$response->getBody();
        if ($html === '') {
            // DOMDocument::loadHTML() rejects an empty string (warning on older PHP,
            // ValueError on PHP 8), and an empty page has no links to follow anyway.
            return;
        }

        $doc = new DOMDocument('1.0');

        // Real-world markup is rarely valid. Collect libxml parse errors internally
        // instead of emitting a PHP warning per defect, then restore the caller's setting.
        $previousSetting = libxml_use_internal_errors(true);
        $doc->loadHTML($html);
        libxml_clear_errors();
        libxml_use_internal_errors($previousSetting);

        $xpath = new DOMXPath($doc);

        // Only anchors that actually carry an href: a bare <a name="..."> would
        // otherwise resolve to the current page and be requested again.
        $nodes = $xpath->query('//a[@href]');
        foreach ($nodes as $node) {
            /* @var $node DOMElement */
            $href = trim($node->getAttribute('href'));
            if ($href === '') {
                continue;
            }

            // Resolve the (possibly relative) href against the URI of the page it was found on.
            // NOTE(review): $this->uri is presumably supplied by the Spider base class - confirm.
            $uri = $this->uri->join($href, $response->getUri());
            yield new HttpRequest($uri);
        }
    }
}