kurozumi / web-scraper-bundle
Installs: 20
Dependents: 0
Suggesters: 0
Security: 0
Stars: 0
Watchers: 2
Forks: 0
Open Issues: 0
Type:symfony-bundle
Requires
- php: >=8.1
- symfony/css-selector: ^6.1
- symfony/dom-crawler: ^6.1
- symfony/framework-bundle: ^6.1
- symfony/http-client: ^6.1
Requires (Dev)
- phpunit/phpunit: ^9.5
README
Manage multiple Web Scraper bundle for Symfony.
Install
composer req kurozumi/web-scraper-bundle
How to use
namespace App\Command;
use Kurozumi\WebScraperBundle\Service\Context;
use Kurozumi\WebScraperBundle\Service\Scraper\RssScraper;
use Symfony\Component\Console\Attribute\AsCommand;
use Symfony\Component\Console\Command\Command;
#[AsCommand(
name: 'app:scraper'
)]
class ScraperCommand extends Command
{
private Context $context;
public function __construct(Context $context)
{
$this->context = $context;
parent::__construct();
}
protected function execute(InputInterface $input, OutputInterface $output): int
{
$feeds = [
'https://aaa.rss.xml',
'https://bbb.rss.xml'
];
$items = [];
foreach ($feeds as $feed) {
$data = $this->content->getData($feed);
if (null !== $data) {
foreach ($data['items'] as $item) {
switch ($data['name']) {
case RssScraper::class:
$items[] = [
'title' => $item->filter('title')->text(),
'url' => $item->filter('link')->text()
];
break;
}
}
}
}
print_r($items);
return Command::SUCCESS;
}
}
Custom Scraper
Feed
<?php
namespace App\Service\Scraper;
final class YouTubeFeedScraper extends AbstractScraper
{
/**
* @param string $url
* @return \ArrayIterator
* @throws \Symfony\Contracts\HttpClient\Exception\ClientExceptionInterface
* @throws \Symfony\Contracts\HttpClient\Exception\RedirectionExceptionInterface
* @throws \Symfony\Contracts\HttpClient\Exception\ServerExceptionInterface
* @throws \Symfony\Contracts\HttpClient\Exception\TransportExceptionInterface
*/
public function getItems(string $url): \ArrayIterator
{
$response = $this->getResponse($url);
$items = new \ArrayIterator();
$crawler = $this->getCrawler($response);
if ('feed' === $crawler->nodeName()) {
$crawler->setDefaultNamespacePrefix('m', 'http://search.yahoo.com/mrss/');
foreach ($crawler->filter('m|entry') as $item) {
$items->append($this->getItem($item));
}
}
return $items;
}
}
Html
<?php
namespace App\Service\Scraper;
final class HtmlScraper extends AbstractScraper
{
/**
* @param string $url
* @return \ArrayIterator
* @throws \Symfony\Contracts\HttpClient\Exception\ClientExceptionInterface
* @throws \Symfony\Contracts\HttpClient\Exception\RedirectionExceptionInterface
* @throws \Symfony\Contracts\HttpClient\Exception\ServerExceptionInterface
* @throws \Symfony\Contracts\HttpClient\Exception\TransportExceptionInterface
*/
public function getItems(string $url): \ArrayIterator
{
$response = $this->getResponse($url);
$items = new \ArrayIterator();
$crawler = $this->getCrawler($response);
if ('html' === $crawler->nodeName()) {
foreach ($crawler->filter('ul > li') as $item) {
$items->append($this->getItem($item));
}
}
return $items;
}
}