capimichi/crawler

There is no license information available for the latest version (v1.2.5) of this package.

Crawler utils

v1.2.5 2017-07-24 18:53 UTC

README

Use this library to scrape your favourite websites.

Get started

<?php

use Crawler\Downloader\CacheDownloader;
use Crawler\Downloader\SimpleDownloader;

require_once "/path/to/composer/autoload.php";

// URL the crawl starts from.
$startUrl = "http://random-url.com/";

// The cache downloader wraps the simple one. The second and third arguments
// are presumably the cache directory and the cached-file extension — confirm
// against CacheDownloader's constructor.
$cacheDownloader = new CacheDownloader(
    new SimpleDownloader(),
    __DIR__ . "/var/cache/",
    ".html"
);

// NOTE(review): MainCategoriesWebPage, ProductsWebPage and ProductWebPage are
// not imported in this example — presumably page classes you define yourself
// and make available through the autoloader; confirm.
$mainCategoriesWebPage = new MainCategoriesWebPage($startUrl, $cacheDownloader);

foreach ($mainCategoriesWebPage->getCategoryUrls() as $categoryUrl) {
    // Cursor over the category's listing pages; the first page is the
    // category URL itself.
    $pageUrl = $categoryUrl;

    while (true) {
        $listingPage = new ProductsWebPage($pageUrl, $cacheDownloader);

        // Visit every product linked from the current listing page.
        foreach ($listingPage->getChildUrls() as $productUrl) {
            $productPage = new ProductWebPage($productUrl, $cacheDownloader);
            $title = $productPage->getTitle();
        }

        // Advance to the next listing page; a value loosely equal to null
        // ends the walk for this category.
        $pageUrl = $listingPage->getNextPageUrl();
        if ($pageUrl == null) {
            break;
        }
    }
}