rabbyte/scraper

There is no license information available for the latest version (dev-master) of this package.

Scrape data from Iranian e-commerce sites such as Torob, Digikala, and Divar.

Installs: 7

Dependents: 0

Suggesters: 0

Security: 0

Stars: 6

Watchers: 1

Forks: 0

Open Issues: 0

pkg:composer/rabbyte/scraper

dev-master 2025-07-10 16:17 UTC

This package is auto-updated.

Last update: 2026-01-10 17:19:40 UTC


README

A scraper for Divar and Digikala Supermarket, intended for learning purposes only.

How to scrape

Add this entry to your composer.json:

    "require": {
        "rabbyte/scraper": "dev-master"
    }

Then run composer install.

Currently supported Divar categories:

<?php
/**
 * example usage of Divar data scraping
 */
require 'vendor/autoload.php';

use Rabbyte\Scraper\divar\divarApi;

// Divar category slugs to scrape.
$categories = [
    'stationery',
    'clothing',
    'health-beauty',
    'rhinestones',
    'shoes-belt-bag',
    'childrens-clothing-and-shoe',
];
/**
 * Scrape Divar ads for the given categories, one result page ("layer") at a time.
 *
 * Every remaining category is requested concurrently for the current page and
 * each fulfilled response is handed to divarApi::parseExport(). A category is
 * dropped from later pages when parseExport() returns a falsy value (presumably
 * meaning there is nothing further to fetch - confirm against divarApi) or when
 * it has failed $maxConsecutiveFailures times in a row. The loop ends once no
 * categories remain.
 *
 * @param array $categories             Category slugs to scrape.
 * @param int   $layerPage              Page index to start from.
 * @param mixed $filterPrice            Price filter forwarded unchanged to parseExport().
 * @param int   $maxConsecutiveFailures Consecutive rejected requests tolerated per
 *                                      category before it is abandoned.
 *
 * @return void
 */
function scrape($categories, $layerPage, $filterPrice, $maxConsecutiveFailures = 3)
{
    if (empty($categories)) {
        return;
    }

    // Consecutive failure count per category; reset on every fulfilled response.
    $failures = [];

    // Iterate instead of recursing so a long crawl does not grow the call stack.
    while (!empty($categories)) {
        // A new client per page, so nothing depends on state divarApi may keep between pages.
        $divar = new divarApi();

        $promises = [];
        foreach ($categories as $category) {
            $promises[$category] = $divar->asyncStruct($category, $layerPage);
        }

        // Run the requests for this page concurrently.
        $results = $divar->asyncRequest($promises);

        // Categories that must not be requested again on the next page.
        $finished = [];
        foreach ($results as $categoryName => $response) {
            if ($response['state'] === 'fulfilled') {
                $failures[$categoryName] = 0;

                $rsp = $response['value']->getBody();
                $status = $divar->parseExport($filterPrice, $categoryName . "/simple/", $rsp);

                // A falsy status stops paging for this category.
                if (!$status) {
                    $finished[] = $categoryName;
                }
            } else {
                echo $categoryName . ": Failed - " . $response['reason'];

                // Without a cap, a permanently failing category would be retried forever.
                $failures[$categoryName] = ($failures[$categoryName] ?? 0) + 1;
                if ($failures[$categoryName] >= $maxConsecutiveFailures) {
                    $finished[] = $categoryName;
                }
            }
        }

        // array_diff compares string representations, so numeric-looking slugs
        // (which PHP turns into integer array keys) are removed correctly too.
        $categories = array_diff($categories, $finished);

        if (empty($categories)) {
            break;
        }

        // Pause between pages to avoid hammering the remote service.
        sleep(5);
        $layerPage++;
    }
}

// Start scraping every category from layer page 0, passing 10000000 as the price filter.
scrape($categories, 0, 10000000);

Currently supported Digikala Supermarket categories:

<?php
/**
 * example usage of digikala supermarket data scraping
 */
require 'vendor/autoload.php';

use Rabbyte\Scraper\digikala\supermarket\spDigikalaApi;

// Digikala Supermarket category slugs to request.
$categories = [
    'oil',
    'chocolate-and-cocoa-products',
    'rice',
    'spaghetti-pasta',
    'sugar',
    'sugar-candy',
    'cereals',
    'bread',
    'types-paste'
];

// NOTE(review): the constructor argument looks like a local proxy address - confirm against spDigikalaApi.
$digikala = new spDigikalaApi('127.0.0.1:8082');

// Build one pending request per category, keyed by category slug.
// NOTE(review): the second asyncStruct() argument is presumably the page number - confirm.
$promises = [];
foreach ($categories as $category) {
    $promises[$category] = $digikala->asyncStruct($category, 1);
}

// Run all requests concurrently, then report each outcome.
$results = $digikala->asyncRequest($promises);
foreach ($results as $categoryName => $response) {
    if ($response['state'] !== 'fulfilled') {
        echo $categoryName . ": Failed - " . $response['reason'];
        continue;
    }

    $rsp = (string)$response['value']->getBody();
    $json = json_decode($rsp);

    // json_decode() yields null for invalid JSON; guard before reading ->data.
    if (!is_object($json) || !isset($json->data)) {
        echo $categoryName . ": Failed - unexpected response body";
        continue;
    }

    // print_r() prints scalars exactly like echo, but also handles the
    // object/array payloads that echo cannot convert to a string.
    print_r($json->data);
}

Currently supported Torob categories:

<?php
/**
 * example usage of torob data scraping
 */
require 'vendor/autoload.php';

use Rabbyte\Scraper\torob\torobApi;

// Torob category slugs; only the first entry is used below.
$categories = ['mobile'];

// Brands to query within that category.
$brands = ['apple', 'xiaomi', 'samsung'];

// Sort keys accepted by the request builder.
$sort = [
    '',       // most popular
    'price',  // cheapest first
    '-price', // most expensive first
    '-date',  // newest first
];

$torob = new torobApi('127.0.0.1:8080');

// One pending request per brand; each brand is paired with the sort key at the same index.
$promises = [];
foreach ($brands as $index => $brand) {
    $promises[$brand] = $torob->asyncStruct($categories[0], $brand, $sort[$index], 2);
}

// Run all requests concurrently and dump each decoded response.
$results = $torob->asyncRequest($promises);
foreach ($results as $brandName => $outcome) {
    if ($outcome['state'] !== 'fulfilled') {
        echo $brandName . ": Failed - " . $outcome['reason'];
        continue;
    }

    $body = (string)$outcome['value']->getBody();
    var_dump(json_decode($body));
}