helgesverre/firecrawl-php-sdk

Modern PHP SDK for the Firecrawl API - Web scraping and crawling service.

Installation

Install via Composer:

composer require helgesverre/firecrawl-php-sdk

Quick Start

<?php

require 'vendor/autoload.php';

use HelgeSverre\Firecrawl\FirecrawlClient;

// Initialize the client
$client = new FirecrawlClient(
    apiKey: 'your-api-key-here'
);

// Scrape a single URL
$document = $client->scrape('https://example.com');
echo $document->markdown;

Configuration

The client can be configured in multiple ways:

// Option 1: Pass API key directly
$client = new FirecrawlClient(apiKey: 'fc-...');

// Option 2: Use environment variable
// Set FIRECRAWL_API_KEY environment variable
$client = new FirecrawlClient();

// Option 3: Full configuration
$client = new FirecrawlClient(
    apiKey: 'fc-...',
    apiUrl: 'https://api.firecrawl.dev',  // Optional, default 'https://api.firecrawl.dev/'
    timeoutMs: 90000,                      // Optional, default 60000
    maxRetries: 5,                         // Optional, default 3
    backoffFactor: 1.0                     // Optional, default 0.5
);

Usage Examples

Scraping

Basic Scraping

use HelgeSverre\Firecrawl\FirecrawlClient;

$client = new FirecrawlClient();
$document = $client->scrape('https://example.com');

// Access different formats
echo $document->markdown;
echo $document->html;
echo $document->metadata?->title;

Scraping with Options

use HelgeSverre\Firecrawl\DTO\ScrapeOptions;
use HelgeSverre\Firecrawl\Enums\Format;

$options = new ScrapeOptions(
    formats: [Format::Markdown, Format::Html, Format::Links],
    onlyMainContent: true
);

$document = $client->scrape('https://example.com', $options);
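
When several formats are requested, each is exposed as a property on the returned document. A short sketch; the links property name below is an assumption mirroring the Format::Links enum case, so verify it against the Document DTO in your installed version:

echo $document->markdown;
echo $document->html;
print_r($document->links); // assumed property name, mirroring Format::Links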

Using the Fluent Builder

use HelgeSverre\Firecrawl\Builders\ScrapeOptionsBuilder;
use HelgeSverre\Firecrawl\Enums\Format;
use HelgeSverre\Firecrawl\Enums\ProxyType;

$options = ScrapeOptionsBuilder::create()
    ->formats([Format::Markdown, Format::Screenshot])
    ->onlyMainContent(true)
    ->mobile(true)
    ->proxy(ProxyType::Stealth)
    ->blockAds(true)
    ->build();

$document = $client->scrape('https://example.com', $options);

Advanced Scraping with Actions

use HelgeSverre\Firecrawl\DTO\Actions\ClickAction;
use HelgeSverre\Firecrawl\DTO\Actions\WaitAction;
use HelgeSverre\Firecrawl\DTO\Actions\ScreenshotAction;
use HelgeSverre\Firecrawl\DTO\ScrapeOptions;
use HelgeSverre\Firecrawl\Enums\Format;

$options = new ScrapeOptions(
    formats: [Format::Markdown, Format::Screenshot],
    actions: [
        new WaitAction(milliseconds: 2000),
        new ClickAction(selector: 'button.load-more'),
        new WaitAction(selector: '.content-loaded'),
        new ScreenshotAction(fullPage: true),
    ]
);

$document = $client->scrape('https://example.com', $options);

Crawling

Start a Crawl (Async)

use HelgeSverre\Firecrawl\DTO\CrawlOptions;

$options = new CrawlOptions(
    limit: 100,
    maxDiscoveryDepth: 3,
    excludePaths: ['admin/*', 'login'],
    includePaths: ['blog/*', 'docs/*']
);

$response = $client->startCrawl('https://example.com', $options);
echo "Crawl started with ID: {$response->id}\n";

Check Crawl Status

$job = $client->getCrawlStatus($response->id);

echo "Status: {$job->status->value}\n";
echo "Completed: {$job->completed}/{$job->total}\n";

foreach ($job->data as $document) {
    echo "- {$document->metadata?->url}\n";
}
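
If you prefer to poll manually instead of using the blocking helper below, a minimal loop looks like this. The terminal status strings are assumptions about the API; check the CrawlStatus cases in the Enums namespace for the authoritative values:

$job = $client->getCrawlStatus($response->id);

while (!in_array($job->status->value, ['completed', 'failed', 'cancelled'], true)) {
    sleep(2); // matches the default pollInterval of the blocking helpers
    $job = $client->getCrawlStatus($response->id);
    echo "Progress: {$job->completed}/{$job->total}\n";
}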

Wait for Crawl to Complete (Blocking)

use HelgeSverre\Firecrawl\DTO\CrawlOptions;

$options = new CrawlOptions(
    limit: 50,
    maxDiscoveryDepth: 2
);

// This will block until the crawl completes
$job = $client->crawl(
    url: 'https://example.com',
    options: $options,
    pollInterval: 2,  // Check every 2 seconds (default)
    timeout: 600      // Max 10 minutes (default is 300 seconds/5 minutes)
);

echo "Crawl completed! Found {$job->total} pages.\n";

Cancel a Crawl

$cancelled = $client->cancelCrawl($crawlId);
if ($cancelled) {
    echo "Crawl cancelled successfully\n";
}

Natural Language Crawl

use HelgeSverre\Firecrawl\DTO\CrawlOptions;

$options = new CrawlOptions(
    prompt: "Find all product pages with pricing information"
);

$job = $client->crawl('https://example-store.com', $options);

Batch Scraping

Start Batch Scrape

use HelgeSverre\Firecrawl\DTO\BatchScrapeOptions;
use HelgeSverre\Firecrawl\DTO\ScrapeOptions;
use HelgeSverre\Firecrawl\Enums\Format;

$urls = [
    'https://example.com/page1',
    'https://example.com/page2',
    'https://example.com/page3',
];

$scrapeOptions = new ScrapeOptions(
    formats: [Format::Markdown]
);

$batchOptions = new BatchScrapeOptions(
    options: $scrapeOptions,
    maxConcurrency: 5
);

$response = $client->startBatchScrape($urls, $batchOptions);
echo "Batch scrape started: {$response->id}\n";

Wait for Batch to Complete

$job = $client->batchScrape(
    urls: $urls,
    options: $batchOptions,
    pollInterval: 2,  // Check every 2 seconds (default)
    timeout: 300      // Max 5 minutes (default)
);

echo "Batch completed: {$job->completed}/{$job->total}\n";
foreach ($job->data as $document) {
    echo $document->markdown . "\n\n";
}

Site Mapping

use HelgeSverre\Firecrawl\DTO\MapOptions;
use HelgeSverre\Firecrawl\Enums\SitemapMode;

$options = new MapOptions(
    search: 'documentation'
);

$result = $client->map('https://example.com', $options);
print_r($result);

Search

$results = $client->search('artificial intelligence', [
    'sources' => ['web', 'news'],
    'limit' => 10,
]);

print_r($results);

Extract (AI-Powered)

// Start extraction
$response = $client->startExtract([
    'urls' => ['https://example.com/article'],
    'prompt' => 'Extract the main article title, author, and publication date',
]);

// Or use the blocking version
$result = $client->extract(
    options: [
        'urls' => ['https://example.com/article'],
        'schema' => [
            'type' => 'object',
            'properties' => [
                'title' => ['type' => 'string'],
                'author' => ['type' => 'string'],
                'date' => ['type' => 'string'],
            ],
        ],
    ],
    pollInterval: 2,  // Check every 2 seconds (default)
    timeout: 120      // Max 2 minutes (default is 300 seconds/5 minutes)
);

print_r($result);

Webhooks

use HelgeSverre\Firecrawl\DTO\WebhookConfig;
use HelgeSverre\Firecrawl\DTO\CrawlOptions;
use HelgeSverre\Firecrawl\Enums\WebhookEvent;

$webhook = new WebhookConfig(
    url: 'https://your-server.com/webhook',
    headers: ['Authorization' => 'Bearer your-token'],
    events: [WebhookEvent::Completed, WebhookEvent::Failed]
);

$options = new CrawlOptions(
    webhook: $webhook,
    limit: 100
);

$response = $client->startCrawl('https://example.com', $options);
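
On your server, the webhook endpoint receives JSON POST requests from Firecrawl. A minimal receiver sketch; the "type" field and the crawl.completed event name are assumptions about the payload shape, so verify them against the Firecrawl webhook documentation:

<?php
// webhook.php - minimal receiver sketch

// Check the Authorization header configured in WebhookConfig above
if (($_SERVER['HTTP_AUTHORIZATION'] ?? '') !== 'Bearer your-token') {
    http_response_code(401);
    exit;
}

$payload = json_decode(file_get_contents('php://input'), true);

// Assumed payload shape: a "type" field naming the event
if (($payload['type'] ?? null) === 'crawl.completed') {
    // e.g. fetch the finished results by job id:
    // $job = $client->getCrawlStatus($payload['id']);
}

http_response_code(200);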

Pagination

use HelgeSverre\Firecrawl\DTO\PaginationConfig;

$pagination = new PaginationConfig(
    autoPaginate: true,
    maxPages: 5,
    maxResults: 500,
    maxWaitTime: 60
);

$job = $client->getCrawlStatus($crawlId, $pagination);
// All pages automatically fetched and merged
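
To page through results yourself, the assumption is that disabling auto-pagination returns only the first page:

$pagination = new PaginationConfig(autoPaginate: false);
$job = $client->getCrawlStatus($crawlId, $pagination);
// $job->data now holds only the first page of results (assumed behavior)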

Usage Monitoring

// Get current credit usage
$credits = $client->getCreditUsage();
echo "Remaining credits: {$credits['remainingCredits']}\n";

// Get token usage
$tokens = $client->getTokenUsage();
echo "Remaining tokens: {$tokens['remainingTokens']}\n";

// Get concurrency info
$concurrency = $client->getConcurrency();
echo "Active jobs: {$concurrency['concurrency']}/{$concurrency['maxConcurrency']}\n";

// Get queue status
$queue = $client->getQueueStatus();
echo "Jobs in queue: {$queue['jobsInQueue']}\n";

// Historical usage
$historical = $client->getCreditUsageHistorical(byApiKey: true);
print_r($historical);

Error Handling

use HelgeSverre\Firecrawl\Exceptions\ApiException;
use HelgeSverre\Firecrawl\Exceptions\TimeoutException;
use HelgeSverre\Firecrawl\Exceptions\ValidationException;
use HelgeSverre\Firecrawl\Exceptions\FirecrawlException;

try {
    $document = $client->scrape('https://example.com');
} catch (ValidationException $e) {
    echo "Validation error: {$e->getMessage()}\n";
} catch (ApiException $e) {
    echo "API error: {$e->getMessage()}\n";
    echo "Status code: {$e->statusCode}\n";
    echo "Error code: {$e->errorCode}\n";
} catch (TimeoutException $e) {
    echo "Request timed out: {$e->getMessage()}\n";
} catch (FirecrawlException $e) {
    echo "General error: {$e->getMessage()}\n";
}
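
Because ApiException exposes the HTTP status code, you can branch on specific failures. A sketch for rate limiting, assuming the API uses the conventional 429 status; the client's built-in maxRetries already covers transient failures, so this only runs once those are exhausted:

use HelgeSverre\Firecrawl\Exceptions\ApiException;

try {
    $document = $client->scrape('https://example.com');
} catch (ApiException $e) {
    if ($e->statusCode === 429) {
        sleep(30); // back off before a single manual retry
        $document = $client->scrape('https://example.com');
    } else {
        throw $e;
    }
}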

Advanced Features

Custom Headers

use HelgeSverre\Firecrawl\DTO\ScrapeOptions;

$options = new ScrapeOptions(
    headers: [
        'User-Agent' => 'Custom Bot 1.0',
        'X-Custom-Header' => 'value',
    ]
);

$document = $client->scrape('https://example.com', $options);

Location-Based Scraping

use HelgeSverre\Firecrawl\DTO\LocationConfig;
use HelgeSverre\Firecrawl\DTO\ScrapeOptions;

$location = new LocationConfig(
    country: 'US',
    languages: ['en-US', 'en']
);

$options = new ScrapeOptions(location: $location);
$document = $client->scrape('https://example.com', $options);

Browser Actions

use HelgeSverre\Firecrawl\DTO\Actions\ClickAction;
use HelgeSverre\Firecrawl\DTO\Actions\ExecuteJavascriptAction;
use HelgeSverre\Firecrawl\DTO\Actions\PressAction;
use HelgeSverre\Firecrawl\DTO\Actions\ScreenshotAction;
use HelgeSverre\Firecrawl\DTO\Actions\ScrollAction;
use HelgeSverre\Firecrawl\DTO\Actions\WaitAction;
use HelgeSverre\Firecrawl\DTO\Actions\WriteAction;
use HelgeSverre\Firecrawl\DTO\ScrapeOptions;
use HelgeSverre\Firecrawl\Enums\ScrollDirection;

$options = new ScrapeOptions(
    actions: [
        new ClickAction(selector: '#accept-cookies'),
        new WaitAction(milliseconds: 1000),
        new WriteAction(text: 'search query'),
        new PressAction(key: 'Enter'),
        new WaitAction(selector: '.results'),
        new ScrollAction(direction: ScrollDirection::Down),
        new ExecuteJavascriptAction(script: 'window.scrollTo(0, document.body.scrollHeight)'),
        new ScreenshotAction(fullPage: true),
    ]
);

$document = $client->scrape('https://example.com', $options);

Testing

Using Docker

# Build Docker image
make build

# Run tests
make test

# Run tests with coverage
make coverage

# Open shell in container
make shell

# Clean up
make clean

Alternative Docker Commands

# Run tests
docker compose run --rm runner

# Run with coverage
docker compose run --rm runner composer coverage

# Interactive shell
docker compose run --rm runner bash

# Custom command
docker compose run --rm runner vendor/bin/pest --filter=FirecrawlClient

Local Testing (PHP 8.3+ Required)

# Install dependencies
composer install

# Run tests
composer test

# Run with coverage (requires Xdebug or PCOV)
composer test:coverage

# Generate HTML coverage report
composer test:coverage:html
open coverage/index.html

Development

git clone https://github.com/HelgeSverre/firecrawl-php-sdk.git
cd firecrawl-php-sdk

# Copy the example env file, then add your FIRECRAWL_API_KEY (needed for the API tests)
cp .env.example .env

# Install dependencies
composer install

# Composer scripts
composer format               # Format code with Pint
composer test                 # Run Pest (v4) tests
composer coverage             # Run tests with coverage
composer type-coverage        # Check type coverage
composer analyse              # Static analysis with PHPStan

You can also use Docker to run these commands in a consistent environment:

docker compose run --rm runner composer test
docker compose run --rm runner composer coverage
docker compose run --rm runner composer type-coverage
docker compose run --rm runner composer analyse
docker compose run --rm runner composer format

Or use the shorter Makefile commands:

make test                     # Run tests
make coverage                 # Run tests with coverage
make type-coverage            # Run type coverage analysis
make analyse                  # Run static analysis
make clean                    # Clean up containers and cache

License

MIT License - see LICENSE file for details.