dimabdc/php-fast-simple-html-dom-parser

PHP Fast Simple HTML DOM parser.

1.4 2020-08-23 22:47 UTC

This package is auto-updated.

Last update: 2024-10-29 04:47:23 UTC


README

Build Status Total Downloads Latest Stable Version License

PHP Fast Simple HTML DOM Parser is a fast HTML DOM parser with low memory usage and a syntax like that of PHP Simple HTML DOM Parser.

Установка

Для установки выполните команду:

composer require dimabdc/php-fast-simple-html-dom-parser

Быстрый старт

require_once "vendor/autoload.php";
use FastSimpleHTMLDom\Document;

// Create DOM from URL
$html = Document::file_get_html('https://habrahabr.ru/interesting/');

// Find all post blocks
$posts = [];
foreach($html->find('div.post') as $post) {
    $item['title']   = $post->find('h1.title', 0)->plaintext;
    $item['hubs']    = $post->find('div.hubs', 0)->plaintext;
    $item['content'] = $post->find('div.content', 0)->plaintext;
    $posts[] = $item;
}

print_r($posts);

Как создать HTML DOM объект

// Create a DOM object from a string
$html = new Document('<html><body>Hello!</body></html>');

// Create an empty DOM object, then load HTML into it from a string
$html = new Document();
$html->loadHtml('<html><body>Hello!</body></html>');

// Create a DOM object from an HTML file
$html = new Document();
$html->loadHtmlFile('test.htm');

// Create a DOM object from a URL
$html = new Document(file_get_contents('https://habrahabr.ru/interesting/'));

Как искать HTML DOM элементы?

Основа

// Find all anchors, returns an array of element objects
$ret = $html->find('a');

// Find (N)th anchor, returns element object or null if not found (zero based)
$ret = $html->find('a', 0);

// Find the last anchor, returns element object or null if not found (index -1 selects the last match)
$ret = $html->find('a', -1); 

// Find all <div> with the id attribute
$ret = $html->find('div[id]');

// Find all <div> whose id attribute equals foo
$ret = $html->find('div[id=foo]'); 

Часто используемое

// Find all elements with id=foo
$ret = $html->find('#foo');

// Find all elements with class=foo
$ret = $html->find('.foo');

// Find all elements that have an id attribute
$ret = $html->find('*[id]'); 

// Find all anchors and images 
$ret = $html->find('a, img'); 

// Find all anchors and images with the "title" attribute
$ret = $html->find('a[title], img[title]');

Селекторы потомков

// Find all <li> in <ul> 
$es = $html->find('ul li');

// Find Nested <div> tags
$es = $html->find('div div div'); 

// Find all <td> in <table> which class=hello 
$es = $html->find('table.hello td');

// Find all td tags with attribute align=center in table tags
$es = $html->find('table td[align=center]');

Вложенные селекторы

// Find all <li> in <ul> 
foreach($html->find('ul') as $ul) 
{
       foreach($ul->find('li') as $li) 
       {
             // do something...
       }
}

// Find first <li> in first <ul> 
$e = $html->find('ul', 0)->find('li', 0);

Фильтр атрибутов

Текст, комментарии

// Find all text blocks 
$es = $html->find('text');

// Find all comment (<!--...-->) blocks 
$es = $html->find('comment');

Доступ к атрибутам

Получение, установка и удаление атрибутов

// Get an attribute (if the attribute is a non-value attribute, e.g. checked, selected..., it will return true or false)
$value = $e->href;

// Set an attribute (if the attribute is a non-value attribute, e.g. checked, selected..., set its value to true or false)
$e->href = 'my link';

// Remove an attribute: set its value to null!
$e->href = null;

// Determine whether an attribute exists
if(isset($e->href)) 
        echo 'href exist!';

"Магические" атрибуты

// Example
$html = str_get_html('<div>foo <b>bar</b></div>'); 
$e = $html->find('div', 0);

echo $e->tag; // Returns: "div"
echo $e->outertext; // Returns: "<div>foo <b>bar</b></div>"
echo $e->innertext; // Returns: "foo <b>bar</b>"
echo $e->plaintext; // Returns: "foo bar"

Трюки

// Extract contents from HTML 
echo $html->plaintext;

// Wrap an element
$e->outertext = '<div class="wrap">' . $e->outertext . '</div>';

// Remove an element: set its outertext to an empty string
$e->outertext = '';

// Append an element
$e->outertext = $e->outertext . '<div>foo</div>';

// Insert an element
$e->outertext = '<div>foo</div>' . $e->outertext;

Прогон по DOM-дереву

// If you are not so familiar with HTML DOM, check this link to learn more... 

// Example
echo $html->find('#div1', 0)->children(1)->children(1)->children(2)->id;
// or 
echo $html->getElementById('div1')->childNodes(1)->childNodes(1)->childNodes(2)->getAttribute('id');

API-справочник

Методы и свойства DOM

Методы и свойства элементов

Прогон по дереву DOM

camelCase эквиваленты

string $e->getAttribute($name)
string $e->attribute

void $e->setAttribute($name, $value)
void $e->attribute = $value

bool $e->hasAttribute($name)
bool isset($e->attribute)

void $e->removeAttribute($name)
void $e->attribute = null

element $e->getElementById($id)
mixed $e->find("#$id", 0)

mixed $e->getElementsById($id [,$index])
mixed $e->find("#$id" [, int $index])

element $e->getElementByTagName($name)
mixed $e->find($name, 0)

mixed $e->getElementsByTagName($name [, $index])
mixed $e->find($name [, int $index])

element $e->parentNode()
element $e->parent()

mixed $e->childNodes([$index])
mixed $e->children([int $index])

element $e->firstChild()
element $e->first_child()

element $e->lastChild()
element $e->last_child()

element $e->nextSibling()
element $e->next_sibling()

element $e->previousSibling()
element $e->prev_sibling()