shtrihstr/html-serializer

Convert HTML to JSON

dev-master 2016-08-18 12:44 UTC

This package is not auto-updated.

Last update: 2024-05-17 21:15:34 UTC


README

Convert HTML to an array or to JSON.

Installation

$ composer require shtrihstr/html-serializer

Usage

HTML to JSON

$html = new HtmlSerializer\Html('<div class="content"><p>Hello World</p><img src="img.png" alt="" class="image" /></div>');
$json = $html->toJson();

Result

[
    {
        "node": "div",
        "attributes": {
            "class": "content"
        },
        "children": [
            {
                "node": "p",
                "children": [
                    {
                        "node": "text",
                        "text": "Hello World"
                    }
                ]
            },
            {
                "node": "img",
                "attributes": {
                    "src": "img.png",
                    "alt": "",
                    "class": "image"
                }
            }
        ]
    }
]

HTML to Array

$array = $html->toArray();

Result

[
    [
        'node' => 'div',
        'attributes' => [
            'class' => 'content',
        ],
        'children' => [
            [
                'node' => 'p',
                'children' => [
                    [
                        'node' => 'text',
                        'text' => 'Hello World',
                    ],
                ]
            ],
            [
                'node' => 'img',
                'attributes' => [
                    'src' => 'img.png',
                    'alt' => '',
                    'class' => 'image',
                ]
            ],
        ],
    ],
]

Inline CSS

$html = new HtmlSerializer\Html('<div style="color: red; background: url(img.png?foo;bar)">Hello World</div>');
$html->parseCss(); // enabled by default
$json = $html->toJson();

Result

[
    {
        "node": "div",
        "attributes": {
            "style": {
                "color": "red",
                "background": "url(img.png?foo;bar)"
            }
        },
        "children": [
            {
                "node": "text",
                "text": "Hello World"
            }
        ]
    }
]
To keep the style attribute as a raw string, disable CSS parsing:

$html->parseCss(false);

Result

[
    {
        "node": "div",
        "attributes": {
            "style": "color: red; background: url(img.png?foo;bar)"
        },
        "children": [
            {
                "node": "text",
                "text": "Hello World"
            }
        ]
    }
]

Remove empty strings (whitespace-only text nodes)

$html = new HtmlSerializer\Html('<div> <p> foo</p> <span> bar </span>  </div>');
$html->removeEmptyStrings(); // enabled by default
$json = $html->toJson();

Result

[
    {
        "node": "div",
        "children": [
            {
                "node": "p",
                "children": [
                    {
                        "node": "text",
                        "text": " foo"
                    }
                ]
            },
            {
                "node": "span",
                "children": [
                    {
                        "node": "text",
                        "text": " bar "
                    }
                ]
            }
        ]
    }
]
To keep whitespace-only text nodes, disable the option:

$html->removeEmptyStrings(false);

Result

[
    {
        "node": "div",
        "children": [
            {
                "node": "text",
                "text": " "
            },
            {
                "node": "p",
                "children": [
                    {
                        "node": "text",
                        "text": " foo"
                    }
                ]
            },
            {
                "node": "text",
                "text": " "
            },
            {
                "node": "span",
                "children": [
                    {
                        "node": "text",
                        "text": " bar "
                    }
                ]
            },
            {
                "node": "text",
                "text": "  "
            }
        ]
    }
]