arraypress/wp-extract-utils

A lean WordPress library for extracting and splitting structured data from strings

dev-main 2025-07-04 12:57 UTC

This package is auto-updated.

Last update: 2025-09-02 14:52:26 UTC


README

A lean WordPress library for extracting and splitting structured data from strings. Provides utility functions for extracting mentions, hashtags, URLs, IDs, amounts, and other common patterns from WordPress content.

Installation

Install via Composer:

composer require arraypress/wp-extract-utils

Extract Class

The Extract class provides methods for extracting structured data from strings.

Content Extraction

mentions( string $string ): array

Extract @username mentions while excluding email addresses.

$content = "Hey @john and @jane_doe, check out support@company.com!";
$mentions = Extract::mentions($content);
// Returns: ['john', 'jane_doe']

hashtags( string $string ): array

Extract #hashtag patterns from content.

$content = "Love #wordpress and #php development! #coding";
$hashtags = Extract::hashtags($content);
// Returns: ['wordpress', 'php', 'coding']

social_handles( string $string ): array

Extract social media handles (@username) excluding email addresses.

$content = "Follow @company_name and @founder on Twitter! Email: support@company.com";
$handles = Extract::social_handles($content);
// Returns: ['company_name', 'founder']

social_urls( string $string, array $platforms = [] ): array

Extract social media URLs from major platforms.

$content = "Visit https://twitter.com/user and https://youtube.com/channel/123";
$social_urls = Extract::social_urls($content);
// Returns: ['https://twitter.com/user', 'https://youtube.com/channel/123']

// Extract specific platforms only
$youtube_only = Extract::social_urls($content, ['youtube']);
// Returns: ['https://youtube.com/channel/123']

URL & Media Extraction

urls( string $string ): array

Extract and validate URLs from content.

$content = "Visit https://example.com and check out http://wordpress.org";
$urls = Extract::urls($content);
// Returns: ['https://example.com', 'http://wordpress.org']

media_urls( string $string, array $extensions = [], bool $wp_only = false ): array

Extract media URLs using WordPress-supported file types.

$content = "Image: https://example.com/image.jpg and PDF: https://site.com/doc.pdf";
$media = Extract::media_urls($content);
// Returns: ['https://example.com/image.jpg', 'https://site.com/doc.pdf']

// WordPress uploads only
$wp_media = Extract::media_urls($content, [], true);
// Returns only URLs containing '/wp-content/uploads/'

// Specific extensions only
$images = Extract::media_urls($content, ['jpg', 'png', 'gif']);
// Returns: ['https://example.com/image.jpg']

oembed_urls( string $string ): array

Extract URLs that support WordPress oEmbed.

$content = "Watch https://www.youtube.com/watch?v=123 and https://example.com/page";
$oembed = Extract::oembed_urls($content);
// Returns: ['https://www.youtube.com/watch?v=123'] (only oEmbed-supported URLs)

Financial Data

amounts( string $string, bool $include_negative = false ): array

Extract monetary amounts as floats.

$content = "Price: $29.99, discount $5.50, total $24.49";
$amounts = Extract::amounts($content);
// Returns: [29.99, 5.50, 24.49]

$content_with_negative = "Profit: $100.00, loss: -$25.50";
$all_amounts = Extract::amounts($content_with_negative, true);
// Returns: [100.00, -25.50]

currencies( string $string, bool $include_negative = false ): array

Extract currency amounts with symbols and values.

$content = "Prices: $29.99, €25.50, £22.00";
$currencies = Extract::currencies($content);
// Returns: [
//     ['original' => '$29.99', 'symbol' => '$', 'value' => 29.99],
//     ['original' => '€25.50', 'symbol' => '€', 'value' => 25.50],
//     ['original' => '£22.00', 'symbol' => '£', 'value' => 22.00]
// ]

vat_numbers( string $string, string $country = '' ): array

Extract VAT numbers from European countries.

$content = "Our VAT numbers: GB123456789, DE987654321, FR12345678901";
$vat_numbers = Extract::vat_numbers($content);
// Returns: ['GB123456789', 'DE987654321', 'FR12345678901']

// Specific country only
$uk_vat = Extract::vat_numbers($content, 'GB');
// Returns: ['GB123456789']

Contact Information

emails( string $string ): array

Extract and validate email addresses.

$content = "Contact info@example.com or support@company.org for help";
$emails = Extract::emails($content);
// Returns: ['info@example.com', 'support@company.org']

phone_numbers( string $string ): array

Extract international phone numbers (ITU-T E.123 standard).

$content = "Call +1 234 567 8900 or +44 20 7946 0958";
$phones = Extract::phone_numbers($content);
// Returns: ['+1 234 567 8900', '+44 20 7946 0958']

ip_addresses( string $string ): array

Extract IP addresses from content.

$content = "Server IPs: 192.168.1.1 and 10.0.0.1, also 2001:db8::1";
$ips = Extract::ip_addresses($content);
// Returns: ['192.168.1.1', '10.0.0.1', '2001:db8::1']

postal_codes( string $string, string $country = '' ): array

Extract postal codes from various countries.

$content = "Addresses: 90210, SW1A 1AA, K1A 0A9, 10115";
$postcodes = Extract::postal_codes($content);
// Returns: ['90210', 'SW1A 1AA', 'K1A 0A9', '10115']

// US ZIP codes only
$us_zips = Extract::postal_codes($content, 'US');
// Returns: ['90210']

WordPress-Specific

shortcodes( string $string ): array

Extract WordPress shortcodes from content.

$content = "Gallery: [gallery ids='1,2,3'] and button: [button text='Click me']";
$shortcodes = Extract::shortcodes($content);
// Returns: ["[gallery ids='1,2,3']", "[button text='Click me']"]

gutenberg_blocks( string $string ): array

Extract Gutenberg block names from post content.

$content = "<!-- wp:paragraph --><p>Text</p><!-- /wp:paragraph --><!-- wp:image -->";
$blocks = Extract::gutenberg_blocks($content);
// Returns: ['paragraph', 'image']

user_ids( string $string, bool $check_all_nums = false ): array

Extract and validate WordPress user IDs.

$content = "Author user_id:123 and editor user_id:456";
$user_ids = Extract::user_ids($content);
// Returns: [123, 456] (only if users exist)

// Check all numbers in string
$content_with_numbers = "User 123 created post 456 with 789 views";
$all_user_ids = Extract::user_ids($content_with_numbers, true);
// Returns: [123] (only valid user IDs)

post_ids( string $string, bool $check_all_nums = false ): array

Extract and validate WordPress post IDs.

$content = "Related posts: post_id:100, post_id:200";
$post_ids = Extract::post_ids($content);
// Returns: [100, 200] (only if posts exist)

term_ids( string $string, bool $check_all_nums = false ): array

Extract and validate WordPress term IDs.

$content = "Categories: term_id:5, term_id:10";
$term_ids = Extract::term_ids($content);
// Returns: [5, 10] (only if terms exist)

usernames( string $string, bool $validate = true ): array

Extract WordPress usernames with optional validation.

$content = "Users: admin john_doe jane-smith invalid@name";
$usernames = Extract::usernames($content, false);
// Returns: ['admin', 'john_doe', 'jane-smith']

// With validation (returns user objects)
$valid_users = Extract::usernames($content, true);
// Returns: [WP_User objects] (only existing users)

post_slugs( string $string, bool $validate = true ): array

Extract WordPress post slugs with optional validation.

$content = "Posts: hello-world sample-post about-us";
$slugs = Extract::post_slugs($content, false);
// Returns: ['hello-world', 'sample-post', 'about-us']

// With validation (returns post objects)
$valid_posts = Extract::post_slugs($content, true);
// Returns: [WP_Post objects] (only existing posts)

Document & Content

isbn_numbers( string $string ): array

Extract ISBN-10 and ISBN-13 numbers.

$content = "Books: ISBN-13: 978-0-596-52068-7 and ISBN-10: 0-596-52068-9";
$isbns = Extract::isbn_numbers($content);
// Returns: ['978-0-596-52068-7', '0-596-52068-9']

html_tables( string $string, bool $header_row = false ): array

Extract table data from HTML content.

$html = "<table><tr><td>Name</td><td>Age</td></tr><tr><td>John</td><td>30</td></tr></table>";
$tables = Extract::html_tables($html);
// Returns: [[['Name', 'Age'], ['John', '30']]]

// With header row structure
$structured = Extract::html_tables($html, true);
// Returns: [['headers' => ['Name', 'Age'], 'rows' => [['John', '30']]]]

html_comments( string $string, bool $exclude_wp_blocks = true ): array

Extract HTML comments from content.

$html = "<!-- Regular comment --><!-- wp:paragraph -->Content<!-- Another comment -->";
$comments = Extract::html_comments($html);
// Returns: ['Regular comment', 'Another comment'] (excludes WP blocks)

// Include all comments
$all_comments = Extract::html_comments($html, false);
// Returns: ['Regular comment', 'wp:paragraph', 'Another comment']

custom_field_patterns( string $string ): array

Extract custom field patterns such as [field key="value"], {{key:value}}, and %key:value%.

$content = "[field name='author'] and {{title:My Title}} and %category:News%";
$fields = Extract::custom_field_patterns($content);
// Returns: ['name' => 'author', 'title' => 'My Title', 'category' => 'News']

Technical Data

hex_colors( string $string ): array

Extract hex color codes (#fff, #ffffff).

$content = "Colors: #fff, #000000, #ff5733, and invalid #gg1122";
$colors = Extract::hex_colors($content);
// Returns: ['#fff', '#000000', '#ff5733']

file_extensions( string $string ): array

Extract file extensions from content.

$content = "Files: document.pdf, image.jpg, script.js, archive.tar.gz";
$extensions = Extract::file_extensions($content);
// Returns: ['pdf', 'jpg', 'js', 'gz']

dates( string $string, string $format = 'Y-m-d' ): array

Extract and format dates from various patterns.

$content = "Events: 2024-03-15, 12/25/2023, 01-01-2024";
$dates = Extract::dates($content);
// Returns: ['2024-03-15', '2023-12-25', '2024-01-01']

// Custom format
$formatted = Extract::dates($content, 'm/d/Y');
// Returns: ['03/15/2024', '12/25/2023', '01/01/2024']

times( string $string, string $format = 'H:i:s' ): array

Extract and format times from content.

$content = "Meeting times: 14:30, 9:15 AM, 10:45:30";
$times = Extract::times($content);
// Returns: ['14:30:00', '09:15:00', '10:45:30']

// Custom format
$formatted = Extract::times($content, 'H:i');
// Returns: ['14:30', '09:15', '10:45']

Utility Methods

object_id( mixed $object ): ?int

Extract object ID from various input types.

$post = get_post(123);
$id1 = Extract::object_id($post);        // Returns: 123
$id2 = Extract::object_id("456");        // Returns: 456
$id3 = Extract::object_id(['ID' => 789]); // Returns: 789
$id4 = Extract::object_id("invalid");    // Returns: null

unit_components( string $value, string $default_unit = '' ): array

Extract numeric value and unit components.

$component1 = Extract::unit_components("10px");
// Returns: ['number' => 10, 'unit' => 'px']

$component2 = Extract::unit_components("5.5em");
// Returns: ['number' => 5.5, 'unit' => 'em']

$component3 = Extract::unit_components("100", "percentage");
// Returns: ['number' => 100, 'unit' => 'percentage']

Split Class

The Split class provides methods for splitting strings into components.

Personal Data

full_name( string $string ): array

Split full name into prefix, first, middle, last, and suffix.

$name = "Dr. John Michael Smith Jr.";
$parts = Split::full_name($name);
// Returns: [
//     'prefix' => 'Dr.',
//     'first_name' => 'John',
//     'middle_name' => 'Michael',
//     'last_name' => 'Smith',
//     'suffix' => 'Jr.'
// ]

email( string $email ): ?array

Split email into username and domain components.

$email_parts = Split::email("user@example.com");
// Returns: ['username' => 'user', 'domain' => 'example.com']

$invalid = Split::email("invalid-email");
// Returns: null

Technical Splitting

path( string $path ): array

Split file path into dirname, basename, extension, and filename.

$path_parts = Split::path("/var/www/html/file.txt");
// Returns: [
//     'dirname' => '/var/www/html',
//     'basename' => 'file.txt',
//     'extension' => 'txt',
//     'filename' => 'file'
// ]

url( string $url ): array

Split URL into scheme, host, path, query, and fragment.

$url_parts = Split::url("https://example.com/path?query=value#section");
// Returns: [
//     'scheme' => 'https',
//     'host' => 'example.com',
//     'path' => '/path',
//     'query' => 'query=value',
//     'fragment' => 'section'
// ]

domain( string $url ): array

Split the host of a URL into subdomain, domain, and TLD.

$domain_parts = Split::domain("https://blog.example.com/page");
// Returns: [
//     'subdomain' => 'blog',
//     'domain' => 'example',
//     'tld' => 'com'
// ]

version( string $version ): array

Split version string into major, minor, patch, and suffix.

$version_parts = Split::version("2.1.3-beta");
// Returns: [
//     'major' => 2,
//     'minor' => 1,
//     'patch' => 3,
//     'suffix' => 'beta'
// ]

Data Parsing

meta_key_value( string $string ): ?array

Split meta_key:value format into components.

$meta = Split::meta_key_value("color:blue");
// Returns: ['meta_key' => 'color', 'meta_value' => 'blue']

$invalid = Split::meta_key_value("invalid-format");
// Returns: null

unit_value( string $string ): array

Split number and period from strings like "7days".

$unit = Split::unit_value("7days");
// Returns: ['number' => 7, 'period' => 'days']

$unit2 = Split::unit_value("30minutes");
// Returns: ['number' => 30, 'period' => 'minutes']

duration( string $duration ): array

Split duration strings into time components.

$duration = Split::duration("2h 30m 15s");
// Returns: ['hours' => 2, 'minutes' => 30, 'seconds' => 15]

$complex = Split::duration("1 year 2 months 3 weeks 4 days");
// Returns: ['years' => 1, 'months' => 2, 'weeks' => 3, 'days' => 4]

String Processing

camel_case( string $string, string $separator = ' ' ): string

Split camelCase strings into words.

$words = Split::camel_case("firstName");
// Returns: "first Name"

$custom = Split::camel_case("getUserName", "-");
// Returns: "get-User-Name"

chunks( string $string, int $length ): array

Split string into chunks of specified length.

$chunks = Split::chunks("abcdefgh", 3);
// Returns: ['abc', 'def', 'gh']

csv( string $string, string $delimiter = ',' ): array

Split CSV strings handling quoted values.

$csv_data = Split::csv("name,age,city\nJohn,30,\"New York\"\nJane,25,Boston");
// Returns: [
//     ['name', 'age', 'city'],
//     ['John', '30', 'New York'],
//     ['Jane', '25', 'Boston']
// ]

list( string $string, string $delimiter = ',' ): array

Split delimited lists with trimming.

$list = Split::list("apple, banana , orange,  grape");
// Returns: ['apple', 'banana', 'orange', 'grape']

$custom_delimiter = Split::list("red|green|blue", "|");
// Returns: ['red', 'green', 'blue']

Requirements

  • PHP 7.4 or later
  • WordPress 5.0 or later

Contributing

Contributions are welcome! Please feel free to submit a Pull Request.

License

This project is licensed under the GPL-2.0-or-later License.

Support