From 113499ae9f211e51abab10f65eb8bc9fc7a0b99f Mon Sep 17 00:00:00 2001 From: Henrique Moody Date: Sat, 31 Jan 2026 01:46:11 +0100 Subject: [PATCH] Add TrimFormatter for configurable string edge trimming Allows precise control over trimming operations with support for left, right, or both sides and custom characters, using PHP's mb_trim, mb_ltrim, and mb_rtrim functions for proper multibyte-safe trimming. Includes comprehensive tests covering all trim modes, custom characters, Unicode characters (CJK, emoji), special characters, multi-byte strings, and edge cases like empty strings and strings shorter than the characters to trim. Assisted-by: OpenCode (GLM-4.7) --- README.md | 1 + docs/TrimFormatter.md | 145 +++++++++++++++++++++++++++ src/Mixin/Builder.php | 3 + src/Mixin/Chain.php | 3 + src/TrimFormatter.php | 41 ++++++++ tests/Unit/TrimFormatterTest.php | 166 +++++++++++++++++++++++++++++++ 6 files changed, 359 insertions(+) create mode 100644 docs/TrimFormatter.md create mode 100644 src/TrimFormatter.php create mode 100644 tests/Unit/TrimFormatterTest.php diff --git a/README.md b/README.md index 3c55d5c..2affeb8 100644 --- a/README.md +++ b/README.md @@ -71,6 +71,7 @@ See the [PlaceholderFormatter documentation](docs/PlaceholderFormatter.md) and [ | [PlaceholderFormatter](docs/PlaceholderFormatter.md) | Template interpolation with placeholder replacement | | [SecureCreditCardFormatter](docs/SecureCreditCardFormatter.md) | Masked credit card formatting for secure display | | [TimeFormatter](docs/TimeFormatter.md) | Time promotion (mil, c, dec, y, mo, w, d, h, min, s, ms, us, ns) | +| [TrimFormatter](docs/TrimFormatter.md) | Remove whitespace from string edges | | [UppercaseFormatter](docs/UppercaseFormatter.md) | Convert string to uppercase | ## Contributing diff --git a/docs/TrimFormatter.md b/docs/TrimFormatter.md new file mode 100644 index 0000000..201351b --- /dev/null +++ b/docs/TrimFormatter.md @@ -0,0 +1,145 @@ + + +# TrimFormatter + +The `TrimFormatter` 
removes characters from the edges of strings with configurable characters and side selection, fully supporting UTF-8 Unicode characters. + +## Usage + +### Basic Usage + +```php +use Respect\StringFormatter\TrimFormatter; + +$formatter = new TrimFormatter(); + +echo $formatter->format(' hello world '); +// Outputs: "hello world" +``` + +### Trim Specific Side + +```php +use Respect\StringFormatter\TrimFormatter; + +$formatter = new TrimFormatter('left'); + +echo $formatter->format(' hello '); +// Outputs: "hello " + +$formatterRight = new TrimFormatter('right'); + +echo $formatterRight->format(' hello '); +// Outputs: " hello" +``` + +### Custom Characters + +```php +use Respect\StringFormatter\TrimFormatter; + +$formatter = new TrimFormatter('both', '-._'); + +echo $formatter->format('---hello---'); +// Outputs: "hello" + +echo $formatter->format('._hello_._'); +// Outputs: "hello" +``` + +### Unicode Characters + +```php +use Respect\StringFormatter\TrimFormatter; + +// CJK full-width spaces are trimmed by default +$formatter = new TrimFormatter(); + +echo $formatter->format(' hello世界 '); +// Outputs: "hello世界" + +// Trim emoji with custom characters +$formatterEmoji = new TrimFormatter('both', '😊'); + +echo $formatterEmoji->format('😊hello😊'); +// Outputs: "hello" +``` + +## API + +### `TrimFormatter::__construct` + +- `__construct(string $side = "both", string|null $characters = null)` + +Creates a new trim formatter instance. + +**Parameters:** + +- `$side`: Which side(s) to trim: "left", "right", or "both" (default: "both") +- `$characters`: The characters to trim from the string edges, or `null` for default Unicode whitespace (default: `null`) + +**Throws:** `InvalidFormatterException` when `$side` is not "left", "right", or "both" + +### `format` + +- `format(string $input): string` + +Removes characters from the specified side(s) of the input string. 
+ +**Parameters:** + +- `$input`: The string to trim + +**Returns:** The trimmed string + +## Examples + +| Side | Characters | Input | Output | Description | +| --------- | -------------- | --------------- | ------------ | ----------------------------------- | +| `"both"` | `null` | `" hello "` | `"hello"` | Trim default whitespace both sides | +| `"left"` | `null` | `" hello "` | `"hello "` | Trim default whitespace left only | +| `"right"` | `null` | `" hello "` | `" hello"` | Trim default whitespace right only | +| `"both"` | `"-"` | `"---hello---"` | `"hello"` | Trim hyphens from both sides | +| `"both"` | `"-._"` | `"-._hello_.-"` | `"hello"` | Trim multiple custom characters | +| `"left"` | `":"` | `":::hello:::"` | `"hello:::"` | Trim colons from left only | +| `"both"` | `null` | `" hello"` | `"hello"` | CJK space trimmed by default | +| `"both"` | `"😊"` | `"😊hello😊"` | `"hello"` | Trim emoji with custom characters | + +## Notes + +- Uses PHP's `mb_trim`, `mb_ltrim`, and `mb_rtrim` functions for multibyte-safe trimming +- Fully UTF-8 aware - handles all Unicode scripts including CJK, emoji, and complex characters +- Empty strings return empty strings +- If the characters string is empty or contains no characters present in the input, the string is returned unchanged +- Trimming operations are character-oriented, not byte-oriented + +### Default Characters + +When no characters are provided (`null`), the formatter uses `mb_trim`'s default which includes all Unicode whitespace characters: + +**ASCII whitespace:** +- ` ` (U+0020): Ordinary space +- `\t` (U+0009): Tab +- `\n` (U+000A): New line (line feed) +- `\r` (U+000D): Carriage return +- `\0` (U+0000): NUL-byte +- `\v` (U+000B): Vertical tab +- `\f` (U+000C): Form feed + +**Unicode whitespace:** +- U+00A0: No-break space +- U+1680: Ogham space mark +- U+2000–U+200A: Various width spaces (en quad, em quad, en space, em space, etc.) 
+- U+2028: Line separator
+- U+2029: Paragraph separator
+- U+202F: Narrow no-break space
+- U+205F: Medium mathematical space
+- U+3000: Ideographic space (CJK full-width space)
+- U+0085: Next line (NEL)
+- U+180E: Mongolian vowel separator
+
+See [mb_trim documentation](https://www.php.net/manual/en/function.mb-trim.php) for the complete list.
diff --git a/src/Mixin/Builder.php b/src/Mixin/Builder.php
index e6240d2..abe9ce7 100644
--- a/src/Mixin/Builder.php
+++ b/src/Mixin/Builder.php
@@ -51,5 +51,8 @@ public static function secureCreditCard(string $maskChar = '*'): Chain;
 
     public static function time(string $unit): Chain;
 
+    /** @param 'both'|'left'|'right' $side */
+    public static function trim(string $side = 'both', string|null $characters = null): Chain;
+
     public static function uppercase(): Chain;
 }
diff --git a/src/Mixin/Chain.php b/src/Mixin/Chain.php
index 7d47b2c..6614f5a 100644
--- a/src/Mixin/Chain.php
+++ b/src/Mixin/Chain.php
@@ -50,5 +50,8 @@ public function secureCreditCard(string $maskChar = '*'): Chain;
 
     public function time(string $unit): Chain;
 
+    /** @param 'both'|'left'|'right' $side */
+    public function trim(string $side = 'both', string|null $characters = null): Chain;
+
     public function uppercase(): Chain;
 }
diff --git a/src/TrimFormatter.php b/src/TrimFormatter.php
new file mode 100644
index 0000000..38e8673
--- /dev/null
+++ b/src/TrimFormatter.php
@@ -0,0 +1,52 @@
+
+ */
+
+declare(strict_types=1);
+
+namespace Respect\StringFormatter;
+
+use function in_array;
+use function mb_ltrim;
+use function mb_rtrim;
+use function mb_trim;
+use function sprintf;
+
+/**
+ * Trims characters from the left edge, the right edge, or both edges of a string.
+ *
+ * Delegates to mb_ltrim(), mb_rtrim(), and mb_trim(), so trimming is multibyte-aware.
+ */
+final readonly class TrimFormatter implements Formatter
+{
+    /**
+     * @param 'both'|'left'|'right' $side Which edge(s) of the input to trim
+     * @param string|null $characters Characters to strip, or null for the mb_trim() defaults
+     *
+     * @throws InvalidFormatterException When $side is not "left", "right", or "both"
+     */
+    public function __construct(
+        private string $side = 'both',
+        private string|null $characters = null,
+    ) {
+        if (!in_array($this->side, ['left', 'right', 'both'], true)) {
+            throw new InvalidFormatterException(
+                sprintf('Invalid side "%s". Must be "left", "right", or "both".', $this->side),
+            );
+        }
+    }
+
+    /** Strips the configured characters from the configured edge(s) of $input. */
+    public function format(string $input): string
+    {
+        return match ($this->side) {
+            'left' => mb_ltrim($input, $this->characters),
+            'right' => mb_rtrim($input, $this->characters),
+            default => mb_trim($input, $this->characters), // 'both': the only other value the constructor accepts
+        };
+    }
+}
diff --git a/tests/Unit/TrimFormatterTest.php b/tests/Unit/TrimFormatterTest.php
new file mode 100644
index 0000000..9aed3ae
--- /dev/null
+++ b/tests/Unit/TrimFormatterTest.php
@@ -0,0 +1,166 @@
+
+ */
+
+declare(strict_types=1);
+
+namespace Respect\StringFormatter\Test\Unit;
+
+use PHPUnit\Framework\Attributes\CoversClass;
+use PHPUnit\Framework\Attributes\DataProvider;
+use PHPUnit\Framework\Attributes\Test;
+use PHPUnit\Framework\TestCase;
+use Respect\StringFormatter\InvalidFormatterException;
+use Respect\StringFormatter\TrimFormatter;
+
+#[CoversClass(TrimFormatter::class)]
+final class TrimFormatterTest extends TestCase
+{
+    #[Test]
+    #[DataProvider('providerForWhitespace')]
+    #[DataProvider('providerForSides')]
+    #[DataProvider('providerForCustomMask')]
+    #[DataProvider('providerForSpecialChars')]
+    #[DataProvider('providerForUnicode')]
+    #[DataProvider('providerForEmoji')]
+    #[DataProvider('providerForMultiByte')]
+    #[DataProvider('providerForEdgeCases')]
+    public function itShouldTrimString(string $input, string $expected, string $side, string|null $mask = null): void
+    {
+        // @phpstan-ignore argument.type
+        $formatter = new TrimFormatter($side, $mask);
+
+        $actual = $formatter->format($input);
+
+        self::assertSame($expected, $actual);
+    }
+
+    #[Test]
+    public function itShouldThrowExceptionForInvalidSide(): void
+    {
+        $this->expectException(InvalidFormatterException::class);
+        $this->expectExceptionMessage('Invalid side "middle"');
+
+        // @phpstan-ignore argument.type
+        new TrimFormatter('middle');
+    }
+
+    /** @return array<string, list<string>> */
+    public static function providerForWhitespace(): array
+    {
+        return [
+            'whitespace both sides' => [' hello ', 'hello', 'both'],
+            'tab both sides' => ["\thello\t", 'hello', 'both'],
+            'newline both sides' => ["\nhello\n", 'hello', 'both'],
+            'mixed whitespace both' => [" \t\n hello \t\n", 'hello', 'both'],
+            'already trimmed both' => ['hello', 'hello', 'both'],
+            'only spaces both' => [' ', '', 'both'],
+            'ideographic space both' => ["\u{3000}hello\u{3000}", 'hello', 'both'],
+            'em space both' => ["\u{2003}hello\u{2003}", 'hello', 'both'],
+            'no-break space both' => ["\u{00A0}hello\u{00A0}", 'hello', 'both'],
+            'thin space both' => ["\u{2009}hello\u{2009}", 'hello', 'both'],
+            'mixed unicode whitespace both' => ["\u{3000}\u{2003} hello \u{00A0}\u{2009}", 'hello', 'both'],
+            'narrow no-break space both' => ["\u{202F}hello \u{202F}", 'hello', 'both'],
+        ];
+    }
+
+    /** @return array<string, list<string>> */
+    public static function providerForSides(): array
+    {
+        return [
+            'spaces left' => [' hello', 'hello', 'left'],
+            'spaces right not trimmed left' => ['hello ', 'hello ', 'left'],
+            'spaces left and right left' => [' hello ', 'hello ', 'left'],
+            'tabs left' => ["\thello\t", "hello\t", 'left'],
+            'mixed whitespace left' => ["\t\n hello world", 'hello world', 'left'],
+            'spaces right' => ['hello ', 'hello', 'right'],
+            'spaces left not trimmed right' => [' hello', ' hello', 'right'],
+            'spaces left and right right' => [' hello ', ' hello', 'right'],
+            'tabs right' => ["\thello\t", "\thello", 'right'],
+            'mixed whitespace right' => ["hello world \t", 'hello world', 'right'],
+        ];
+    }
+
+    /** @return array<string, list<string>> */
+    public static function providerForCustomMask(): array
+    {
+        return [
+            'custom characters both' => ['---hello---', 'hello', 'both', '-'],
+            'multiple custom chars both' => ['-._hello-._', 'hello', 'both', '_.-'],
+            'dots both' => ['...hello...', 'hello', 'both', '.'],
+            'underscores both' => ['___hello___', 'hello', 'both', '_'],
+            'mixed custom both' => ['*-+hello+-*', 'hello', 'both', '+-*'],
+            'dash left' => ['--hello--', 'hello--', 'left', '-'],
+            'dash right' => ['--hello--', '--hello', 'right', '-'],
+            'all characters to trim both' => [' !!! ', '!!!', 'both', ' '],
+        ];
+    }
+
+    /** @return array<string, list<string>> */
+    public static function providerForSpecialChars(): array
+    {
+        return [
+            'asterisk both' => ['**hello**', 'hello', 'both', '*'],
+            'dollar sign both' => ['$$hello$$', 'hello', 'both', '$'],
+            'caret both' => ['^^hello^^', 'hello', 'both', '^'],
+            'pipe both' => ['||hello||', 'hello', 'both', '|'],
+            'question mark both' => ['??hello??', 'hello', 'both', '?'],
+            'multiple special both' => ['@#$hello$#@', 'hello', 'both', '@#$'],
+        ];
+    }
+
+    /** @return array<string, list<string>> */
+    public static function providerForUnicode(): array
+    {
+        return [
+            'latin accented chars both' => ['éééhelloééé', 'hello', 'both', 'é'],
+            'greek letters both' => ['αααhelloααα', 'hello', 'both', 'α'],
+            'cyrillic letters both' => ['бббhelloббб', 'hello', 'both', 'б'],
+            'chinese characters both' => ['中中hello中中', 'hello', 'both', '中'],
+            'japanese hiragana both' => ['あああhelloあああ', 'hello', 'both', 'あ'],
+        ];
+    }
+
+    /** @return array<string, list<string>> */
+    public static function providerForEmoji(): array
+    {
+        return [
+            'smiley faces both' => ['😊😊hello😊😊', 'hello', 'both', '😊'],
+            'mixed emoji both' => ['👋👋hi👋👋', 'hi', 'both', '👋'],
+            'hearts both' => ['❤️❤️love❤️❤️', 'love', 'both', '❤️'],
+        ];
+    }
+
+    /** @return array<string, list<string>> */
+    public static function providerForMultiByte(): array
+    {
+        return [
+            'chinese with ideographic space both' => [' 你好 ', '你好', 'both'],
+            'japanese with ideographic space both' => [' こんにちは ', 'こんにちは', 'both'],
+            'korean with ideographic space both' => [' 안녕하세요 ', '안녕하세요', 'both'],
+            'fullwidth letters with custom mask both' => ['aaahelloaaa', 'hello', 'both', 'a'],
+            'mixed cjk and ascii both' => [' hello 你好 ', 'hello 你好', 'both'],
+        ];
+    }
+
+    /** @return array<string, list<string>> */
+    public static function providerForEdgeCases(): array
+    {
+        return [
+            'empty string both' => ['', '', 'both', ' '],
+            'string shorter than mask both' => ['a', '', 'both', 'abcdef'],
+            'all characters trimmed both' => ['--', '', 'both', '-'],
+            'only one side trimmed left' => ['--a', 'a', 'left', '-'],
+            'only one side trimmed right' => ['a--', 'a', 'right', '-'],
+            'no characters to trim both' => ['hello', 'hello', 'both', 'xyz'],
+            'mask longer than string both' => ['hello', 'hello', 'both', 'abcdefgzij'],
+            'empty mask both' => ['hello', 'hello', 'both', ''],
+            'repeated characters both' => ['aaaaahelloaaaaa', 'hello', 'both', 'a'],
+            'interleaved characters both' => ['ababhelloabab', 'hello', 'both', 'ab'],
+        ];
+    }
+}