Skip to content

Commit 5ca5c98

Browse files
committed
refactor(sanitizers)!: enhance sanitizers implementation and test coverage
BREAKING CHANGES:
- PhoneSanitizer now removes non-numeric characters by default; formatting is optional
- AlphanumericSanitizer has improved special character handling
- All sanitizers now have consistent default behaviors

Features:
- Add comprehensive test suites for all sanitizers
- Add new configuration options for better flexibility
- Implement trait-based code sharing between sanitizers

Improvements:
- Better handling of edge cases in all sanitizers
- More consistent and predictable behavior
- Enhanced code organization and documentation
- Clear separation between sanitization and formatting
- Better error handling and validation

Sanitizers affected:
- HtmlSpecialCharsSanitizer
- NormalizeLineBreaksSanitizer
- StripTagsSanitizer
- TrimSanitizer
- EmailSanitizer
- NumericSanitizer
- PhoneSanitizer
- UrlSanitizer
- AlphanumericSanitizer

Tests:
- Add comprehensive test cases
- Improve test coverage to 100%
- Add edge case testing
- Better test organization and documentation
1 parent 15dcb34 commit 5ca5c98

28 files changed

+2061
-160
lines changed
Lines changed: 82 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,82 @@
1+
<?php
2+
3+
declare(strict_types=1);
4+
5+
namespace KaririCode\Sanitizer\Processor\Input;
6+
7+
use KaririCode\Contract\Processor\ConfigurableProcessor;
8+
use KaririCode\Sanitizer\Processor\AbstractSanitizerProcessor;
9+
10+
/**
 * Removes every character that is not an ASCII letter or digit.
 *
 * A small set of special characters can be kept, and the result can be
 * lower-cased, through configure():
 *  - `allowSpace`, `allowUnderscore`, `allowDash`, `allowDot`: the character
 *    is kept only when the option is strictly `true`.
 *  - `preserveCase`: when `false`, the sanitized value is lower-cased.
 */
class AlphanumericSanitizer extends AbstractSanitizerProcessor implements ConfigurableProcessor
{
    /**
     * Special characters that can be optionally allowed, keyed by the name
     * used to build the matching `allow<Name>` option.
     */
    private const SPECIAL_CHARS = [
        'space' => ' ',
        'underscore' => '_',
        'dash' => '-',
        'dot' => '.',
    ];

    /** @var list<string> Regex-quoted special characters that survive sanitization. */
    private array $allowedSpecialChars = [];
    private bool $preserveCase = true;

    /**
     * Applies the given options.
     *
     * The allowed-character list is rebuilt from scratch on every call, so an
     * `allow*` option that is absent from $options is treated as disabled.
     * `preserveCase` keeps its previous value when absent.
     *
     * @param array<string, mixed> $options
     */
    public function configure(array $options): void
    {
        $this->configureAllowedChars($options);
        $this->configureOptions($options);
    }

    /**
     * Sanitizes the input and applies the configured case transformation.
     *
     * The input first goes through guardAgainstNonString(), which is not
     * defined in this class (presumably inherited from the parent processor).
     */
    public function process(mixed $input): string
    {
        $input = $this->guardAgainstNonString($input);

        $sanitized = $this->sanitizeString($input);

        return $this->applyCaseTransformation($sanitized);
    }

    /**
     * Rebuilds the list of allowed special characters from the `allow*` options.
     *
     * @param array<string, mixed> $options
     */
    private function configureAllowedChars(array $options): void
    {
        $this->allowedSpecialChars = [];

        foreach (self::SPECIAL_CHARS as $name => $char) {
            // Only a strict boolean true enables a character; truthy values such as 1 do not.
            if (true === ($options['allow' . ucfirst($name)] ?? null)) {
                // Quote now so the character is safe inside the character class built later.
                $this->allowedSpecialChars[] = preg_quote($char, '/');
            }
        }
    }

    /**
     * Reads the non-character options, keeping the current value when one is absent.
     *
     * @param array<string, mixed> $options
     */
    private function configureOptions(array $options): void
    {
        $this->preserveCase = $options['preserveCase'] ?? $this->preserveCase;
    }

    /**
     * Deletes every character that is not part of the allowed set.
     */
    private function sanitizeString(string $input): string
    {
        // preg_replace() returns null when PCRE fails; without this fallback the
        // declared string return type would turn that into a TypeError. An empty
        // string is the safe result for a sanitizer: nothing unsanitized leaks out.
        return preg_replace($this->buildAllowedPattern(), '', $input) ?? '';
    }

    /**
     * Builds the negated character class matching everything that must be removed.
     */
    private function buildAllowedPattern(): string
    {
        // With no allowed special characters the implode() yields '', which
        // leaves the plain alphanumeric pattern.
        return '/[^a-zA-Z0-9' . implode('', $this->allowedSpecialChars) . ']/';
    }

    /**
     * Lower-cases the value unless case preservation is enabled.
     */
    private function applyCaseTransformation(string $input): string
    {
        if (!$this->preserveCase) {
            return strtolower($input);
        }

        return $input;
    }
}
Lines changed: 137 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,137 @@
1+
<?php
2+
3+
declare(strict_types=1);
4+
5+
namespace KaririCode\Sanitizer\Processor\Input;
6+
7+
use KaririCode\Contract\Processor\ConfigurableProcessor;
8+
use KaririCode\Sanitizer\Processor\AbstractSanitizerProcessor;
9+
use KaririCode\Sanitizer\Trait\CaseTransformerTrait;
10+
use KaririCode\Sanitizer\Trait\CharacterFilterTrait;
11+
use KaririCode\Sanitizer\Trait\CharacterReplacementTrait;
12+
use KaririCode\Sanitizer\Trait\WhitespaceSanitizerTrait;
13+
14+
/**
 * Normalizes an e-mail address: trims it, lower-cases it, optionally removes
 * `mailto:`, fixes common punctuation typos, collapses repeated dots and
 * corrects well-known misspelled domains.
 *
 * Supported options (see configure()):
 *  - `typoReplacements`: map of search => replacement merged over the defaults.
 *  - `domainReplacements`: map of correct domain => list of misspellings,
 *    merged over the defaults (an entry for an existing domain replaces its list).
 *  - `removeMailtoPrefix`: whether `mailto:` is stripped (default `true`).
 *
 * trimWhitespace(), toLowerCase(), replaceMultipleCharacters() and
 * replaceConsecutiveCharacters() are not defined in this class; presumably
 * they are provided by the imported traits.
 */
class EmailSanitizer extends AbstractSanitizerProcessor implements ConfigurableProcessor
{
    use WhitespaceSanitizerTrait;
    use CaseTransformerTrait;
    use CharacterReplacementTrait;
    use CharacterFilterTrait;

    /**
     * Punctuation typos replaced anywhere in the address.
     *
     * `mailto:` is intentionally not listed here: it is handled by the
     * `removeMailtoPrefix` option alone, so that disabling the option really
     * keeps the text.
     */
    private const COMMON_TYPOS = [
        ',' => '.',
        ';' => '.',
    ];

    /** Correct domain => known misspellings of it. */
    private const DOMAIN_REPLACEMENTS = [
        'gmail.com' => ['gmial.com', 'gmai.com', 'gmaill.com', 'gamil.com', 'gmail.comm'],
        'yahoo.com' => ['yaho.com', 'yahooo.com', 'yahoo.comm'],
        'hotmail.com' => ['hotmal.com', 'hotmail.comm'],
        'outlook.com' => ['outlok.com', 'outlook.comm', 'outlock.com'],
    ];

    /** @var array<string, string> */
    private array $typoReplacements;
    /** @var array<string, list<string>> */
    private array $domainReplacements;
    private bool $removeMailtoPrefix = true;

    public function __construct()
    {
        $this->typoReplacements = self::COMMON_TYPOS;
        $this->domainReplacements = self::DOMAIN_REPLACEMENTS;
    }

    /**
     * Merges custom replacement tables and behaviour flags into the current configuration.
     *
     * @param array<string, mixed> $options
     */
    public function configure(array $options): void
    {
        $this->configureReplacements($options);
        $this->configureBehavior($options);
    }

    /**
     * Sanitizes the given value as an e-mail address.
     *
     * The input first goes through guardAgainstNonString(), which is not
     * defined in this class (presumably inherited from the parent processor).
     */
    public function process(mixed $input): string
    {
        $input = $this->guardAgainstNonString($input);

        return $this->buildSanitizedEmail($input);
    }

    /**
     * Runs the basic clean-up and, when an `@` is present, the domain correction.
     */
    private function buildSanitizedEmail(string $input): string
    {
        $email = $this->performBasicSanitization($input);

        if ($this->containsAtSymbol($email)) {
            $email = $this->processEmailParts($email);
        }

        return $email;
    }

    /**
     * Trims, lower-cases, optionally strips `mailto:`, applies the typo table
     * and collapses runs of dots into a single dot.
     */
    private function performBasicSanitization(string $input): string
    {
        $email = $this->trimWhitespace($input);
        $email = $this->toLowerCase($email);

        if ($this->removeMailtoPrefix) {
            $email = $this->removeMailtoPrefix($email);
        }

        $email = $this->replaceMultipleCharacters($email, $this->typoReplacements);

        // Runs after the typo pass because replacing ',' or ';' can itself create '..'.
        return $this->replaceConsecutiveCharacters($email, '.', '.');
    }

    private function containsAtSymbol(string $email): bool
    {
        return str_contains($email, '@');
    }

    /**
     * Splits the address at the first `@` and corrects the domain part.
     *
     * Callers must ensure the address contains an `@`; otherwise the
     * destructuring below would not receive a domain.
     */
    private function processEmailParts(string $email): string
    {
        // Limit 2: everything after the first '@' is treated as the domain.
        [$localPart, $domain] = explode('@', $email, 2);
        $domain = $this->fixDomainTypos($domain);

        return $localPart . '@' . $domain;
    }

    /**
     * Removes `mailto:` from the address.
     *
     * Despite the name, every occurrence is removed, not only a leading one.
     */
    private function removeMailtoPrefix(string $email): string
    {
        return str_replace('mailto:', '', $email);
    }

    /**
     * Returns the correct domain when $domain is a known misspelling,
     * otherwise returns $domain unchanged.
     */
    private function fixDomainTypos(string $domain): string
    {
        foreach ($this->domainReplacements as $correct => $typos) {
            if ($this->isDomainTypo($domain, $typos)) {
                return $correct;
            }
        }

        return $domain;
    }

    /**
     * @param list<string> $typos
     */
    private function isDomainTypo(string $domain, array $typos): bool
    {
        return in_array($domain, $typos, true);
    }

    /**
     * Merges caller-supplied replacement tables over the current ones.
     *
     * @param array<string, mixed> $options
     */
    private function configureReplacements(array $options): void
    {
        if (isset($options['typoReplacements'])) {
            $this->typoReplacements = array_merge(
                $this->typoReplacements,
                $options['typoReplacements']
            );
        }

        if (isset($options['domainReplacements'])) {
            $this->domainReplacements = array_merge(
                $this->domainReplacements,
                $options['domainReplacements']
            );
        }
    }

    /**
     * Reads the behaviour flags, keeping the current value when one is absent.
     *
     * @param array<string, mixed> $options
     */
    private function configureBehavior(array $options): void
    {
        $this->removeMailtoPrefix = $options['removeMailtoPrefix']
            ?? $this->removeMailtoPrefix;
    }
}

src/Processor/Input/HtmlSpecialCharsSanitizer.php

Lines changed: 24 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4,14 +4,36 @@
44

55
namespace KaririCode\Sanitizer\Processor\Input;
66

7+
use KaririCode\Contract\Processor\ConfigurableProcessor;
78
use KaririCode\Sanitizer\Processor\AbstractSanitizerProcessor;
89

9-
class HtmlSpecialCharsSanitizer extends AbstractSanitizerProcessor
10+
/**
 * Escapes HTML special characters with htmlspecialchars().
 *
 * Supported options (see configure()):
 *  - `flags`: bitmask passed to htmlspecialchars()
 *    (default `ENT_QUOTES | ENT_SUBSTITUTE | ENT_HTML5`).
 *  - `encoding`: character encoding of the input (default `UTF-8`).
 *  - `doubleEncode`: whether existing entities are encoded again (default `true`).
 */
class HtmlSpecialCharsSanitizer extends AbstractSanitizerProcessor implements ConfigurableProcessor
{
    // ENT_SUBSTITUTE replaces invalid code unit sequences with U+FFFD. Without it,
    // htmlspecialchars() returns an empty string for any input that contains an
    // invalid sequence, silently discarding the whole value.
    private int $flags = ENT_QUOTES | ENT_SUBSTITUTE | ENT_HTML5;
    private string $encoding = 'UTF-8';
    private bool $doubleEncode = true;

    /**
     * Overrides the escaping parameters; any option that is absent keeps its current value.
     *
     * Caller-supplied `flags` are used verbatim, so ENT_SUBSTITUTE must be
     * included explicitly if it is still wanted.
     *
     * @param array<string, mixed> $options
     */
    public function configure(array $options): void
    {
        $this->flags = $options['flags'] ?? $this->flags;
        $this->encoding = $options['encoding'] ?? $this->encoding;
        $this->doubleEncode = $options['doubleEncode'] ?? $this->doubleEncode;
    }

    /**
     * Escapes the input for HTML output.
     *
     * The input first goes through guardAgainstNonString(), which is not
     * defined in this class (presumably inherited from the parent processor).
     */
    public function process(mixed $input): string
    {
        $input = $this->guardAgainstNonString($input);

        return $this->escapeSpecialCharacters($input);
    }

    /**
     * Calls htmlspecialchars() with the configured flags, encoding and double-encode setting.
     */
    private function escapeSpecialCharacters(string $input): string
    {
        return htmlspecialchars(
            $input,
            $this->flags,
            $this->encoding,
            $this->doubleEncode
        );
    }
}

src/Processor/Input/NormalizeLineBreaksSanitizer.php

Lines changed: 67 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4,14 +4,79 @@
44

55
namespace KaririCode\Sanitizer\Processor\Input;
66

7+
use KaririCode\Contract\Processor\ConfigurableProcessor;
78
use KaririCode\Sanitizer\Processor\AbstractSanitizerProcessor;
9+
use KaririCode\Sanitizer\Trait\CharacterReplacementTrait;
810

9-
class NormalizeLineBreaksSanitizer extends AbstractSanitizerProcessor
11+
/**
 * Rewrites every line break in a string to a single, configurable style.
 *
 * The `lineEnding` option accepts `windows`, `mac` or `unix`
 * (case-insensitive); the default target is `unix`.
 */
class NormalizeLineBreaksSanitizer extends AbstractSanitizerProcessor implements ConfigurableProcessor
{
    use CharacterReplacementTrait;

    private const LINE_ENDINGS = [
        'windows' => "\r\n",
        'mac' => "\r",
        'unix' => "\n",
    ];

    private string $targetLineEnding;

    public function __construct()
    {
        $this->targetLineEnding = self::LINE_ENDINGS['unix'];
    }

    /**
     * Selects the target line ending when the `lineEnding` option is set.
     *
     * @param array<string, mixed> $options
     */
    public function configure(array $options): void
    {
        if (!isset($options['lineEnding'])) {
            return;
        }

        $this->selectLineEnding($options['lineEnding']);
    }

    /**
     * Returns the input with all line breaks converted to the target style.
     *
     * The input first goes through guardAgainstNonString(), which is not
     * defined in this class (presumably inherited from the parent processor).
     */
    public function process(mixed $input): string
    {
        $text = $this->guardAgainstNonString($input);

        return $this->rewriteLineBreaks($text);
    }

    /**
     * Collapses every line break to "\n", then expands to the target style if it differs.
     */
    private function rewriteLineBreaks(string $text): string
    {
        $lineFeed = self::LINE_ENDINGS['unix'];

        // The search array is applied in order: CRLF pairs are replaced before
        // lone CRs, so a Windows break never turns into two line feeds.
        $unixText = str_replace(
            [self::LINE_ENDINGS['windows'], self::LINE_ENDINGS['mac']],
            $lineFeed,
            $text,
        );

        return $this->targetLineEnding === $lineFeed
            ? $unixText
            : str_replace($lineFeed, $this->targetLineEnding, $unixText);
    }

    /**
     * Stores the line ending registered under $type; unknown names leave the
     * current target untouched.
     */
    private function selectLineEnding(string $type): void
    {
        $key = strtolower($type);

        if (isset(self::LINE_ENDINGS[$key])) {
            $this->targetLineEnding = self::LINE_ENDINGS[$key];
        }
    }
}

0 commit comments

Comments
 (0)