From 0c02d868d6dc3097834b4a21d8fe92e7360ae7ac Mon Sep 17 00:00:00 2001 From: Steve Ayers Date: Wed, 26 Mar 2025 20:54:36 -0400 Subject: [PATCH 1/2] Proposal --- protovalidate/internal/extra_func.py | 202 +++++++++++---------------- 1 file changed, 78 insertions(+), 124 deletions(-) diff --git a/protovalidate/internal/extra_func.py b/protovalidate/internal/extra_func.py index 09ba69e3..ee42870a 100644 --- a/protovalidate/internal/extra_func.py +++ b/protovalidate/internal/extra_func.py @@ -152,16 +152,7 @@ def is_email(string: celtypes.Value) -> celpy.Result: unexpected forms of email addresses and will easily match a typographical error. - Args: - string (celTypes.Value): The string to validate. - - Returns: - True if the string is an email address, for example "foo@example.com". False otherwise. - - Raises: - celpy.CELEvalError: If string is not an instance of celtypes.StringType. """ - if not isinstance(string, celtypes.StringType): msg = "invalid argument, expected string" raise celpy.CELEvalError(msg) @@ -175,16 +166,7 @@ def is_uri(string: celtypes.Value) -> celpy.Result: URI is defined in the internet standard RFC 3986. Zone Identifiers in IPv6 address literals are supported (RFC 6874). - Args: - string (celTypes.Value): The string to validate. - - Returns: - True if the string is a URI, for example "https://example.com/foo/bar?baz=quux#frag". False otherwise. - - Raises: - celpy.CELEvalError: If string is not an instance of celtypes.StringType. """ - if not isinstance(string, celtypes.StringType): msg = "invalid argument, expected string" raise celpy.CELEvalError(msg) @@ -198,17 +180,7 @@ def is_uri_ref(string: celtypes.Value) -> celpy.Result: URI, URI Reference, and Relative Reference are defined in the internet standard RFC 3986. Zone Identifiers in IPv6 address literals are supported (RFC 6874). - Args: - string (celTypes.Value): The string to validate. 
- - Returns: - True if the string is a URI Reference - a URI such as "https://example.com/foo/bar?baz=quux#frag" - or a Relative Reference such as "./foo/bar?query". False otherwise. - - Raises: - celpy.CELEvalError: If string is not an instance of celtypes.StringType. """ - if not isinstance(string, celtypes.StringType): msg = "invalid argument, expected string" raise celpy.CELEvalError(msg) @@ -266,27 +238,14 @@ def unique(val: celtypes.Value) -> celpy.Result: class Uri: - """Uri is a class used to parse a given string to determine if it is a valid URI or URI reference. - - Callers can validate a string by constructing an instance of this class and then calling one of its - public methods: - uri() - uri_reference() - - Each method will return True or False depending on whether it passes validation. - """ + """Uri is a class used to parse a given string to determine if it is a valid URI or URI reference.""" _string: str _index: int _pct_encoded_found: bool def __init__(self, string: str): - """Initialize a URI validation class with a given string - - Args: - string (str): String to validate as a URI or URI reference. - """ - + """Initialize a URI validation class with a given string.""" super().__init__() self._string = string self._index = 0 @@ -296,9 +255,9 @@ def uri(self) -> bool: Method parses the rule: - URI = scheme ":" hier-part [ "?" query ] [ "#" fragment ] - """ + URI = scheme ":" hier-part [ "?" query ] [ "#" fragment ] + """ start = self._index if not (self.__scheme() and self.__take(":") and self.__hier_part()): self._index = start @@ -321,9 +280,9 @@ def uri_reference(self) -> bool: Method parses the rule: - URI-reference = URI / relative-ref - """ + URI-reference = URI / relative-ref + """ return self.uri() or self.__relative_ref() def __hier_part(self) -> bool: @@ -331,12 +290,12 @@ def __hier_part(self) -> bool: Method parses the rule: - hier-part = "//" authority path-abempty. 
- / path-absolute - / path-rootless - / path-empty - """ + hier-part = "//" authority path-abempty + / path-absolute + / path-rootless + / path-empty + """ start = self._index if self.__take("/") and self.__take("/") and self.__authority() and self.__path_abempty(): return True @@ -350,9 +309,9 @@ def __relative_ref(self) -> bool: Method parses the rule: - relative-ref = relative-part [ "?" query ] [ "#" fragment ] - """ + relative-ref = relative-part [ "?" query ] [ "#" fragment ] + """ start = self._index if not self.__relative_part(): return False @@ -376,12 +335,12 @@ def __relative_part(self) -> bool: Method parses the rule: - relative-part = "//" authority path-abempty - / path-absolute - / path-noscheme - / path-empty - """ + relative-part = "//" authority path-abempty + / path-absolute + / path-noscheme + / path-empty + """ start = self._index if self.__take("/") and self.__take("/") and self.__authority() and self.__path_abempty(): return True @@ -395,11 +354,11 @@ def __scheme(self) -> bool: Method parses the rule: - scheme = ALPHA *( ALPHA / DIGIT / "+" / "-" / "." ) + scheme = ALPHA *( ALPHA / DIGIT / "+" / "-" / "." ) Terminated by ":". - """ + """ start = self._index if self.__alpha(): while self.__alpha() or self.__digit() or self.__take("+") or self.__take("-") or self.__take("."): @@ -416,11 +375,11 @@ def __authority(self) -> bool: Method parses the rule: - authority = [ userinfo "@" ] host [ ":" port ] + authority = [ userinfo "@" ] host [ ":" port ] Lead by double slash ("") and terminated by "/", "?", "#", or end of URI. - """ + """ start = self._index if self.__userinfo(): if not self.__take("@"): @@ -448,8 +407,8 @@ def __is_authority_end(self) -> bool: The authority component [...] is terminated by the next slash ("/"), question mark ("?"), or number sign ("#") character, or by the end of the URI. - """ + """ return ( self._index >= len(self._string) or self._string[self._index] == "?" 
@@ -462,11 +421,11 @@ def __userinfo(self) -> bool: Method parses the rule: - userinfo = *( unreserved / pct-encoded / sub-delims / ":" ) + userinfo = *( unreserved / pct-encoded / sub-delims / ":" ) Terminated by "@" in authority. - """ + """ start = self._index while True: if self.__unreserved() or self.__pct_encoded() or self.__sub_delims() or self.__take(":"): @@ -480,7 +439,7 @@ def __userinfo(self) -> bool: return False def __check_host_pct_encoded(self, string: str) -> bool: - """Verify that string is correctly percent-encoded""" + """Verify that string is correctly percent-encoded.""" try: # unquote defaults to 'UTF-8' encoding. urlparse.unquote(string, errors="strict") @@ -494,9 +453,9 @@ def __host(self) -> bool: Method parses the rule: - host = IP-literal / IPv4address / reg-name. - """ + host = IP-literal / IPv4address / reg-name + """ if self._index >= len(self._string): return False @@ -522,11 +481,11 @@ def __port(self) -> bool: Method parses the rule: - port = *DIGIT + port = *DIGIT Terminated by end of authority. - """ + """ start = self._index while True: if self.__digit(): @@ -543,9 +502,9 @@ def __ip_literal(self) -> bool: Method parses the rule from RFC 6874: - IP-literal = "[" ( IPv6address / IPv6addrz / IPvFuture ) "]" - """ + IP-literal = "[" ( IPv6address / IPv6addrz / IPvFuture ) "]" + """ start = self._index if self.__take("["): @@ -572,8 +531,8 @@ def __ipv6_address(self) -> bool: Method parses the rule "IPv6address". Relies on the implementation of validate_ip. 
- """ + """ start = self._index while self.__hex_dig() or self.__take(":"): pass @@ -589,9 +548,9 @@ def __ipv6_addrz(self) -> bool: Method parses the rule from RFC 6874: - IPv6addrz = IPv6address "%25" ZoneID - """ + IPv6addrz = IPv6address "%25" ZoneID + """ start = self._index if self.__ipv6_address() and self.__take("%") and self.__take("2") and self.__take("5") and self.__zone_id(): return True @@ -605,9 +564,9 @@ def __zone_id(self) -> bool: Method parses the rule from RFC 6874: - ZoneID = 1*( unreserved / pct-encoded ) - """ + ZoneID = 1*( unreserved / pct-encoded ) + """ start = self._index while self.__unreserved() or self.__pct_encoded(): pass @@ -624,9 +583,9 @@ def __ip_vfuture(self) -> bool: Method parses the rule: - IPvFuture = "v" 1*HEXDIG "." 1*( unreserved / sub-delims / ":" ) - """ + IPvFuture = "v" 1*HEXDIG "." 1*( unreserved / sub-delims / ":" ) + """ start = self._index if self.__take("v") and self.__hex_dig(): @@ -650,11 +609,11 @@ def __reg_name(self) -> bool: Method parses the rule: - reg-name = *( unreserved / pct-encoded / sub-delims ) + reg-name = *( unreserved / pct-encoded / sub-delims ) Terminates on start of port (":") or end of authority. - """ + """ start = self._index while True: if self.__unreserved() or self.__pct_encoded() or self.__sub_delims(): @@ -676,8 +635,8 @@ def __is_path_end(self) -> bool: > The path is terminated by the first question mark ("?") or > number sign ("#") character, or by the end of the URI. - """ + """ return self._index >= len(self._string) or self._string[self._index] == "?" or self._string[self._index] == "#" def __path_abempty(self) -> bool: @@ -685,11 +644,11 @@ def __path_abempty(self) -> bool: Method parses the rule: - path-abempty = *( "/" segment ) + path-abempty = *( "/" segment ) Terminated by end of path: "?", "#", or end of URI. 
- """ + """ start = self._index while self.__take("/") and self.__segment(): pass @@ -706,11 +665,11 @@ def __path_absolute(self) -> bool: Method parses the rule: - path-absolute = "/" [ segment-nz *( "/" segment ) ] + path-absolute = "/" [ segment-nz *( "/" segment ) ] Terminated by end of path: "?", "#", or end of URI. - """ + """ start = self._index if self.__take("/"): @@ -730,11 +689,11 @@ def __path_noscheme(self) -> bool: Method parses the rule: - path-noscheme = segment-nz-nc *( "/" segment ) + path-noscheme = segment-nz-nc *( "/" segment ) Terminated by end of path: "?", "#", or end of URI. - """ + """ start = self._index if self.__segment_nz_nc(): while self.__take("/") and self.__segment(): @@ -752,11 +711,11 @@ def __path_rootless(self) -> bool: Method parses the rule: - path-rootless = segment-nz *( "/" segment ) + path-rootless = segment-nz *( "/" segment ) Terminated by end of path: "?", "#", or end of URI. - """ + """ start = self._index if self.__segment_nz(): @@ -775,11 +734,11 @@ def __path_empty(self) -> bool: Method parses the rule: - path-empty = 0 + path-empty = 0 Terminated by end of path: "?", "#", or end of URI. 
- """ + """ return self.__is_path_end() def __segment(self) -> bool: @@ -787,9 +746,9 @@ def __segment(self) -> bool: Method parses the rule: - segment = *pchar - """ + segment = *pchar + """ while self.__pchar(): pass @@ -800,9 +759,9 @@ def __segment_nz(self) -> bool: Method parses the rule: - segment-nz = 1*pchar - """ + segment-nz = 1*pchar + """ start = self._index if self.__pchar(): @@ -820,10 +779,10 @@ def __segment_nz_nc(self) -> bool: Method parses the rule: - segment-nz-nc = 1*( unreserved / pct-encoded / sub-delims / "@" ) - ; non-zero-length segment without any colon ":" - """ + segment-nz-nc = 1*( unreserved / pct-encoded / sub-delims / "@" ) + ; non-zero-length segment without any colon ":" + """ start = self._index while self.__unreserved() or self.__pct_encoded() or self.__sub_delims() or self.__take("@"): @@ -841,9 +800,9 @@ def __pchar(self) -> bool: Method parses the rule: - pchar = unreserved / pct-encoded / sub-delims / ":" / "@" - """ + pchar = unreserved / pct-encoded / sub-delims / ":" / "@" + """ return ( self.__unreserved() or self.__pct_encoded() or self.__sub_delims() or self.__take(":") or self.__take("@") ) @@ -853,11 +812,11 @@ def __query(self) -> bool: Method parses the rule: - query = *( pchar / "/" / "?" ) + query = *( pchar / "/" / "?" ) Terminated by "#" or end of URI. - """ + """ start = self._index while True: @@ -876,11 +835,11 @@ def __fragment(self) -> bool: Method parses the rule: - fragment = *( pchar / "/" / "?" ) + fragment = *( pchar / "/" / "?" ) Terminated by end of URI. 
- """ + """ start = self._index while True: @@ -899,11 +858,11 @@ def __pct_encoded(self) -> bool: Method parses the rule: - pct-encoded = "%" HEXDIG HEXDIG + pct-encoded = "%" HEXDIG HEXDIG Sets `_pct_encoded_found` to true if a valid triplet was found - """ + """ start = self._index if self.__take("%") and self.__hex_dig() and self.__hex_dig(): @@ -919,9 +878,9 @@ def __unreserved(self) -> bool: Method parses the rule: - unreserved = ALPHA / DIGIT / "-" / "." / "_" / "~" - """ + unreserved = ALPHA / DIGIT / "-" / "." / "_" / "~" + """ return ( self.__alpha() or self.__digit() @@ -936,10 +895,10 @@ def __sub_delims(self) -> bool: Method parses the rule: - sub-delims = "!" / "$" / "&" / "'" / "(" / ")" - / "*" / "+" / "," / ";" / "=" - """ + sub-delims = "!" / "$" / "&" / "'" / "(" / ")" + / "*" / "+" / "," / ";" / "=" + """ return ( self.__take("!") or self.__take("$") @@ -959,9 +918,9 @@ def __alpha(self) -> bool: Method parses the rule: - ALPHA = %x41-5A / %x61-7A ; A-Z / a-z - """ + ALPHA = %x41-5A / %x61-7A ; A-Z / a-z + """ if self._index >= len(self._string): return False @@ -977,9 +936,9 @@ def __digit(self) -> bool: Method parses the rule: - DIGIT = %x30-39 ; 0-9 - """ + DIGIT = %x30-39 ; 0-9 + """ if self._index >= len(self._string): return False @@ -995,9 +954,9 @@ def __hex_dig(self) -> bool: Method parses the rule: - HEXDIG = DIGIT / "A" / "B" / "C" / "D" / "E" / "F" - """ + HEXDIG = DIGIT / "A" / "B" / "C" / "D" / "E" / "F" + """ if self._index >= len(self._string): return False @@ -1014,12 +973,7 @@ def __take(self, char: str) -> bool: """Take the given char at the current index. If char is at the current index, increment the index. - - Returns: - True if char is at the current index. False if char is not at the - current index or the end of string has been reached. 
""" - if self._index >= len(self._string): return False From 001449db42182481769a0a0f8d66d7774a022b26 Mon Sep 17 00:00:00 2001 From: Steve Ayers Date: Thu, 27 Mar 2025 08:48:55 -0400 Subject: [PATCH 2/2] Make more consistent --- protovalidate/internal/extra_func.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/protovalidate/internal/extra_func.py b/protovalidate/internal/extra_func.py index ee42870a..3ad72b1c 100644 --- a/protovalidate/internal/extra_func.py +++ b/protovalidate/internal/extra_func.py @@ -145,7 +145,7 @@ def is_ip_prefix(val: celtypes.Value, *args) -> celpy.Result: def is_email(string: celtypes.Value) -> celpy.Result: - """Validate whether string is a valid email address. + """Return true if the string is an email address, for example "foo@example.com". Conforms to the definition for a valid email address from the HTML standard. Note that this standard willfully deviates from RFC 5322, which allows many @@ -161,7 +161,7 @@ def is_email(string: celtypes.Value) -> celpy.Result: def is_uri(string: celtypes.Value) -> celpy.Result: - """Validate whether string is a valid URI. + """Return true if the string is a URI, for example "https://example.com/foo/bar?baz=quux#frag". URI is defined in the internet standard RFC 3986. Zone Identifiers in IPv6 address literals are supported (RFC 6874). @@ -175,7 +175,8 @@ def is_uri(string: celtypes.Value) -> celpy.Result: def is_uri_ref(string: celtypes.Value) -> celpy.Result: - """Validate whether string is a valid URI reference. + """Return true if the string is a URI Reference - a URI such as "https://example.com/foo/bar?baz=quux#frag" or + a Relative Reference such as "./foo/bar?query". URI, URI Reference, and Relative Reference are defined in the internet standard RFC 3986. Zone Identifiers in IPv6 address literals are supported (RFC 6874).