Python3 Migrate
This commit is contained in:
204
venv/lib/python3.7/site-packages/uritools/compose.py
Normal file
204
venv/lib/python3.7/site-packages/uritools/compose.py
Normal file
@@ -0,0 +1,204 @@
|
||||
import collections
|
||||
import collections.abc
|
||||
import ipaddress
|
||||
import numbers
|
||||
import re
|
||||
|
||||
from .chars import SUB_DELIMS
|
||||
from .encoding import uriencode
|
||||
from .split import uriunsplit
|
||||
|
||||
# RFC 3986 3.1: scheme = ALPHA *( ALPHA / DIGIT / "+" / "-" / "." )
|
||||
_SCHEME_RE = re.compile(b'^[A-Za-z][A-Za-z0-9+.-]*$')
|
||||
|
||||
# RFC 3986 3.2: authority = [ userinfo "@" ] host [ ":" port ]
|
||||
_AUTHORITY_RE_BYTES = re.compile(b'^(?:(.*)@)?(.*?)(?::([0-9]*))?$')
|
||||
_AUTHORITY_RE_STR = re.compile(u'^(?:(.*)@)?(.*?)(?::([0-9]*))?$')
|
||||
|
||||
# safe component characters
|
||||
_SAFE_USERINFO = SUB_DELIMS + ':'
|
||||
_SAFE_HOST = SUB_DELIMS
|
||||
_SAFE_PATH = SUB_DELIMS + ':@/'
|
||||
_SAFE_QUERY = SUB_DELIMS + ':@/?'
|
||||
_SAFE_FRAGMENT = SUB_DELIMS + ':@/?'
|
||||
|
||||
|
||||
def _scheme(scheme):
|
||||
if _SCHEME_RE.match(scheme):
|
||||
return scheme.lower()
|
||||
else:
|
||||
raise ValueError('Invalid scheme component')
|
||||
|
||||
|
||||
def _authority(userinfo, host, port, encoding):
|
||||
authority = []
|
||||
|
||||
if userinfo is not None:
|
||||
authority.append(uriencode(userinfo, _SAFE_USERINFO, encoding))
|
||||
authority.append(b'@')
|
||||
|
||||
if isinstance(host, ipaddress.IPv6Address):
|
||||
authority.append(b'[' + host.compressed.encode() + b']')
|
||||
elif isinstance(host, ipaddress.IPv4Address):
|
||||
authority.append(host.compressed.encode())
|
||||
elif isinstance(host, bytes):
|
||||
authority.append(_host(host))
|
||||
elif host is not None:
|
||||
authority.append(_host(host.encode('utf-8')))
|
||||
|
||||
if isinstance(port, numbers.Number):
|
||||
authority.append(_port(str(port).encode()))
|
||||
elif isinstance(port, bytes):
|
||||
authority.append(_port(port))
|
||||
elif port is not None:
|
||||
authority.append(_port(port.encode()))
|
||||
|
||||
return b''.join(authority) if authority else None
|
||||
|
||||
|
||||
def _ip_literal(address):
|
||||
if address.startswith('v'):
|
||||
raise ValueError('Address mechanism not supported')
|
||||
else:
|
||||
return b'[' + ipaddress.IPv6Address(address).compressed.encode() + b']'
|
||||
|
||||
|
||||
def _host(host):
|
||||
# RFC 3986 3.2.3: Although host is case-insensitive, producers and
|
||||
# normalizers should use lowercase for registered names and
|
||||
# hexadecimal addresses for the sake of uniformity, while only
|
||||
# using uppercase letters for percent-encodings.
|
||||
if host.startswith(b'[') and host.endswith(b']'):
|
||||
return _ip_literal(host[1:-1].decode())
|
||||
# check for IPv6 addresses as returned by SplitResult.gethost()
|
||||
try:
|
||||
return _ip_literal(host.decode('utf-8'))
|
||||
except ValueError:
|
||||
return uriencode(host, _SAFE_HOST, 'utf-8').lower()
|
||||
|
||||
|
||||
def _port(port):
|
||||
# RFC 3986 3.2.3: URI producers and normalizers should omit the
|
||||
# port component and its ":" delimiter if port is empty or if its
|
||||
# value would be the same as that of the scheme's default.
|
||||
if port.lstrip(b'0123456789'):
|
||||
raise ValueError('Invalid port subcomponent')
|
||||
elif port:
|
||||
return b':' + port
|
||||
else:
|
||||
return b''
|
||||
|
||||
|
||||
def _querylist(items, sep, encoding):
|
||||
terms = []
|
||||
append = terms.append
|
||||
safe = _SAFE_QUERY.replace(sep, '')
|
||||
for key, value in items:
|
||||
name = uriencode(key, safe, encoding)
|
||||
if value is None:
|
||||
append(name)
|
||||
elif isinstance(value, (bytes, str)):
|
||||
append(name + b'=' + uriencode(value, safe, encoding))
|
||||
else:
|
||||
append(name + b'=' + uriencode(str(value), safe, encoding))
|
||||
return sep.encode('ascii').join(terms)
|
||||
|
||||
|
||||
def _querydict(mapping, sep, encoding):
|
||||
items = []
|
||||
for key, value in mapping.items():
|
||||
if isinstance(value, (bytes, str)):
|
||||
items.append((key, value))
|
||||
elif isinstance(value, collections.abc.Iterable):
|
||||
items.extend([(key, v) for v in value])
|
||||
else:
|
||||
items.append((key, value))
|
||||
return _querylist(items, sep, encoding)
|
||||
|
||||
|
||||
def uricompose(scheme=None, authority=None, path='', query=None,
|
||||
fragment=None, userinfo=None, host=None, port=None,
|
||||
querysep='&', encoding='utf-8'):
|
||||
"""Compose a URI reference string from its individual components."""
|
||||
|
||||
# RFC 3986 3.1: Scheme names consist of a sequence of characters
|
||||
# beginning with a letter and followed by any combination of
|
||||
# letters, digits, plus ("+"), period ("."), or hyphen ("-").
|
||||
# Although schemes are case-insensitive, the canonical form is
|
||||
# lowercase and documents that specify schemes must do so with
|
||||
# lowercase letters. An implementation should accept uppercase
|
||||
# letters as equivalent to lowercase in scheme names (e.g., allow
|
||||
# "HTTP" as well as "http") for the sake of robustness but should
|
||||
# only produce lowercase scheme names for consistency.
|
||||
if isinstance(scheme, bytes):
|
||||
scheme = _scheme(scheme)
|
||||
elif scheme is not None:
|
||||
scheme = _scheme(scheme.encode())
|
||||
|
||||
# authority must be string type or three-item iterable
|
||||
if authority is None:
|
||||
authority = (None, None, None)
|
||||
elif isinstance(authority, bytes):
|
||||
authority = _AUTHORITY_RE_BYTES.match(authority).groups()
|
||||
elif isinstance(authority, str):
|
||||
authority = _AUTHORITY_RE_STR.match(authority).groups()
|
||||
elif not isinstance(authority, collections.abc.Iterable):
|
||||
raise TypeError('Invalid authority type')
|
||||
elif len(authority) != 3:
|
||||
raise ValueError('Invalid authority length')
|
||||
authority = _authority(
|
||||
userinfo if userinfo is not None else authority[0],
|
||||
host if host is not None else authority[1],
|
||||
port if port is not None else authority[2],
|
||||
encoding
|
||||
)
|
||||
|
||||
# RFC 3986 3.3: If a URI contains an authority component, then the
|
||||
# path component must either be empty or begin with a slash ("/")
|
||||
# character. If a URI does not contain an authority component,
|
||||
# then the path cannot begin with two slash characters ("//").
|
||||
path = uriencode(path, _SAFE_PATH, encoding)
|
||||
if authority is not None and path and not path.startswith(b'/'):
|
||||
raise ValueError('Invalid path with authority component')
|
||||
if authority is None and path.startswith(b'//'):
|
||||
raise ValueError('Invalid path without authority component')
|
||||
|
||||
# RFC 3986 4.2: A path segment that contains a colon character
|
||||
# (e.g., "this:that") cannot be used as the first segment of a
|
||||
# relative-path reference, as it would be mistaken for a scheme
|
||||
# name. Such a segment must be preceded by a dot-segment (e.g.,
|
||||
# "./this:that") to make a relative-path reference.
|
||||
if scheme is None and authority is None and not path.startswith(b'/'):
|
||||
if b':' in path.partition(b'/')[0]:
|
||||
path = b'./' + path
|
||||
|
||||
# RFC 3986 3.4: The characters slash ("/") and question mark ("?")
|
||||
# may represent data within the query component. Beware that some
|
||||
# older, erroneous implementations may not handle such data
|
||||
# correctly when it is used as the base URI for relative
|
||||
# references (Section 5.1), apparently because they fail to
|
||||
# distinguish query data from path data when looking for
|
||||
# hierarchical separators. However, as query components are often
|
||||
# used to carry identifying information in the form of "key=value"
|
||||
# pairs and one frequently used value is a reference to another
|
||||
# URI, it is sometimes better for usability to avoid percent-
|
||||
# encoding those characters.
|
||||
if isinstance(query, (bytes, str)):
|
||||
query = uriencode(query, _SAFE_QUERY, encoding)
|
||||
elif isinstance(query, collections.abc.Mapping):
|
||||
query = _querydict(query, querysep, encoding)
|
||||
elif isinstance(query, collections.abc.Iterable):
|
||||
query = _querylist(query, querysep, encoding)
|
||||
elif query is not None:
|
||||
raise TypeError('Invalid query type')
|
||||
|
||||
# RFC 3986 3.5: The characters slash ("/") and question mark ("?")
|
||||
# are allowed to represent data within the fragment identifier.
|
||||
# Beware that some older, erroneous implementations may not handle
|
||||
# this data correctly when it is used as the base URI for relative
|
||||
# references.
|
||||
if fragment is not None:
|
||||
fragment = uriencode(fragment, _SAFE_FRAGMENT, encoding)
|
||||
|
||||
# return URI reference as `str`
|
||||
return uriunsplit((scheme, authority, path, query, fragment)).decode()
|
||||
Reference in New Issue
Block a user