Files
mopidy-radionet/venv/lib/python3.7/site-packages/uritools/split.py
2020-01-18 20:01:00 +01:00

400 lines
13 KiB
Python

import collections
import collections.abc
import ipaddress
import re
from .encoding import uridecode
# Names of the five URI reference components, in order; used as the field
# names of the SplitResult namedtuple.
_URI_COMPONENTS = ('scheme', 'authority', 'path', 'query', 'fragment')
def _ip_literal(address):
# RFC 3986 3.2.2: In anticipation of future, as-yet-undefined IP
# literal address formats, an implementation may use an optional
# version flag to indicate such a format explicitly rather than
# rely on heuristic determination.
#
# IP-literal = "[" ( IPv6address / IPvFuture ) "]"
#
# IPvFuture = "v" 1*HEXDIG "." 1*( unreserved / sub-delims / ":" )
#
# If a URI containing an IP-literal that starts with "v"
# (case-insensitive), indicating that the version flag is present,
# is dereferenced by an application that does not know the meaning
# of that version flag, then the application should return an
# appropriate error for "address mechanism not supported".
if isinstance(address, bytes):
address = address.decode('ascii')
if address.startswith(u'v'):
raise ValueError('address mechanism not supported')
return ipaddress.IPv6Address(address)
def _ipv4_address(address):
try:
if isinstance(address, bytes):
return ipaddress.IPv4Address(address.decode('ascii'))
else:
return ipaddress.IPv4Address(address)
except ValueError:
return None
class SplitResult(collections.namedtuple('SplitResult', _URI_COMPONENTS)):
    """Base class to hold :func:`urisplit` results.

    The delimiter constants used by the methods below (``AT``, ``COLON``,
    ``SLASH``, ``QUEST``, ``HASH``, ``LBRACKET``, ``RBRACKET``, ``DOT``,
    ``DOTDOT``, ``EMPTY``, ``EQ``, ``DIGITS``) and the parsing regex
    ``RE`` are not defined here; subclasses provide them in the string
    type (bytes or text) of the components they hold.
    """

    __slots__ = ()  # prevent creation of instance dictionary

    @property
    def userinfo(self):
        """The undecoded userinfo part of the authority, or `None` if the
        authority is missing or contains no ``@``.
        """
        authority = self.authority
        if authority is None:
            return None
        userinfo, present, _ = authority.rpartition(self.AT)
        if present:
            return userinfo
        else:
            return None

    @property
    def host(self):
        """The undecoded host part of the authority (without userinfo and
        port), or `None` if there is no authority.
        """
        authority = self.authority
        if authority is None:
            return None
        _, _, hostinfo = authority.rpartition(self.AT)
        host, present, port = hostinfo.rpartition(self.COLON)
        # Only strip a trailing ":port" when a colon was actually found.
        # Without a colon, rpartition() puts the whole string in the last
        # item, so an all-digit host such as "123" must not be mistaken
        # for a port.  A non-digit tail (e.g. the "1]" of "[::1]") means
        # the last colon belongs to the host itself.
        if present and not port.lstrip(self.DIGITS):
            return host
        else:
            return hostinfo

    @property
    def port(self):
        """The undecoded port part of the authority (possibly empty), or
        `None` if there is no authority or no ``:port`` suffix.
        """
        authority = self.authority
        if authority is None:
            return None
        _, present, port = authority.rpartition(self.COLON)
        # the text after the last colon is a port only if it is all digits
        if present and not port.lstrip(self.DIGITS):
            return port
        else:
            return None

    def geturi(self):
        """Return the re-combined version of the original URI reference as a
        string.

        """
        scheme, authority, path, query, fragment = self

        # RFC 3986 5.3. Component Recomposition
        result = []
        if scheme is not None:
            result.extend([scheme, self.COLON])
        if authority is not None:
            result.extend([self.SLASH, self.SLASH, authority])
        result.append(path)
        if query is not None:
            result.extend([self.QUEST, query])
        if fragment is not None:
            result.extend([self.HASH, fragment])
        return self.EMPTY.join(result)

    def getscheme(self, default=None):
        """Return the URI scheme in canonical (lowercase) form, or `default`
        if the original URI reference did not contain a scheme component.

        A bytes scheme is decoded as ASCII, so the result is always text.

        """
        scheme = self.scheme
        if scheme is None:
            return default
        elif isinstance(scheme, bytes):
            return scheme.decode('ascii').lower()
        else:
            return scheme.lower()

    def getauthority(self, default=None, encoding='utf-8', errors='strict'):
        """Return the decoded userinfo, host and port subcomponents of the URI
        authority as a three-item tuple.

        `default`, if given, must be a three-item iterable supplying the
        fallback for each subcomponent; otherwise :exc:`TypeError` or
        :exc:`ValueError` is raised.

        """
        # TBD: (userinfo, host, port) kwargs, default string?
        if default is None:
            default = (None, None, None)
        elif not isinstance(default, collections.abc.Iterable):
            raise TypeError('Invalid default type')
        elif len(default) != 3:
            raise ValueError('Invalid default length')
        # TODO: this could be much more efficient by using a dedicated regex
        return (
            self.getuserinfo(default[0], encoding, errors),
            self.gethost(default[1], errors),
            self.getport(default[2])
        )

    def getuserinfo(self, default=None, encoding='utf-8', errors='strict'):
        """Return the decoded userinfo subcomponent of the URI authority, or
        `default` if the original URI reference did not contain a
        userinfo field.

        """
        userinfo = self.userinfo
        if userinfo is None:
            return default
        else:
            return uridecode(userinfo, encoding, errors)

    def gethost(self, default=None, errors='strict'):
        """Return the decoded host subcomponent of the URI authority as a
        string or an :mod:`ipaddress` address object, or `default` if
        the original URI reference did not contain a host.

        An empty host is replaced by `default` only when `default` is
        not `None`.  Raises :exc:`ValueError` for a host with unbalanced
        square brackets or an unsupported or invalid IP literal.

        """
        host = self.host
        if host is None or (not host and default is not None):
            return default
        elif host.startswith(self.LBRACKET) and host.endswith(self.RBRACKET):
            return _ip_literal(host[1:-1])
        elif host.startswith(self.LBRACKET) or host.endswith(self.RBRACKET):
            raise ValueError('Invalid host %r' % host)
        # TODO: faster check for IPv4 address?
        return _ipv4_address(host) or uridecode(host, 'utf-8', errors).lower()

    def getport(self, default=None):
        """Return the port subcomponent of the URI authority as an
        :class:`int`, or `default` if the original URI reference did
        not contain a port or if the port was empty.

        """
        port = self.port
        if port:
            return int(port)
        else:
            return default

    def getpath(self, encoding='utf-8', errors='strict'):
        """Return the normalized decoded URI path."""
        path = self.__remove_dot_segments(self.path)
        return uridecode(path, encoding, errors)

    def getquery(self, default=None, encoding='utf-8', errors='strict'):
        """Return the decoded query string, or `default` if the original URI
        reference did not contain a query component.

        """
        query = self.query
        if query is None:
            return default
        else:
            return uridecode(query, encoding, errors)

    def getquerydict(self, sep='&', encoding='utf-8', errors='strict'):
        """Split the query component into individual `name=value` pairs
        separated by `sep` and return a dictionary of query variables.
        The dictionary keys are the unique query variable names and
        the values are lists of values for each name.

        """
        # named to avoid shadowing the builtin ``dict``
        querydict = collections.defaultdict(list)
        for name, value in self.getquerylist(sep, encoding, errors):
            querydict[name].append(value)
        return querydict

    def getquerylist(self, sep='&', encoding='utf-8', errors='strict'):
        """Split the query component into individual `name=value` pairs
        separated by `sep`, and return a list of `(name, value)`
        tuples.

        Empty items are skipped; an item without ``=`` yields a value of
        `None`.

        """
        if not self.query:
            return []
        elif isinstance(sep, type(self.query)):
            qsl = self.query.split(sep)
        elif isinstance(sep, bytes):
            # text query, bytes separator
            qsl = self.query.split(sep.decode('ascii'))
        else:
            # bytes query, text separator
            qsl = self.query.split(sep.encode('ascii'))
        items = []
        for parts in [qs.partition(self.EQ) for qs in qsl if qs]:
            name = uridecode(parts[0], encoding, errors)
            if parts[1]:
                value = uridecode(parts[2], encoding, errors)
            else:
                value = None
            items.append((name, value))
        return items

    def getfragment(self, default=None, encoding='utf-8', errors='strict'):
        """Return the decoded fragment identifier, or `default` if the
        original URI reference did not contain a fragment component.

        """
        fragment = self.fragment
        if fragment is None:
            return default
        else:
            return uridecode(fragment, encoding, errors)

    def isuri(self):
        """Return :const:`True` if this is a URI."""
        return self.scheme is not None

    def isabsuri(self):
        """Return :const:`True` if this is an absolute URI."""
        return self.scheme is not None and self.fragment is None

    def isnetpath(self):
        """Return :const:`True` if this is a network-path reference."""
        return self.scheme is None and self.authority is not None

    def isabspath(self):
        """Return :const:`True` if this is an absolute-path reference."""
        return (self.scheme is None and self.authority is None and
                self.path.startswith(self.SLASH))

    def isrelpath(self):
        """Return :const:`True` if this is a relative-path reference."""
        return (self.scheme is None and self.authority is None and
                not self.path.startswith(self.SLASH))

    def issamedoc(self):
        """Return :const:`True` if this is a same-document reference."""
        return (self.scheme is None and self.authority is None and
                not self.path and self.query is None)

    def transform(self, ref, strict=False):
        """Transform a URI reference relative to `self` into a
        :class:`SplitResult` representing its target URI.

        Unless `strict` is true, a scheme in `ref` that equals the
        scheme of `self` is treated as if it were absent.

        """
        scheme, authority, path, query, fragment = self.RE.match(ref).groups()

        # RFC 3986 5.2.2. Transform References
        if scheme is not None and (strict or scheme != self.scheme):
            path = self.__remove_dot_segments(path)
        elif authority is not None:
            scheme = self.scheme
            path = self.__remove_dot_segments(path)
        elif not path:
            scheme = self.scheme
            authority = self.authority
            path = self.path
            query = self.query if query is None else query
        elif path.startswith(self.SLASH):
            scheme = self.scheme
            authority = self.authority
            path = self.__remove_dot_segments(path)
        else:
            scheme = self.scheme
            authority = self.authority
            path = self.__remove_dot_segments(self.__merge(path))
        return type(self)(scheme, authority, path, query, fragment)

    def __merge(self, path):
        # RFC 3986 5.2.3. Merge Paths
        if self.authority is not None and not self.path:
            return self.SLASH + path
        else:
            # keep everything up to and including the last slash of the
            # base path, then append the reference path
            parts = self.path.rpartition(self.SLASH)
            return parts[1].join((parts[0], path))

    @classmethod
    def __remove_dot_segments(cls, path):
        # RFC 3986 5.2.4. Remove Dot Segments
        pseg = []
        for s in path.split(cls.SLASH):
            if s == cls.DOT:
                continue
            elif s != cls.DOTDOT:
                pseg.append(s)
            elif len(pseg) == 1 and not pseg[0]:
                # ".." directly below the root of an absolute path
                continue
            elif pseg and pseg[-1] != cls.DOTDOT:
                pseg.pop()
            else:
                pseg.append(s)
        # adjust for trailing '/.' or '/..'
        if path.rpartition(cls.SLASH)[2] in (cls.DOT, cls.DOTDOT):
            pseg.append(cls.EMPTY)
        if path and len(pseg) == 1 and pseg[0] == cls.EMPTY:
            pseg.insert(0, cls.DOT)
        return cls.SLASH.join(pseg)
class SplitResultBytes(SplitResult):
    """:class:`SplitResult` variant whose regex and delimiter constants
    are :class:`bytes`.
    """

    __slots__ = ()  # prevent creation of instance dictionary

    # RFC 3986 Appendix B
    RE = re.compile(br"""
(?:([A-Za-z][A-Za-z0-9+.-]*):)? # scheme (RFC 3986 3.1)
(?://([^/?#]*))? # authority
([^?#]*) # path
(?:\?([^#]*))? # query
(?:\#(.*))? # fragment
""", flags=re.VERBOSE)

    # RFC 3986 2.2 gen-delims
    COLON = b':'
    SLASH = b'/'
    QUEST = b'?'
    HASH = b'#'
    LBRACKET = b'['
    RBRACKET = b']'
    AT = b'@'

    # RFC 3986 3.3 dot-segments
    DOT = b'.'
    DOTDOT = b'..'

    EMPTY = b''
    EQ = b'='

    DIGITS = b'0123456789'
class SplitResultString(SplitResult):
    """:class:`SplitResult` variant whose regex and delimiter constants
    are text strings.
    """

    __slots__ = ()  # prevent creation of instance dictionary

    # RFC 3986 Appendix B
    RE = re.compile(r"""
(?:([A-Za-z][A-Za-z0-9+.-]*):)? # scheme (RFC 3986 3.1)
(?://([^/?#]*))? # authority
([^?#]*) # path
(?:\?([^#]*))? # query
(?:\#(.*))? # fragment
""", flags=re.VERBOSE)

    # RFC 3986 2.2 gen-delims
    COLON = u':'
    SLASH = u'/'
    QUEST = u'?'
    HASH = u'#'
    LBRACKET = u'['
    RBRACKET = u']'
    AT = u'@'

    # RFC 3986 3.3 dot-segments
    DOT = u'.'
    DOTDOT = u'..'

    EMPTY = u''
    EQ = u'='

    DIGITS = u'0123456789'
def urisplit(uristring):
    """Split a well-formed URI reference string into a tuple with five
    components corresponding to a URI's general structure::

      <scheme>://<authority>/<path>?<query>#<fragment>

    Bytes input yields a :class:`SplitResultBytes`; any other input is
    handled by :class:`SplitResultString`.

    """
    # choose the result class whose regex matches the input's string type
    cls = SplitResultBytes if isinstance(uristring, bytes) else SplitResultString
    components = cls.RE.match(uristring).groups()
    return cls(*components)
def uriunsplit(parts):
    """Combine the elements of a five-item iterable into a URI reference's
    string representation.

    The type of the path element decides whether the components are
    joined as bytes or as text.

    """
    scheme, authority, path, query, fragment = parts
    cls = SplitResultBytes if isinstance(path, bytes) else SplitResultString
    return cls(scheme, authority, path, query, fragment).geturi()