Python3 Migrate
This commit is contained in:
40
venv/lib/python3.7/site-packages/uritools/__init__.py
Normal file
40
venv/lib/python3.7/site-packages/uritools/__init__.py
Normal file
@@ -0,0 +1,40 @@
|
||||
"""RFC 3986 compliant, scheme-agnostic replacement for `urllib.parse`.
|
||||
|
||||
This module defines RFC 3986 compliant replacements for the most
|
||||
commonly used functions of the Python Standard Library
|
||||
:mod:`urllib.parse` module.
|
||||
|
||||
"""
|
||||
|
||||
from .chars import GEN_DELIMS, RESERVED, SUB_DELIMS, UNRESERVED
|
||||
from .classify import isabspath, isabsuri, isnetpath, isrelpath
|
||||
from .classify import issamedoc, isuri
|
||||
from .compose import uricompose
|
||||
from .defrag import DefragResult, uridefrag
|
||||
from .encoding import uridecode, uriencode
|
||||
from .join import urijoin
|
||||
from .split import SplitResult, urisplit, uriunsplit
|
||||
|
||||
# Names exported by ``from uritools import *``.
__all__ = (
    # character sets
    'GEN_DELIMS',
    'RESERVED',
    'SUB_DELIMS',
    'UNRESERVED',
    # result classes
    'DefragResult',
    'SplitResult',
    # classification helpers
    'isabspath',
    'isabsuri',
    'isnetpath',
    'isrelpath',
    'issamedoc',
    'isuri',
    # composing, splitting, encoding and joining
    'uricompose',
    'uridecode',
    'uridefrag',
    'uriencode',
    'urijoin',
    'urisplit',
    'uriunsplit',
)

__version__ = '3.0.0'
|
||||
23
venv/lib/python3.7/site-packages/uritools/chars.py
Normal file
23
venv/lib/python3.7/site-packages/uritools/chars.py
Normal file
@@ -0,0 +1,23 @@
|
||||
# RFC 3986 2.2. Reserved Characters
#
#   reserved    = gen-delims / sub-delims
#
#   gen-delims  = ":" / "/" / "?" / "#" / "[" / "]" / "@"
#
#   sub-delims  = "!" / "$" / "&" / "'" / "(" / ")"
#               / "*" / "+" / "," / ";" / "="
#
GEN_DELIMS = ':/?#[]@'
SUB_DELIMS = "!$&'()*+,;="
# The reserved set is the gen-delims followed by the sub-delims.
RESERVED = GEN_DELIMS + SUB_DELIMS

# RFC 3986 2.3. Unreserved Characters
#
#   unreserved  = ALPHA / DIGIT / "-" / "." / "_" / "~"
#
UNRESERVED = (
    'ABCDEFGHIJKLMNOPQRSTUVWXYZ'      # ALPHA (upper case)
    + 'abcdefghijklmnopqrstuvwxyz'    # ALPHA (lower case)
    + '0123456789'                    # DIGIT
    + '-._~'
)
|
||||
33
venv/lib/python3.7/site-packages/uritools/classify.py
Normal file
33
venv/lib/python3.7/site-packages/uritools/classify.py
Normal file
@@ -0,0 +1,33 @@
|
||||
from .split import urisplit
|
||||
|
||||
# TODO: use specialized checks/regexes for performance
|
||||
|
||||
|
||||
def isuri(uristring):
    """Return :const:`True` if `uristring` is a URI.

    The string is split with :func:`urisplit` and the answer is taken
    from the ``isuri()`` method of the split result.

    """
    components = urisplit(uristring)
    return components.isuri()
|
||||
|
||||
|
||||
def isabsuri(uristring):
    """Return :const:`True` if `uristring` is an absolute URI.

    The string is split with :func:`urisplit` and the answer is taken
    from the ``isabsuri()`` method of the split result.

    """
    components = urisplit(uristring)
    return components.isabsuri()
|
||||
|
||||
|
||||
def isnetpath(uristring):
    """Return :const:`True` if `uristring` is a network-path reference.

    The string is split with :func:`urisplit` and the answer is taken
    from the ``isnetpath()`` method of the split result.

    """
    components = urisplit(uristring)
    return components.isnetpath()
|
||||
|
||||
|
||||
def isabspath(uristring):
    """Return :const:`True` if `uristring` is an absolute-path reference.

    The string is split with :func:`urisplit` and the answer is taken
    from the ``isabspath()`` method of the split result.

    """
    components = urisplit(uristring)
    return components.isabspath()
|
||||
|
||||
|
||||
def isrelpath(uristring):
    """Return :const:`True` if `uristring` is a relative-path reference.

    The string is split with :func:`urisplit` and the answer is taken
    from the ``isrelpath()`` method of the split result.

    """
    components = urisplit(uristring)
    return components.isrelpath()
|
||||
|
||||
|
||||
def issamedoc(uristring):
    """Return :const:`True` if `uristring` is a same-document reference.

    The string is split with :func:`urisplit` and the answer is taken
    from the ``issamedoc()`` method of the split result.

    """
    components = urisplit(uristring)
    return components.issamedoc()
|
||||
204
venv/lib/python3.7/site-packages/uritools/compose.py
Normal file
204
venv/lib/python3.7/site-packages/uritools/compose.py
Normal file
@@ -0,0 +1,204 @@
|
||||
import collections
|
||||
import collections.abc
|
||||
import ipaddress
|
||||
import numbers
|
||||
import re
|
||||
|
||||
from .chars import SUB_DELIMS
|
||||
from .encoding import uriencode
|
||||
from .split import uriunsplit
|
||||
|
||||
# RFC 3986 3.1: scheme = ALPHA *( ALPHA / DIGIT / "+" / "-" / "." )
_SCHEME_RE = re.compile(rb'^[A-Za-z][A-Za-z0-9+.-]*$')

# RFC 3986 3.2: authority = [ userinfo "@" ] host [ ":" port ]
#
# One pattern per string type; the groups are (userinfo, host, port).
_AUTHORITY_RE_BYTES = re.compile(rb'^(?:(.*)@)?(.*?)(?::([0-9]*))?$')
_AUTHORITY_RE_STR = re.compile(r'^(?:(.*)@)?(.*?)(?::([0-9]*))?$')

# safe component characters, i.e. characters passed to uriencode() as
# `safe` for the respective component
_SAFE_HOST = SUB_DELIMS
_SAFE_USERINFO = SUB_DELIMS + ':'
_SAFE_PATH = _SAFE_USERINFO + '@/'
_SAFE_QUERY = _SAFE_PATH + '?'
_SAFE_FRAGMENT = _SAFE_PATH + '?'
|
||||
|
||||
|
||||
def _scheme(scheme):
    """Validate a scheme given as `bytes` and return it in lowercase.

    The value is checked against the module-level ``_SCHEME_RE``
    pattern.

    :raises ValueError: if `scheme` is not matched in its entirety.

    """
    # Use fullmatch() instead of match(): the pattern ends in "$", which
    # also matches right before a trailing newline, so match() would
    # accept a scheme with a trailing newline and the newline would end
    # up in the composed URI.
    if _SCHEME_RE.fullmatch(scheme) is None:
        raise ValueError('Invalid scheme component')
    return scheme.lower()
|
||||
|
||||
|
||||
def _authority(userinfo, host, port, encoding):
    """Build the authority component as `bytes` from its subcomponents.

    `userinfo` is percent-encoded using `encoding` and followed by
    ``@``.  `host` may be an :mod:`ipaddress` address object, `bytes`
    or a string (strings are encoded as UTF-8 before being passed to
    ``_host``).  `port` may be a number, `bytes` or a string and is
    passed to ``_port`` as `bytes`.

    Return :const:`None` if all three subcomponents are :const:`None`.

    """
    pieces = []

    if userinfo is not None:
        pieces += [uriencode(userinfo, _SAFE_USERINFO, encoding), b'@']

    if isinstance(host, ipaddress.IPv6Address):
        # IPv6 addresses are written as bracketed IP literals
        pieces.append(b'[%s]' % host.compressed.encode())
    elif isinstance(host, ipaddress.IPv4Address):
        pieces.append(host.compressed.encode())
    elif host is not None:
        raw_host = host if isinstance(host, bytes) else host.encode('utf-8')
        pieces.append(_host(raw_host))

    if port is not None:
        if isinstance(port, numbers.Number):
            raw_port = str(port).encode()
        elif isinstance(port, bytes):
            raw_port = port
        else:
            raw_port = port.encode()
        pieces.append(_port(raw_port))

    if not pieces:
        return None
    return b''.join(pieces)
|
||||
|
||||
|
||||
def _ip_literal(address):
|
||||
if address.startswith('v'):
|
||||
raise ValueError('Address mechanism not supported')
|
||||
else:
|
||||
return b'[' + ipaddress.IPv6Address(address).compressed.encode() + b']'
|
||||
|
||||
|
||||
def _host(host):
    """Return the normalized host subcomponent for the `bytes` `host`.

    A value enclosed in square brackets is handled as an IP literal.
    Any other value is first tried as a bare IPv6 address; if that
    fails it is percent-encoded and converted to lowercase.

    """
    # RFC 3986 3.2.3: Although host is case-insensitive, producers and
    # normalizers should use lowercase for registered names and
    # hexadecimal addresses for the sake of uniformity, while only
    # using uppercase letters for percent-encodings.
    is_bracketed = host.startswith(b'[') and host.endswith(b']')
    if is_bracketed:
        return _ip_literal(host[1:-1].decode())
    # check for IPv6 addresses as returned by SplitResult.gethost()
    try:
        return _ip_literal(host.decode('utf-8'))
    except ValueError:
        pass
    return uriencode(host, _SAFE_HOST, 'utf-8').lower()
|
||||
|
||||
|
||||
def _port(port):
|
||||
# RFC 3986 3.2.3: URI producers and normalizers should omit the
|
||||
# port component and its ":" delimiter if port is empty or if its
|
||||
# value would be the same as that of the scheme's default.
|
||||
if port.lstrip(b'0123456789'):
|
||||
raise ValueError('Invalid port subcomponent')
|
||||
elif port:
|
||||
return b':' + port
|
||||
else:
|
||||
return b''
|
||||
|
||||
|
||||
def _querylist(items, sep, encoding):
    """Encode an iterable of ``(key, value)`` pairs as a query string.

    Keys and values are percent-encoded with `encoding`; the separator
    `sep` is removed from the set of safe query characters.  A value of
    :const:`None` yields the bare key, values that are neither `bytes`
    nor `str` are converted with :func:`str` first.  The terms are
    joined with `sep` and returned as `bytes`.

    """
    safe = _SAFE_QUERY.replace(sep, '')
    encoded_terms = []
    for key, value in items:
        term = uriencode(key, safe, encoding)
        if value is not None:
            if not isinstance(value, (bytes, str)):
                value = str(value)
            term = term + b'=' + uriencode(value, safe, encoding)
        encoded_terms.append(term)
    return sep.encode('ascii').join(encoded_terms)
|
||||
|
||||
|
||||
def _querydict(mapping, sep, encoding):
    """Encode a mapping of query variables as a query string.

    A value that is iterable, but neither `bytes` nor `str`, contributes
    one ``(key, element)`` pair per element; every other value
    contributes a single pair.  The pairs are then encoded by
    ``_querylist``.

    """
    pairs = []
    for name, value in mapping.items():
        is_single = (
            isinstance(value, (bytes, str))
            or not isinstance(value, collections.abc.Iterable)
        )
        if is_single:
            pairs.append((name, value))
        else:
            pairs.extend((name, element) for element in value)
    return _querylist(pairs, sep, encoding)
|
||||
|
||||
|
||||
def uricompose(scheme=None, authority=None, path='', query=None,
               fragment=None, userinfo=None, host=None, port=None,
               querysep='&', encoding='utf-8'):
    """Compose a URI reference string from its individual components.

    `authority` may be `bytes`, a string, or a three-item iterable of
    ``(userinfo, host, port)``; the keyword arguments `userinfo`,
    `host` and `port` take precedence over the corresponding authority
    parts when they are not :const:`None`.  `query` may be `bytes`, a
    string, a mapping or an iterable of ``(key, value)`` pairs, with
    `querysep` separating the terms of the latter two.  Components are
    percent-encoded using `encoding`, and the result is returned as
    `str`.

    :raises TypeError: for an unsupported `authority` or `query` type.
    :raises ValueError: for an authority iterable not of length three,
        or a `path` that does not fit the presence/absence of an
        authority.

    """
    # RFC 3986 3.1: Scheme names consist of a sequence of characters
    # beginning with a letter and followed by any combination of
    # letters, digits, plus ("+"), period ("."), or hyphen ("-").
    # Schemes are case-insensitive, but only lowercase scheme names
    # should be produced.
    if scheme is not None:
        scheme = _scheme(
            scheme if isinstance(scheme, bytes) else scheme.encode()
        )

    # authority must be string type or three-item iterable
    if authority is None:
        authority = (None, None, None)
    elif isinstance(authority, bytes):
        authority = _AUTHORITY_RE_BYTES.match(authority).groups()
    elif isinstance(authority, str):
        authority = _AUTHORITY_RE_STR.match(authority).groups()
    elif not isinstance(authority, collections.abc.Iterable):
        raise TypeError('Invalid authority type')
    elif len(authority) != 3:
        raise ValueError('Invalid authority length')
    # explicit subcomponent arguments override the authority's parts
    if userinfo is None:
        userinfo = authority[0]
    if host is None:
        host = authority[1]
    if port is None:
        port = authority[2]
    authority = _authority(userinfo, host, port, encoding)

    path = uriencode(path, _SAFE_PATH, encoding)
    if authority is not None:
        # RFC 3986 3.3: If a URI contains an authority component, then
        # the path component must either be empty or begin with a slash
        # ("/") character.
        if path and not path.startswith(b'/'):
            raise ValueError('Invalid path with authority component')
    else:
        # RFC 3986 3.3: If a URI does not contain an authority
        # component, then the path cannot begin with two slash
        # characters ("//").
        if path.startswith(b'//'):
            raise ValueError('Invalid path without authority component')
        # RFC 3986 4.2: A path segment that contains a colon character
        # (e.g., "this:that") cannot be used as the first segment of a
        # relative-path reference, as it would be mistaken for a scheme
        # name.  Such a segment must be preceded by a dot-segment (e.g.,
        # "./this:that") to make a relative-path reference.
        if scheme is None and not path.startswith(b'/'):
            first_segment = path.partition(b'/')[0]
            if b':' in first_segment:
                path = b'./' + path

    # RFC 3986 3.4: The characters slash ("/") and question mark ("?")
    # may represent data within the query component, so they are not
    # percent-encoded here.
    if query is not None:
        if isinstance(query, (bytes, str)):
            query = uriencode(query, _SAFE_QUERY, encoding)
        elif isinstance(query, collections.abc.Mapping):
            query = _querydict(query, querysep, encoding)
        elif isinstance(query, collections.abc.Iterable):
            query = _querylist(query, querysep, encoding)
        else:
            raise TypeError('Invalid query type')

    # RFC 3986 3.5: The characters slash ("/") and question mark ("?")
    # are allowed to represent data within the fragment identifier.
    if fragment is not None:
        fragment = uriencode(fragment, _SAFE_FRAGMENT, encoding)

    # return URI reference as `str`
    composed = uriunsplit((scheme, authority, path, query, fragment))
    return composed.decode()
|
||||
41
venv/lib/python3.7/site-packages/uritools/defrag.py
Normal file
41
venv/lib/python3.7/site-packages/uritools/defrag.py
Normal file
@@ -0,0 +1,41 @@
|
||||
import collections
|
||||
|
||||
from .encoding import uridecode
|
||||
|
||||
|
||||
class DefragResult(collections.namedtuple('DefragResult', 'uri fragment')):
    """Two-item tuple ``(uri, fragment)`` holding :func:`uridefrag`
    results.

    """

    __slots__ = ()  # prevent creation of instance dictionary

    def geturi(self):
        """Return the recombined version of the original URI as a string.

        If `fragment` is :const:`None`, `uri` is returned unchanged;
        otherwise `uri` and `fragment` are joined with ``#`` in the
        string type of the fragment.

        """
        uri, fragment = self
        if fragment is None:
            return uri
        hash_sign = b'#' if isinstance(fragment, bytes) else u'#'
        return uri + hash_sign + fragment

    def getfragment(self, default=None, encoding='utf-8', errors='strict'):
        """Return the decoded fragment identifier, or `default` if the
        original URI did not contain a fragment component.

        `encoding` and `errors` are passed on to :func:`uridecode`.

        """
        if self.fragment is None:
            return default
        return uridecode(self.fragment, encoding, errors)
|
||||
|
||||
|
||||
def uridefrag(uristring):
    """Remove an existing fragment component from a URI reference string.

    The string is partitioned at the first ``#``.  Return a
    :class:`DefragResult` whose `fragment` is :const:`None` if
    `uristring` contains no ``#``.

    """
    hash_sign = b'#' if isinstance(uristring, bytes) else u'#'
    uri, found, fragment = uristring.partition(hash_sign)
    return DefragResult(uri, fragment if found else None)
|
||||
53
venv/lib/python3.7/site-packages/uritools/encoding.py
Normal file
53
venv/lib/python3.7/site-packages/uritools/encoding.py
Normal file
@@ -0,0 +1,53 @@
|
||||
from string import hexdigits as _hex
|
||||
|
||||
from .chars import UNRESERVED
|
||||
|
||||
|
||||
# RFC 3986 2.1: For consistency, URI producers and normalizers should
|
||||
# use uppercase hexadecimal digits for all percent-encodings.
|
||||
def _pctenc(byte):
|
||||
return ('%%%02X' % byte).encode()
|
||||
|
||||
|
||||
# byte values of the unreserved characters, which are never encoded
_unreserved = frozenset(UNRESERVED.encode())

# Encoding tables, keyed by the `bytes` of safe characters: each table
# is a 256-item list mapping a byte value to its encoded form.  Only
# the table for "no safe characters" is created up front.
_encoded = {
    b'': [
        bytes((octet,)) if octet in _unreserved else _pctenc(octet)
        for octet in range(256)
    ],
}

# Decoding table, mapping every pair of hexadecimal digits (in any mix
# of upper and lower case) to the byte it represents.
_decoded = {
    pair.encode(): bytes.fromhex(pair)
    for pair in (high + low for high in _hex for low in _hex)
}
|
||||
|
||||
|
||||
def uriencode(uristring, safe='', encoding='utf-8', errors='strict'):
    """Encode a URI string or string component.

    A `uristring` that is not `bytes` is first encoded using `encoding`
    and `errors`.  Each byte is then replaced according to a lookup
    table in which the bytes listed in `safe` stand for themselves; a
    `safe` that is not `bytes` is encoded as ASCII.  The result is
    returned as `bytes`.

    """
    if not isinstance(uristring, bytes):
        uristring = uristring.encode(encoding, errors)
    if not isinstance(safe, bytes):
        safe = safe.encode('ascii')
    table = _encoded.get(safe)
    if table is None:
        # first use of this set of safe characters: derive its table
        # from the default one and keep it for later calls
        table = list(_encoded[b''])
        for octet in safe:
            table[octet] = bytes((octet,))
        _encoded[safe] = table
    return b''.join(table[octet] for octet in uristring)
|
||||
|
||||
|
||||
def uridecode(uristring, encoding='utf-8', errors='strict'):
    """Decode a URI string or string component.

    A `uristring` that is not `bytes` is first encoded using `encoding`
    (ASCII if `encoding` is :const:`None`).  Every ``%`` followed by
    two hexadecimal digits is replaced by the byte it denotes; a ``%``
    not followed by two hexadecimal digits is kept as is.  The result
    is decoded using `encoding` and `errors`, or returned as `bytes` if
    `encoding` is :const:`None`.

    """
    if not isinstance(uristring, bytes):
        uristring = uristring.encode(encoding or 'ascii', errors)
    head, *chunks = uristring.split(b'%')
    pieces = [head]
    for chunk in chunks:
        hexpair, remainder = chunk[:2], chunk[2:]
        # unknown pairs are put back together with their percent sign
        pieces.append(_decoded.get(hexpair, b'%' + hexpair))
        pieces.append(remainder)
    decoded = b''.join(pieces)
    if encoding is None:
        return decoded
    return decoded.decode(encoding, errors)
|
||||
14
venv/lib/python3.7/site-packages/uritools/join.py
Normal file
14
venv/lib/python3.7/site-packages/uritools/join.py
Normal file
@@ -0,0 +1,14 @@
|
||||
from .split import urisplit
|
||||
|
||||
|
||||
def urijoin(base, ref, strict=False):
    """Convert a URI reference relative to a base URI to its target URI
    string.

    If `base` is not an instance of the type of `ref`, whichever of the
    two is `bytes` is decoded first.  `base` is then split with
    :func:`urisplit`, `ref` is resolved against it with ``transform()``
    (which receives `strict`), and the recombined result is returned.

    """
    if not isinstance(base, type(ref)):
        if isinstance(base, bytes):
            base = base.decode()
        else:
            ref = ref.decode()
    target = urisplit(base).transform(ref, strict)
    return target.geturi()
|
||||
399
venv/lib/python3.7/site-packages/uritools/split.py
Normal file
399
venv/lib/python3.7/site-packages/uritools/split.py
Normal file
@@ -0,0 +1,399 @@
|
||||
import collections
|
||||
import collections.abc
|
||||
import ipaddress
|
||||
import re
|
||||
|
||||
from .encoding import uridecode
|
||||
|
||||
_URI_COMPONENTS = ('scheme', 'authority', 'path', 'query', 'fragment')
|
||||
|
||||
|
||||
def _ip_literal(address):
|
||||
# RFC 3986 3.2.2: In anticipation of future, as-yet-undefined IP
|
||||
# literal address formats, an implementation may use an optional
|
||||
# version flag to indicate such a format explicitly rather than
|
||||
# rely on heuristic determination.
|
||||
#
|
||||
# IP-literal = "[" ( IPv6address / IPvFuture ) "]"
|
||||
#
|
||||
# IPvFuture = "v" 1*HEXDIG "." 1*( unreserved / sub-delims / ":" )
|
||||
#
|
||||
# If a URI containing an IP-literal that starts with "v"
|
||||
# (case-insensitive), indicating that the version flag is present,
|
||||
# is dereferenced by an application that does not know the meaning
|
||||
# of that version flag, then the application should return an
|
||||
# appropriate error for "address mechanism not supported".
|
||||
if isinstance(address, bytes):
|
||||
address = address.decode('ascii')
|
||||
if address.startswith(u'v'):
|
||||
raise ValueError('address mechanism not supported')
|
||||
return ipaddress.IPv6Address(address)
|
||||
|
||||
|
||||
def _ipv4_address(address):
|
||||
try:
|
||||
if isinstance(address, bytes):
|
||||
return ipaddress.IPv4Address(address.decode('ascii'))
|
||||
else:
|
||||
return ipaddress.IPv4Address(address)
|
||||
except ValueError:
|
||||
return None
|
||||
|
||||
|
||||
class SplitResult(collections.namedtuple('SplitResult', _URI_COMPONENTS)):
    """Base class to hold :func:`urisplit` results.

    The methods use string constants (``COLON``, ``SLASH``, ``QUEST``,
    ``HASH``, ``LBRACKET``, ``RBRACKET``, ``AT``, ``DOT``, ``DOTDOT``,
    ``EMPTY``, ``EQ``, ``DIGITS``) and a compiled pattern ``RE`` that
    are not defined in this class; they have to be provided by a
    subclass in the string type of the components.

    """

    __slots__ = ()  # prevent creation of instance dictionary

    @property
    def userinfo(self):
        """The undecoded userinfo subcomponent, i.e. the part of the
        authority before the last ``@``, or :const:`None` if there is
        no authority or no ``@`` in it.

        """
        authority = self.authority
        if authority is None:
            return None
        userinfo, present, _ = authority.rpartition(self.AT)
        if present:
            return userinfo
        else:
            return None

    @property
    def host(self):
        """The undecoded host subcomponent, i.e. the part of the
        authority after the last ``@`` without a trailing ``:port``, or
        :const:`None` if there is no authority.

        """
        authority = self.authority
        if authority is None:
            return None
        _, _, hostinfo = authority.rpartition(self.AT)
        host, present, port = hostinfo.rpartition(self.COLON)
        # Only strip a port if there actually is a colon: without one,
        # rpartition() returns all of hostinfo as its last item, and a
        # host consisting of digits only (e.g. "12345") would be taken
        # for a port, leaving an empty host.
        if present and not port.lstrip(self.DIGITS):
            return host
        else:
            return hostinfo

    @property
    def port(self):
        """The undecoded, possibly empty port subcomponent, i.e. the
        part of the authority after the last colon if it consists of
        digits only, or :const:`None` otherwise.

        """
        authority = self.authority
        if authority is None:
            return None
        _, present, port = authority.rpartition(self.COLON)
        if present and not port.lstrip(self.DIGITS):
            return port
        else:
            return None

    def geturi(self):
        """Return the re-combined version of the original URI reference as a
        string.

        """
        scheme, authority, path, query, fragment = self

        # RFC 3986 5.3. Component Recomposition
        result = []
        if scheme is not None:
            result.extend([scheme, self.COLON])
        if authority is not None:
            result.extend([self.SLASH, self.SLASH, authority])
        result.append(path)
        if query is not None:
            result.extend([self.QUEST, query])
        if fragment is not None:
            result.extend([self.HASH, fragment])
        return self.EMPTY.join(result)

    def getscheme(self, default=None):
        """Return the URI scheme in canonical (lowercase) form, or `default`
        if the original URI reference did not contain a scheme component.

        A `bytes` scheme is decoded as ASCII, so the result is a string.

        """
        scheme = self.scheme
        if scheme is None:
            return default
        elif isinstance(scheme, bytes):
            return scheme.decode('ascii').lower()
        else:
            return scheme.lower()

    def getauthority(self, default=None, encoding='utf-8', errors='strict'):
        """Return the decoded userinfo, host and port subcomponents of the URI
        authority as a three-item tuple.

        `default`, if given, must be a three-item iterable providing
        the defaults passed to :meth:`getuserinfo`, :meth:`gethost` and
        :meth:`getport`, respectively.

        :raises TypeError: if `default` is not iterable.
        :raises ValueError: if `default` does not have three items.

        """
        # TBD: (userinfo, host, port) kwargs, default string?
        if default is None:
            default = (None, None, None)
        elif not isinstance(default, collections.abc.Iterable):
            raise TypeError('Invalid default type')
        elif len(default) != 3:
            raise ValueError('Invalid default length')
        # TODO: this could be much more efficient by using a dedicated regex
        return (
            self.getuserinfo(default[0], encoding, errors),
            self.gethost(default[1], errors),
            self.getport(default[2])
        )

    def getuserinfo(self, default=None, encoding='utf-8', errors='strict'):
        """Return the decoded userinfo subcomponent of the URI authority, or
        `default` if the original URI reference did not contain a
        userinfo field.

        """
        userinfo = self.userinfo
        if userinfo is None:
            return default
        else:
            return uridecode(userinfo, encoding, errors)

    def gethost(self, default=None, errors='strict'):
        """Return the decoded host subcomponent of the URI authority as a
        string or an :mod:`ipaddress` address object, or `default` if
        the original URI reference did not contain a host.

        `default` is also returned for an empty host, unless `default`
        is :const:`None`.  Host names are decoded as UTF-8 and
        converted to lowercase.

        :raises ValueError: if only one of the enclosing square
            brackets of an IP literal is present, or if the bracketed
            IP literal is not accepted by ``_ip_literal``.

        """
        host = self.host
        if host is None or (not host and default is not None):
            return default
        elif host.startswith(self.LBRACKET) and host.endswith(self.RBRACKET):
            return _ip_literal(host[1:-1])
        elif host.startswith(self.LBRACKET) or host.endswith(self.RBRACKET):
            raise ValueError('Invalid host %r' % host)
        # TODO: faster check for IPv4 address?
        return _ipv4_address(host) or uridecode(host, 'utf-8', errors).lower()

    def getport(self, default=None):
        """Return the port subcomponent of the URI authority as an
        :class:`int`, or `default` if the original URI reference did
        not contain a port or if the port was empty.

        """
        port = self.port
        if port:
            return int(port)
        else:
            return default

    def getpath(self, encoding='utf-8', errors='strict'):
        """Return the normalized decoded URI path.

        Dot segments are removed before the path is decoded.

        """
        path = self.__remove_dot_segments(self.path)
        return uridecode(path, encoding, errors)

    def getquery(self, default=None, encoding='utf-8', errors='strict'):
        """Return the decoded query string, or `default` if the original URI
        reference did not contain a query component.

        """
        query = self.query
        if query is None:
            return default
        else:
            return uridecode(query, encoding, errors)

    def getquerydict(self, sep='&', encoding='utf-8', errors='strict'):
        """Split the query component into individual `name=value` pairs
        separated by `sep` and return a dictionary of query variables.
        The dictionary keys are the unique query variable names and
        the values are lists of values for each name.

        The returned object is a :class:`collections.defaultdict`.

        """
        # not named `dict`, so that the builtin is not shadowed
        querydict = collections.defaultdict(list)
        for name, value in self.getquerylist(sep, encoding, errors):
            querydict[name].append(value)
        return querydict

    def getquerylist(self, sep='&', encoding='utf-8', errors='strict'):
        """Split the query component into individual `name=value` pairs
        separated by `sep`, and return a list of `(name, value)`
        tuples.

        Empty terms are skipped, and the value of a term without ``=``
        is :const:`None`.  `sep` is converted to the string type of the
        query using ASCII if necessary.

        """
        if not self.query:
            return []
        elif isinstance(sep, type(self.query)):
            qsl = self.query.split(sep)
        elif isinstance(sep, bytes):
            qsl = self.query.split(sep.decode('ascii'))
        else:
            qsl = self.query.split(sep.encode('ascii'))
        items = []
        for parts in [qs.partition(self.EQ) for qs in qsl if qs]:
            name = uridecode(parts[0], encoding, errors)
            if parts[1]:
                value = uridecode(parts[2], encoding, errors)
            else:
                value = None
            items.append((name, value))
        return items

    def getfragment(self, default=None, encoding='utf-8', errors='strict'):
        """Return the decoded fragment identifier, or `default` if the
        original URI reference did not contain a fragment component.

        """
        fragment = self.fragment
        if fragment is None:
            return default
        else:
            return uridecode(fragment, encoding, errors)

    def isuri(self):
        """Return :const:`True` if this is a URI."""
        return self.scheme is not None

    def isabsuri(self):
        """Return :const:`True` if this is an absolute URI."""
        return self.scheme is not None and self.fragment is None

    def isnetpath(self):
        """Return :const:`True` if this is a network-path reference."""
        return self.scheme is None and self.authority is not None

    def isabspath(self):
        """Return :const:`True` if this is an absolute-path reference."""
        return (self.scheme is None and self.authority is None and
                self.path.startswith(self.SLASH))

    def isrelpath(self):
        """Return :const:`True` if this is a relative-path reference."""
        return (self.scheme is None and self.authority is None and
                not self.path.startswith(self.SLASH))

    def issamedoc(self):
        """Return :const:`True` if this is a same-document reference."""
        return (self.scheme is None and self.authority is None and
                not self.path and self.query is None)

    def transform(self, ref, strict=False):
        """Transform a URI reference relative to `self` into a
        :class:`SplitResult` representing its target URI.

        Unless `strict` is true, a scheme in `ref` that equals the
        scheme of `self` is ignored.

        """
        scheme, authority, path, query, fragment = self.RE.match(ref).groups()

        # RFC 3986 5.2.2. Transform References
        if scheme is not None and (strict or scheme != self.scheme):
            path = self.__remove_dot_segments(path)
        elif authority is not None:
            scheme = self.scheme
            path = self.__remove_dot_segments(path)
        elif not path:
            scheme = self.scheme
            authority = self.authority
            path = self.path
            query = self.query if query is None else query
        elif path.startswith(self.SLASH):
            scheme = self.scheme
            authority = self.authority
            path = self.__remove_dot_segments(path)
        else:
            scheme = self.scheme
            authority = self.authority
            path = self.__remove_dot_segments(self.__merge(path))
        return type(self)(scheme, authority, path, query, fragment)

    def __merge(self, path):
        """Merge the relative `path` with the path of `self`."""
        # RFC 3986 5.2.3. Merge Paths
        if self.authority is not None and not self.path:
            return self.SLASH + path
        else:
            parts = self.path.rpartition(self.SLASH)
            return parts[1].join((parts[0], path))

    @classmethod
    def __remove_dot_segments(cls, path):
        """Return `path` with its ``.`` and ``..`` segments resolved."""
        # RFC 3986 5.2.4. Remove Dot Segments
        pseg = []
        for s in path.split(cls.SLASH):
            if s == cls.DOT:
                continue
            elif s != cls.DOTDOT:
                pseg.append(s)
            elif len(pseg) == 1 and not pseg[0]:
                continue
            elif pseg and pseg[-1] != cls.DOTDOT:
                pseg.pop()
            else:
                pseg.append(s)
        # adjust for trailing '/.' or '/..'
        if path.rpartition(cls.SLASH)[2] in (cls.DOT, cls.DOTDOT):
            pseg.append(cls.EMPTY)
        if path and len(pseg) == 1 and pseg[0] == cls.EMPTY:
            pseg.insert(0, cls.DOT)
        return cls.SLASH.join(pseg)
|
||||
|
||||
|
||||
class SplitResultBytes(SplitResult):
    """:class:`SplitResult` variant providing the pattern and the
    string constants as `bytes`.

    """

    __slots__ = ()  # prevent creation of instance dictionary

    # RFC 3986 Appendix B
    RE = re.compile(rb"""
    (?:([A-Za-z][A-Za-z0-9+.-]*):)?  # scheme (RFC 3986 3.1)
    (?://([^/?#]*))?                 # authority
    ([^?#]*)                         # path
    (?:\?([^#]*))?                   # query
    (?:\#(.*))?                      # fragment
    """, flags=re.VERBOSE)

    # RFC 3986 2.2 gen-delims
    COLON = b':'
    SLASH = b'/'
    QUEST = b'?'
    HASH = b'#'
    LBRACKET = b'['
    RBRACKET = b']'
    AT = b'@'

    # RFC 3986 3.3 dot-segments
    DOT = b'.'
    DOTDOT = b'..'

    EMPTY = b''
    EQ = b'='

    DIGITS = b'0123456789'
|
||||
|
||||
|
||||
class SplitResultString(SplitResult):
    """:class:`SplitResult` variant providing the pattern and the
    string constants as `str`.

    """

    __slots__ = ()  # prevent creation of instance dictionary

    # RFC 3986 Appendix B
    RE = re.compile(r"""
    (?:([A-Za-z][A-Za-z0-9+.-]*):)?  # scheme (RFC 3986 3.1)
    (?://([^/?#]*))?                 # authority
    ([^?#]*)                         # path
    (?:\?([^#]*))?                   # query
    (?:\#(.*))?                      # fragment
    """, flags=re.VERBOSE)

    # RFC 3986 2.2 gen-delims
    COLON = ':'
    SLASH = '/'
    QUEST = '?'
    HASH = '#'
    LBRACKET = '['
    RBRACKET = ']'
    AT = '@'

    # RFC 3986 3.3 dot-segments
    DOT = '.'
    DOTDOT = '..'

    EMPTY = ''
    EQ = '='

    DIGITS = '0123456789'
|
||||
|
||||
|
||||
def urisplit(uristring):
    """Split a well-formed URI reference string into a tuple with five
    components corresponding to a URI's general structure::

      <scheme>://<authority>/<path>?<query>#<fragment>

    The result is a :class:`SplitResultBytes` for `bytes` input and a
    :class:`SplitResultString` otherwise.

    """
    if isinstance(uristring, bytes):
        result_class = SplitResultBytes
    else:
        result_class = SplitResultString
    components = result_class.RE.match(uristring).groups()
    return result_class(*components)
|
||||
|
||||
|
||||
def uriunsplit(parts):
    """Combine the elements of a five-item iterable into a URI reference's
    string representation.

    The string type of the result is chosen by the type of the path
    element: `bytes` if the path is `bytes`, `str` otherwise.

    """
    scheme, authority, path, query, fragment = parts
    if isinstance(path, bytes):
        result_class = SplitResultBytes
    else:
        result_class = SplitResultString
    split = result_class(scheme, authority, path, query, fragment)
    return split.geturi()
|
||||
Reference in New Issue
Block a user