A user account is required in order to edit this wiki, but we've had to disable public user registrations due to spam.
To request an account, ask an autoconfirmed user on Chat (such as one of these permanent autoconfirmed members).
URL: Difference between revisions
Jump to navigation
Jump to search
(put parser sketch online) |
mNo edit summary |
||
| Line 26: | Line 26: | ||
parse (urlstr, optional baseURL) | parse (urlstr, optional baseURL) | ||
url = new URL | url = new URL | ||
tokenize(urlstr) | |||
SCHEME | SCHEME CHECK START | ||
if char is in ALPHA | |||
append char to buffer | |||
-> SCHEME CHECK NEXT | |||
else | |||
unconsume char | |||
-> NO SCHEME | |||
.. | SCHEME CHECK NEXT | ||
if char is in ALPHA / DIGIT / "+" / "-" / "." | |||
append char to buffer | |||
-> continue | |||
elif char is ":" | |||
url.scheme = buffer.toASCIILowercase() | |||
buffer = "" | |||
-> SCHEME | |||
else: | |||
unconsume char | |||
prepend buffer to input | |||
-> NO SCHEME | |||
SCHEME | SCHEME | ||
if url.scheme is not hierarchical | if url.scheme is not hierarchical (data:) | ||
-> NON-HIERARCHICAL | -> NON-HIERARCHICAL | ||
elif baseURL and url.scheme is baseURL.scheme (http:?test) | |||
-> RELATIVE | -> RELATIVE | ||
else (https://test.com/) | |||
-> AUTHORITY | -> AUTHORITY START | ||
NO SCHEME | NO SCHEME | ||
if baseURL.scheme is not hierarchical | if not baseURL or baseURL.scheme is not hierarchical | ||
url.invalid = true | url.invalid = true | ||
return url | return url | ||
| Line 54: | Line 64: | ||
-> RELATIVE | -> RELATIVE | ||
NON-HIERARCHICAL | NON-HIERARCHICAL (could merge with PATH) | ||
if curChar is "#" | if curChar is "#" | ||
FRAGMENT | FRAGMENT | ||
| Line 61: | Line 71: | ||
RELATIVE | RELATIVE | ||
if | if char is EOI (end-of-input) | ||
url = baseURL | url = baseURL | ||
url.fragment = null | url.fragment = null | ||
exit | |||
elif char is "/" or char is "\" | |||
if | if next char "/" or next char is "\" | ||
url.scheme = baseURL.scheme | url.scheme = baseURL.scheme | ||
AUTHORITY | -> AUTHORITY START | ||
else | else | ||
url.scheme = baseURL.scheme | url.scheme = baseURL.scheme | ||
url.authority = baseURL.authority | url.authority = baseURL.authority | ||
PATH | -> PATH | ||
elif char is "?" | |||
url.scheme = baseURL.scheme | url.scheme = baseURL.scheme | ||
url.authority = baseURL.authority | url.authority = baseURL.authority | ||
url.path = baseURL.path | url.path = baseURL.path | ||
QUERY | -> QUERY | ||
elif char is "#" | |||
url.scheme = baseURL.scheme | url.scheme = baseURL.scheme | ||
url.authority = baseURL.authority | url.authority = baseURL.authority | ||
url.path = baseURL.path | url.path = baseURL.path | ||
url.query = baseURL.query | url.query = baseURL.query | ||
FRAGMENT | -> FRAGMENT | ||
else | else | ||
| Line 92: | Line 102: | ||
url.authority = baseURL.authority | url.authority = baseURL.authority | ||
prepend input by baseURL.path up to the last / | prepend input by baseURL.path up to the last / | ||
PATH | -> PATH | ||
AUTHORITY | AUTHORITY START | ||
if "/" or "\" | if char is "/" or char is "\" | ||
-> continue | |||
else | else | ||
AUTHORITY | -> AUTHORITY | ||
AUTHORITY | AUTHORITY | ||
... | ... | ||
PATH | PATH | ||
if | if char is "?" | ||
QUERY | -> QUERY | ||
if | if char is "#" | ||
FRAGMENT | -> FRAGMENT | ||
QUERY | QUERY | ||
if | if char is "#" | ||
FRAGMENT | -> FRAGMENT | ||
FRAGMENT | FRAGMENT | ||
Revision as of 14:18, 15 June 2012
This page collects research and notes on the URL specification.
Implementations
- http://trac.webkit.org/browser/trunk/Source/WebCore/platform/KURL.cpp
- http://trac.webkit.org/browser/trunk/Source/WebCore/platform/KURLWTFURL.cpp
- http://trac.webkit.org/browser/trunk/Source/WebCore/platform/KURLGoogle.cpp
- http://trac.webkit.org/browser/trunk/Source/WebCore/platform/network/DataURL.cpp (data URLs)
Model
URL (.href)
- invalid?
- scheme (.protocol)
- authority
  - username (proposed .username)
  - password (proposed .password)
  - ip/host (.hostname)
  - port (.port)
- path (.pathname)
- query (.search)
- fragment (.hash)
Parsing
parse (urlstr, optional baseURL)
url = new URL
tokenize(urlstr)
SCHEME CHECK START
if char is in ALPHA
append char to buffer
-> SCHEME CHECK NEXT
else
unconsume char
-> NO SCHEME
SCHEME CHECK NEXT
if char is in ALPHA / DIGIT / "+" / "-" / "."
append char to buffer
-> continue
elif char is ":"
url.scheme = buffer.toASCIILowercase()
buffer = ""
-> SCHEME
else
unconsume char
prepend buffer to input
-> NO SCHEME
SCHEME
if url.scheme is not hierarchical (data:)
-> NON-HIERARCHICAL
elif baseURL and url.scheme is baseURL.scheme (http:?test)
-> RELATIVE
else (https://test.com/)
-> AUTHORITY START
NO SCHEME
if not baseURL or baseURL.scheme is not hierarchical
url.invalid = true
return url
else
-> RELATIVE
NON-HIERARCHICAL (could merge with PATH)
if char is "#"
-> FRAGMENT
else
...
RELATIVE
if char is EOI (end-of-input)
url = baseURL
url.fragment = null
exit
elif char is "/" or char is "\"
if next char is "/" or next char is "\"
url.scheme = baseURL.scheme
-> AUTHORITY START
else
url.scheme = baseURL.scheme
url.authority = baseURL.authority
-> PATH
elif char is "?"
url.scheme = baseURL.scheme
url.authority = baseURL.authority
url.path = baseURL.path
-> QUERY
elif char is "#"
url.scheme = baseURL.scheme
url.authority = baseURL.authority
url.path = baseURL.path
url.query = baseURL.query
-> FRAGMENT
else
url.scheme = baseURL.scheme
url.authority = baseURL.authority
prepend baseURL.path (up to its last "/") to input
-> PATH
AUTHORITY START
if char is "/" or char is "\"
-> continue
else
-> AUTHORITY
AUTHORITY
...
PATH
if char is "?"
-> QUERY
elif char is "#"
-> FRAGMENT
QUERY
if char is "#"
-> FRAGMENT
FRAGMENT
...