A user account is required in order to edit this wiki, but we've had to disable public user registrations due to spam.

To request an account, ask an autoconfirmed user on IRC (such as one of these permanent autoconfirmed members).

Difference between revisions of "URL"

From WHATWG Wiki
Jump to: navigation, search
(put parser sketch online)
m
Line 26: Line 26:
 
  parse (urlstr, optional baseURL)
 
  parse (urlstr, optional baseURL)
 
   url = new URL
 
   url = new URL
 +
  tokenize(urlstr)
 
   
 
   
   SCHEME-OR-RELATIVE
+
   SCHEME CHECK START
     FIRST SCHEME CHARACTER
+
     if char is in ALPHA
       if ...
+
       append char to buffer
        -> REMAINING SCHEME CHARACTERS
+
      -> SCHEME CHECK NEXT
        -> NO SCHEME
+
    else
    REMAINING SCHEME CHARACTERS
+
      unconsume char
      if curChar is ":"
+
      -> NO SCHEME
        -> SCHEME
+
       ...
+
  SCHEME CHECK NEXT
        -> NO SCHEME
+
    if char is in ALPHA / DIGIT / "+" / "-" / "."
        -> REMAINING SCHEME CHARACTERS
+
      append char to buffer
 +
      -> continue
 +
    elif char is ":"
 +
       url.scheme = buffer.toASCIILowercase()
 +
      buffer = ""
 +
      -> SCHEME
 +
    else:
 +
      unconsume char
 +
      prepend buffer to input
 +
      -> NO SCHEME
 
   
 
   
 
   SCHEME
 
   SCHEME
     if url.scheme is not hierarchical (data:)
+
     if url.scheme is not hierarchical (data:)
 
       -> NON-HIERARCHICAL
 
       -> NON-HIERARCHICAL
     if url.scheme is hierarchical and url.scheme is baseURL.scheme (http:?test)
+
     elif baseURL and url.scheme is baseURL.scheme (http:?test)
 
       -> RELATIVE
 
       -> RELATIVE
     if url.scheme is hierarchical (https://test.com/)
+
     else  (https://test.com/)
       -> AUTHORITY
+
       -> AUTHORITY START
 
   
 
   
 
   NO SCHEME
 
   NO SCHEME
     if baseURL.scheme is not hierarchical
+
     if not baseURL or baseURL.scheme is not hierarchical
 
       url.invalid = true
 
       url.invalid = true
 
       return url
 
       return url
Line 54: Line 64:
 
       -> RELATIVE
 
       -> RELATIVE
 
   
 
   
   NON-HIERARCHICAL
+
   NON-HIERARCHICAL (could merge with PATH)
 
     if curChar is "#"
 
     if curChar is "#"
 
       FRAGMENT
 
       FRAGMENT
Line 61: Line 71:
 
   
 
   
 
   RELATIVE
 
   RELATIVE
     if urlstr is empty
+
     if char is EOI (end-of-input)
 
       url = baseURL
 
       url = baseURL
 
       url.fragment = null
 
       url.fragment = null
       return url
+
       exit
 
   
 
   
     if curChar is either "/" or "\"
+
     elif char is "/" or char is "\"
       if urlstr second character is either "/" or "\"
+
       if next char "/" or next char is "\"
 
         url.scheme = baseURL.scheme
 
         url.scheme = baseURL.scheme
         AUTHORITY
+
         -> AUTHORITY START
 
       else
 
       else
 
         url.scheme = baseURL.scheme
 
         url.scheme = baseURL.scheme
 
         url.authority = baseURL.authority
 
         url.authority = baseURL.authority
         PATH
+
         -> PATH
 
   
 
   
     if curChar is "?"
+
     elif char is "?"
 
         url.scheme = baseURL.scheme
 
         url.scheme = baseURL.scheme
 
         url.authority = baseURL.authority
 
         url.authority = baseURL.authority
 
         url.path = baseURL.path
 
         url.path = baseURL.path
         QUERY
+
         -> QUERY
 
   
 
   
     if curChar is "#"
+
     elif char is "#"
 
         url.scheme = baseURL.scheme
 
         url.scheme = baseURL.scheme
 
         url.authority = baseURL.authority
 
         url.authority = baseURL.authority
 
         url.path = baseURL.path
 
         url.path = baseURL.path
 
         url.query = baseURL.query
 
         url.query = baseURL.query
         FRAGMENT
+
         -> FRAGMENT
 
   
 
   
 
     else
 
     else
Line 92: Line 102:
 
       url.authority = baseURL.authority
 
       url.authority = baseURL.authority
 
       prepend input by baseURL.path up to the last /
 
       prepend input by baseURL.path up to the last /
       PATH
+
       -> PATH
 
   
 
   
   AUTHORITY
+
   AUTHORITY START
     if "/" or "\"
+
     if char is "/" or char is "\"
       AUTHORITY
+
       -> continue
 
     else
 
     else
       AUTHORITY-AFTER-SLASHES
+
       -> AUTHORITY
 
   
 
   
   AUTHORITY-AFTER-SLASHES
+
   AUTHORITY
 
     ...
 
     ...
 
   
 
   
 
   PATH
 
   PATH
     if curChar is "?"
+
     if char is "?"
       QUERY
+
       -> QUERY
     if curChar is "#"
+
     if char is "#"
       FRAGMENT
+
       -> FRAGMENT
 
   
 
   
 
   QUERY
 
   QUERY
     if curChar is "#"
+
     if char is "#"
       FRAGMENT
+
       -> FRAGMENT
 
   
 
   
 
   FRAGMENT
 
   FRAGMENT

Revision as of 14:18, 15 June 2012

This page documents research and notes on the URL specification.

Implementations

Model

URL (.href)
- invalid?
- scheme (.protocol)
- authority
  - username (proposed .username)
  - password (proposed .password)
  - ip/host (.hostname)
  - port (.port)
- path (.pathname)
- query (.search)
- fragment (.hash)

Parsing

parse (urlstr, optional baseURL)
 url = new URL
 tokenize(urlstr)

 SCHEME CHECK START
   if char is in ALPHA
     append char to buffer
     -> SCHEME CHECK NEXT
   else
     unconsume char
     -> NO SCHEME

 SCHEME CHECK NEXT
   if char is in ALPHA / DIGIT / "+" / "-" / "."
     append char to buffer
     -> continue
   elif char is ":"
     url.scheme = buffer.toASCIILowercase()
     buffer = ""
     -> SCHEME
   else
     unconsume char
     prepend buffer to input
     -> NO SCHEME

 SCHEME
   if url.scheme is not hierarchical (data:)
     -> NON-HIERARCHICAL
   elif baseURL and url.scheme is baseURL.scheme (http:?test)
     -> RELATIVE
   else (https://test.com/)
     -> AUTHORITY START

 NO SCHEME
   if not baseURL or baseURL.scheme is not hierarchical
     url.invalid = true
     return url
   else
     -> RELATIVE

 NON-HIERARCHICAL (could merge with PATH)
   if char is "#"
     -> FRAGMENT
   else
     ...

 RELATIVE
   if char is EOI (end-of-input)
     url = baseURL
     url.fragment = null
     exit

   elif char is "/" or char is "\"
     if next char is "/" or next char is "\"
       url.scheme = baseURL.scheme
       -> AUTHORITY START
     else
       url.scheme = baseURL.scheme
       url.authority = baseURL.authority
       -> PATH

   elif char is "?"
     url.scheme = baseURL.scheme
     url.authority = baseURL.authority
     url.path = baseURL.path
     -> QUERY

   elif char is "#"
     url.scheme = baseURL.scheme
     url.authority = baseURL.authority
     url.path = baseURL.path
     url.query = baseURL.query
     -> FRAGMENT

   else
     url.scheme = baseURL.scheme
     url.authority = baseURL.authority
     prepend baseURL.path (up to and including the last "/") to input
     -> PATH

 AUTHORITY START
   if char is "/" or char is "\"
     -> continue
   else
     -> AUTHORITY

 AUTHORITY
   ...

 PATH
   if char is "?"
     -> QUERY
   if char is "#"
     -> FRAGMENT

 QUERY
   if char is "#"
     -> FRAGMENT

 FRAGMENT
   ...