about summary refs log tree commit diff stats
path: root/src/include/ipxe/uri.h
diff options
context:
space:
mode:
author: Michael Brown <mcb30@ipxe.org> 2021-11-11 23:31:23 +0000
committer: Michael Brown <mcb30@ipxe.org> 2021-11-11 23:31:23 +0000
commit: 3332cfa45777a4d4f0f59aefa5561960ad835feb (patch)
tree: 9fb2b1513a3ff9739d9bef5e1852c68d081c5d71 /src/include/ipxe/uri.h
parent: 85eb961bf9afd61385430f8837485ba69cc0cc11 (diff)
download: ipxe-3332cfa45777a4d4f0f59aefa5561960ad835feb.tar.gz
[uri] Retain original encodings for path, query, and fragment fields (uri)
iPXE decodes any percent-encoded characters during the URI parsing stage, thereby allowing protocol implementations to consume the raw field values directly without further decoding.

When reconstructing a URI string for use in an HTTP request line, the percent-encoding is currently reapplied in a reversible way: we guarantee that our reconstructed URI string could be decoded to give the same raw field values.

This technically violates RFC3986, which states that "URIs that differ in the replacement of a reserved character with its corresponding percent-encoded octet are not equivalent". Experiments show that several HTTP server applications will attach meaning to the choice of whether or not a particular character was percent-encoded, even when the percent-encoding is unnecessary from the perspective of parsing the URI into its component fields.

Fix by storing the originally encoded substrings for the path, query, and fragment fields and using these original encoded versions when reconstructing a URI string. The path field is also stored as a decoded string, for use by protocols such as TFTP that communicate using raw strings rather than URI-encoded strings.

All other fields (such as the username and password) continue to be stored only in their decoded versions since nothing ever needs to know the originally encoded versions of these fields.

Signed-off-by: Michael Brown <mcb30@ipxe.org>
Diffstat (limited to 'src/include/ipxe/uri.h')
-rw-r--r-- src/include/ipxe/uri.h 31
1 files changed, 24 insertions, 7 deletions
diff --git a/src/include/ipxe/uri.h b/src/include/ipxe/uri.h
index 3879a0e73..e5b7c8616 100644
--- a/src/include/ipxe/uri.h
+++ b/src/include/ipxe/uri.h
@@ -46,6 +46,20 @@ struct parameters;
* scheme = "ftp", user = "joe", password = "secret",
* host = "insecure.org", port = "8081", path = "/hidden/path/to",
* query = "what=is", fragment = "this"
+ *
+ * The URI syntax includes a percent-encoding mechanism that can be
+ * used to represent characters that would otherwise not be possible,
+ * such as a '/' character within the password field. These encodings
+ * are decoded during the URI parsing stage, thereby allowing protocol
+ * implementations to consume the raw field values directly without
+ * further decoding.
+ *
+ * Some protocols (such as HTTP) communicate using URI-encoded values.
+ * For these protocols, the original encoded substring must be
+ * retained verbatim since the choice of whether or not to encode a
+ * particular character may have significance to the receiving
+ * application. We therefore retain the originally-encoded substrings
+ * for the path, query, and fragment fields.
*/
struct uri {
/** Reference count */
@@ -62,12 +76,14 @@ struct uri {
const char *host;
/** Port number */
const char *port;
- /** Path */
+ /** Path (after URI decoding) */
const char *path;
- /** Query */
- const char *query;
- /** Fragment */
- const char *fragment;
+ /** Path (with original URI encoding) */
+ const char *epath;
+ /** Query (with original URI encoding) */
+ const char *equery;
+ /** Fragment (with original URI encoding) */
+ const char *efragment;
/** Form parameters */
struct parameters *params;
} __attribute__ (( packed ));
@@ -100,8 +116,9 @@ enum uri_fields {
URI_HOST = URI_FIELD ( host ),
URI_PORT = URI_FIELD ( port ),
URI_PATH = URI_FIELD ( path ),
- URI_QUERY = URI_FIELD ( query ),
- URI_FRAGMENT = URI_FIELD ( fragment ),
+ URI_EPATH = URI_FIELD ( epath ),
+ URI_EQUERY = URI_FIELD ( equery ),
+ URI_EFRAGMENT = URI_FIELD ( efragment ),
URI_FIELDS
};