diff options
-rw-r--r-- | src/core/uri.c | 131 | ||||
-rw-r--r-- | src/include/ipxe/uri.h | 31 | ||||
-rw-r--r-- | src/net/tcp/httpcore.c | 4 | ||||
-rw-r--r-- | src/tests/uri_test.c | 53 | ||||
-rw-r--r-- | src/usr/imgmgmt.c | 4 |
5 files changed, 148 insertions, 75 deletions
diff --git a/src/core/uri.c b/src/core/uri.c index e9e512ab4..a0f79e9ec 100644 --- a/src/core/uri.c +++ b/src/core/uri.c @@ -79,12 +79,10 @@ size_t uri_decode ( const char *encoded, void *buf, size_t len ) { /** * Decode URI field in-place * - * @v uri URI - * @v field URI field index + * @v encoded Encoded field, or NULL */ -static void uri_decode_inplace ( struct uri *uri, unsigned int field ) { - const char *encoded = uri_field ( uri, field ); - char *decoded = ( ( char * ) encoded ); +static void uri_decode_inplace ( char *encoded ) { + char *decoded = encoded; size_t len; /* Do nothing if field is not present */ @@ -150,7 +148,7 @@ static int uri_character_escaped ( char c, unsigned int field ) { * parser but for any other URI parsers (e.g. HTTP query * string parsers, which care about '=' and '&'). */ - static const char *escaped[URI_FIELDS] = { + static const char *escaped[URI_EPATH] = { /* Scheme or default: escape everything */ [URI_SCHEME] = "/#:@?=&", /* Opaque part: escape characters which would affect @@ -172,20 +170,21 @@ static int uri_character_escaped ( char c, unsigned int field ) { * appears within paths. */ [URI_PATH] = "#:@?", - /* Query: escape everything except '/', which - * sometimes appears within queries. - */ - [URI_QUERY] = "#:@?", - /* Fragment: escape everything */ - [URI_FRAGMENT] = "/#:@?", }; - return ( /* Always escape non-printing characters and whitespace */ - ( ! isprint ( c ) ) || ( c == ' ' ) || - /* Always escape '%' */ - ( c == '%' ) || - /* Escape field-specific characters */ - strchr ( escaped[field], c ) ); + /* Always escape non-printing characters and whitespace */ + if ( ( ! isprint ( c ) ) || ( c == ' ' ) ) + return 1; + + /* Escape nothing else in already-escaped fields */ + if ( field >= URI_EPATH ) + return 0; + + /* Escape '%' and any field-specific characters */ + if ( ( c == '%' ) || strchr ( escaped[field], c ) ) + return 1; + + return 0; } /** @@ -262,10 +261,12 @@ static void uri_dump ( const struct uri *uri ) { DBGC ( uri, " port \"%s\"", uri->port ); if ( uri->path ) DBGC ( uri, " path \"%s\"", uri->path ); - if ( uri->query ) - DBGC ( uri, " query \"%s\"", uri->query ); - if ( uri->fragment ) - DBGC ( uri, " fragment \"%s\"", uri->fragment ); + if ( uri->epath ) + DBGC ( uri, " epath \"%s\"", uri->epath ); + if ( uri->equery ) + DBGC ( uri, " equery \"%s\"", uri->equery ); + if ( uri->efragment ) + DBGC ( uri, " efragment \"%s\"", uri->efragment ); if ( uri->params ) DBGC ( uri, " params \"%s\"", uri->params->name ); } @@ -298,17 +299,19 @@ struct uri * parse_uri ( const char *uri_string ) { char *raw; char *tmp; char *path; + char *epath; char *authority; size_t raw_len; unsigned int field; - /* Allocate space for URI struct and a copy of the string */ + /* Allocate space for URI struct and two copies of the string */ raw_len = ( strlen ( uri_string ) + 1 /* NUL */ ); - uri = zalloc ( sizeof ( *uri ) + raw_len ); + uri = zalloc ( sizeof ( *uri ) + ( 2 * raw_len ) ); if ( ! uri ) return NULL; ref_init ( &uri->refcnt, uri_free ); raw = ( ( ( void * ) uri ) + sizeof ( *uri ) ); + path = ( raw + raw_len ); /* Copy in the raw string */ memcpy ( raw, uri_string, raw_len ); @@ -328,7 +331,7 @@ struct uri * parse_uri ( const char *uri_string ) { /* Chop off the fragment, if it exists */ if ( ( tmp = strchr ( raw, '#' ) ) ) { *(tmp++) = '\0'; - uri->fragment = tmp; + uri->efragment = tmp; } /* Identify absolute/relative URI */ @@ -338,47 +341,47 @@ struct uri * parse_uri ( const char *uri_string ) { *(tmp++) = '\0'; if ( *tmp == '/' ) { /* Absolute URI with hierarchical part */ - path = tmp; + epath = tmp; } else { /* Absolute URI with opaque part */ uri->opaque = tmp; - path = NULL; + epath = NULL; } } else { /* Relative URI */ - path = raw; + epath = raw; } /* If we don't have a path (i.e. we have an absolute URI with * an opaque portion, we're already finished processing */ - if ( ! path ) + if ( ! epath ) goto done; /* Chop off the query, if it exists */ - if ( ( tmp = strchr ( path, '?' ) ) ) { + if ( ( tmp = strchr ( epath, '?' ) ) ) { *(tmp++) = '\0'; - uri->query = tmp; + uri->equery = tmp; } /* If we have no path remaining, then we're already finished * processing. */ - if ( ! path[0] ) + if ( ! epath[0] ) goto done; /* Identify net/absolute/relative path */ - if ( uri->scheme && ( strncmp ( path, "//", 2 ) == 0 ) ) { + if ( uri->scheme && ( strncmp ( epath, "//", 2 ) == 0 ) ) { /* Net path. If this is terminated by the first '/' * of an absolute path, then we have no space for a * terminator after the authority field, so shuffle * the authority down by one byte, overwriting one of * the two slashes. */ - authority = ( path + 2 ); + authority = ( epath + 2 ); if ( ( tmp = strchr ( authority, '/' ) ) ) { /* Shuffle down */ - uri->path = tmp; + uri->epath = tmp; memmove ( ( authority - 1 ), authority, ( tmp - authority ) ); authority--; @@ -386,10 +389,16 @@ struct uri * parse_uri ( const char *uri_string ) { } } else { /* Absolute/relative path */ - uri->path = path; + uri->epath = epath; authority = NULL; } + /* Create copy of path for decoding */ + if ( uri->epath ) { + strcpy ( path, uri->epath ); + uri->path = path; + } + /* If we don't have an authority (i.e. we have a non-net * path), we're already finished processing */ @@ -421,8 +430,8 @@ struct uri * parse_uri ( const char *uri_string ) { done: /* Decode fields in-place */ - for ( field = 0 ; field < URI_FIELDS ; field++ ) - uri_decode_inplace ( uri, field ); + for ( field = 0 ; field < URI_EPATH ; field++ ) + uri_decode_inplace ( ( char * ) uri_field ( uri, field ) ); DBGC ( uri, "URI parsed \"%s\" to", uri_string ); uri_dump ( uri ); @@ -458,8 +467,8 @@ size_t format_uri ( const struct uri *uri, char *buf, size_t len ) { static const char prefixes[URI_FIELDS] = { [URI_PASSWORD] = ':', [URI_PORT] = ':', - [URI_QUERY] = '?', - [URI_FRAGMENT] = '#', + [URI_EQUERY] = '?', + [URI_EFRAGMENT] = '#', }; char prefix; size_t used = 0; @@ -480,6 +489,10 @@ size_t format_uri ( const struct uri *uri, char *buf, size_t len ) { if ( ! uri_field ( uri, field ) ) continue; + /* Skip path field if encoded path is present */ + if ( ( field == URI_PATH ) && uri->epath ) + continue; + /* Prefix this field, if applicable */ prefix = prefixes[field]; if ( ( field == URI_HOST ) && ( uri->user != NULL ) ) @@ -676,6 +689,7 @@ char * resolve_path ( const char *base_path, struct uri * resolve_uri ( const struct uri *base_uri, struct uri *relative_uri ) { struct uri tmp_uri; + char *tmp_epath = NULL; char *tmp_path = NULL; struct uri *new_uri; @@ -685,20 +699,27 @@ struct uri * resolve_uri ( const struct uri *base_uri, /* Mangle URI */ memcpy ( &tmp_uri, base_uri, sizeof ( tmp_uri ) ); - if ( relative_uri->path ) { - tmp_path = resolve_path ( ( base_uri->path ? - base_uri->path : "/" ), - relative_uri->path ); + if ( relative_uri->epath ) { + tmp_epath = resolve_path ( ( base_uri->epath ? + base_uri->epath : "/" ), + relative_uri->epath ); + if ( ! tmp_epath ) + goto err_epath; + tmp_path = strdup ( tmp_epath ); + if ( ! tmp_path ) + goto err_path; + uri_decode_inplace ( tmp_path ); + tmp_uri.epath = tmp_epath; tmp_uri.path = tmp_path; - tmp_uri.query = relative_uri->query; - tmp_uri.fragment = relative_uri->fragment; + tmp_uri.equery = relative_uri->equery; + tmp_uri.efragment = relative_uri->efragment; tmp_uri.params = relative_uri->params; - } else if ( relative_uri->query ) { - tmp_uri.query = relative_uri->query; - tmp_uri.fragment = relative_uri->fragment; + } else if ( relative_uri->equery ) { + tmp_uri.equery = relative_uri->equery; + tmp_uri.efragment = relative_uri->efragment; tmp_uri.params = relative_uri->params; - } else if ( relative_uri->fragment ) { - tmp_uri.fragment = relative_uri->fragment; + } else if ( relative_uri->efragment ) { + tmp_uri.efragment = relative_uri->efragment; tmp_uri.params = relative_uri->params; } else if ( relative_uri->params ) { tmp_uri.params = relative_uri->params; @@ -707,7 +728,14 @@ struct uri * resolve_uri ( const struct uri *base_uri, /* Create demangled URI */ new_uri = uri_dup ( &tmp_uri ); free ( tmp_path ); + free ( tmp_epath ); return new_uri; + + free ( tmp_path ); + err_path: + free ( tmp_epath ); + err_epath: + return NULL; } /** @@ -746,6 +774,7 @@ static struct uri * tftp_uri ( struct sockaddr *sa_server, if ( asprintf ( &path, "/%s", filename ) < 0 ) goto err_path; tmp.path = path; + tmp.epath = path; /* Demangle URI */ uri = uri_dup ( &tmp ); diff --git a/src/include/ipxe/uri.h b/src/include/ipxe/uri.h index 3879a0e73..e5b7c8616 100644 --- a/src/include/ipxe/uri.h +++ b/src/include/ipxe/uri.h @@ -46,6 +46,20 @@ struct parameters; * scheme = "ftp", user = "joe", password = "secret", * host = "insecure.org", port = "8081", path = "/hidden/path/to", * query = "what=is", fragment = "this" + * + * The URI syntax includes a percent-encoding mechanism that can be + * used to represent characters that would otherwise not be possible, + * such as a '/' character within the password field. These encodings + * are decoded during the URI parsing stage, thereby allowing protocol + * implementations to consume the raw field values directly without + * further decoding. + * + * Some protocols (such as HTTP) communicate using URI-encoded values. + * For these protocols, the original encoded substring must be + * retained verbatim since the choice of whether or not to encode a + * particular character may have significance to the receiving + * application. We therefore retain the originally-encoded substrings + * for the path, query, and fragment fields. */ struct uri { /** Reference count */ @@ -62,12 +76,14 @@ struct uri { const char *host; /** Port number */ const char *port; - /** Path */ + /** Path (after URI decoding) */ const char *path; - /** Query */ - const char *query; - /** Fragment */ - const char *fragment; + /** Path (with original URI encoding) */ + const char *epath; + /** Query (with original URI encoding) */ + const char *equery; + /** Fragment (with original URI encoding) */ + const char *efragment; /** Form parameters */ struct parameters *params; } __attribute__ (( packed )); @@ -100,8 +116,9 @@ enum uri_fields { URI_HOST = URI_FIELD ( host ), URI_PORT = URI_FIELD ( port ), URI_PATH = URI_FIELD ( path ), - URI_QUERY = URI_FIELD ( query ), - URI_FRAGMENT = URI_FIELD ( fragment ), + URI_EPATH = URI_FIELD ( epath ), + URI_EQUERY = URI_FIELD ( equery ), + URI_EFRAGMENT = URI_FIELD ( efragment ), URI_FIELDS }; diff --git a/src/net/tcp/httpcore.c b/src/net/tcp/httpcore.c index 01bb496b2..fd94b5f08 100644 --- a/src/net/tcp/httpcore.c +++ b/src/net/tcp/httpcore.c @@ -614,8 +614,8 @@ int http_open ( struct interface *xfer, struct http_method *method, /* Calculate request URI length */ memset ( &request_uri, 0, sizeof ( request_uri ) ); - request_uri.path = ( uri->path ? uri->path : "/" ); - request_uri.query = uri->query; + request_uri.epath = ( uri->epath ? uri->epath : "/" ); + request_uri.equery = uri->equery; request_uri_len = ( format_uri ( &request_uri, NULL, 0 ) + 1 /* NUL */); diff --git a/src/tests/uri_test.c b/src/tests/uri_test.c index 92c2f9037..929ab3632 100644 --- a/src/tests/uri_test.c +++ b/src/tests/uri_test.c @@ -149,8 +149,10 @@ static void uri_okx ( struct uri *uri, struct uri *expected, const char *file, okx ( uristrcmp ( uri->host, expected->host ) == 0, file, line ); okx ( uristrcmp ( uri->port, expected->port ) == 0, file, line ); okx ( uristrcmp ( uri->path, expected->path ) == 0, file, line ); - okx ( uristrcmp ( uri->query, expected->query ) == 0, file, line ); - okx ( uristrcmp ( uri->fragment, expected->fragment ) == 0, file, line); + okx ( uristrcmp ( uri->epath, expected->epath ) == 0, file, line ); + okx ( uristrcmp ( uri->equery, expected->equery ) == 0, file, line ); + okx ( uristrcmp ( uri->efragment, expected->efragment ) == 0, + file, line); okx ( uri->params == expected->params, file, line ); } #define uri_ok( uri, expected ) uri_okx ( uri, expected, __FILE__, __LINE__ ) @@ -490,25 +492,33 @@ static struct uri_test uri_empty = { /** Basic HTTP URI */ static struct uri_test uri_boot_ipxe_org = { "http://boot.ipxe.org/demo/boot.php", - { .scheme = "http", .host = "boot.ipxe.org", .path = "/demo/boot.php" } + { .scheme = "http", .host = "boot.ipxe.org", + .path = "/demo/boot.php", .epath = "/demo/boot.php" }, }; /** Basic opaque URI */ static struct uri_test uri_mailto = { "mailto:ipxe-devel@lists.ipxe.org", - { .scheme = "mailto", .opaque = "ipxe-devel@lists.ipxe.org" } + { .scheme = "mailto", .opaque = "ipxe-devel@lists.ipxe.org" }, +}; + +/** Basic host-only URI */ +static struct uri_test uri_host = { + "http://boot.ipxe.org", + { .scheme = "http", .host = "boot.ipxe.org" }, }; /** Basic path-only URI */ static struct uri_test uri_path = { "/var/lib/tftpboot/pxelinux.0", - { .path = "/var/lib/tftpboot/pxelinux.0" }, + { .path = "/var/lib/tftpboot/pxelinux.0", + .epath ="/var/lib/tftpboot/pxelinux.0" }, }; /** Path-only URI with escaped characters */ static struct uri_test uri_path_escaped = { "/hello%20world%3F", - { .path = "/hello world?" }, + { .path = "/hello world?", .epath = "/hello%20world%3F" }, }; /** HTTP URI with all the trimmings */ @@ -521,8 +531,9 @@ static struct uri_test uri_http_all = { .host = "example.com", .port = "3001", .path = "/~foo/cgi-bin/foo.pl", - .query = "a=b&c=d", - .fragment = "bit", + .epath = "/~foo/cgi-bin/foo.pl", + .equery = "a=b&c=d", + .efragment = "bit", }, }; @@ -533,8 +544,9 @@ static struct uri_test uri_http_escaped = { .scheme = "https", .host = "test.ipxe.org", .path = "/wtf?\n", - .query = "kind#of/uri is", - .fragment = "this?", + .epath = "/wtf%3F%0A", + .equery = "kind%23of/uri%20is", + .efragment = "this%3F", }, }; @@ -550,8 +562,9 @@ static struct uri_test uri_http_escaped_improper = { .scheme = "https", .host = "test.ipxe.org", .path = "/wtf?\n", - .query = "kind#of/uri is", - .fragment = "this?", + .epath = "/wt%66%3f\n", + .equery = "kind%23of/uri is", + .efragment = "this?", }, }; @@ -562,6 +575,7 @@ static struct uri_test uri_ipv6 = { .scheme = "http", .host = "[2001:ba8:0:1d4::6950:5845]", .path = "/", + .epath = "/", }, }; @@ -573,6 +587,7 @@ static struct uri_test uri_ipv6_port = { .host = "[2001:ba8:0:1d4::6950:5845]", .port = "8001", .path = "/boot", + .epath = "/boot", }, }; @@ -583,6 +598,7 @@ static struct uri_test uri_ipv6_local = { .scheme = "http", .host = "[fe80::69ff:fe50:5845%net0]", .path = "/ipxe", + .epath = "/ipxe", }, }; @@ -598,6 +614,7 @@ static struct uri_test uri_ipv6_local_non_conforming = { .scheme = "http", .host = "[fe80::69ff:fe50:5845%net0]", .path = "/ipxe", + .epath = "/ipxe", }, }; @@ -625,6 +642,7 @@ static struct uri_test uri_file_absolute = { { .scheme = "file", .path = "/boot/script.ipxe", + .epath = "/boot/script.ipxe", }, }; @@ -635,6 +653,7 @@ static struct uri_test uri_file_volume = { .scheme = "file", .host = "hpilo", .path = "/boot/script.ipxe", + .epath = "/boot/script.ipxe", }, }; @@ -736,6 +755,7 @@ static struct uri_pxe_test uri_pxe_absolute = { .scheme = "http", .host = "not.a.tftp", .path = "/uri", + .epath = "/uri", }, "http://not.a.tftp/uri", }; @@ -754,6 +774,7 @@ static struct uri_pxe_test uri_pxe_absolute_path = { .scheme = "tftp", .host = "192.168.0.2", .path = "//absolute/path", + .epath = "//absolute/path", }, "tftp://192.168.0.2//absolute/path", }; @@ -772,6 +793,7 @@ static struct uri_pxe_test uri_pxe_relative_path = { .scheme = "tftp", .host = "192.168.0.3", .path = "/relative/path", + .epath = "/relative/path", }, "tftp://192.168.0.3/relative/path", }; @@ -790,8 +812,9 @@ static struct uri_pxe_test uri_pxe_icky = { .scheme = "tftp", .host = "10.0.0.6", .path = "/C:\\tftpboot\\icky#path", + .epath = "/C:\\tftpboot\\icky#path", }, - "tftp://10.0.0.6/C%3A\\tftpboot\\icky%23path", + "tftp://10.0.0.6/C:\\tftpboot\\icky#path", }; /** PXE URI with custom port */ @@ -810,6 +833,7 @@ static struct uri_pxe_test uri_pxe_port = { .host = "192.168.0.1", .port = "4069", .path = "//another/path", + .epath = "//another/path", }, "tftp://192.168.0.1:4069//another/path", }; @@ -873,6 +897,7 @@ static struct uri_params_test uri_params = { .scheme = "http", .host = "boot.ipxe.org", .path = "/demo/boot.php", + .epath = "/demo/boot.php", }, NULL, uri_params_list, @@ -902,6 +927,7 @@ static struct uri_params_test uri_named_params = { .host = "192.168.100.4", .port = "3001", .path = "/register", + .epath = "/register", }, "foo", uri_named_params_list, @@ -917,6 +943,7 @@ static void uri_test_exec ( void ) { uri_parse_format_dup_ok ( &uri_empty ); uri_parse_format_dup_ok ( &uri_boot_ipxe_org ); uri_parse_format_dup_ok ( &uri_mailto ); + uri_parse_format_dup_ok ( &uri_host ); uri_parse_format_dup_ok ( &uri_path ); uri_parse_format_dup_ok ( &uri_path_escaped ); uri_parse_format_dup_ok ( &uri_http_all ); diff --git a/src/usr/imgmgmt.c b/src/usr/imgmgmt.c index f8d149153..b7fc8293d 100644 --- a/src/usr/imgmgmt.c +++ b/src/usr/imgmgmt.c @@ -58,8 +58,8 @@ int imgdownload ( struct uri *uri, unsigned long timeout, memcpy ( &uri_redacted, uri, sizeof ( uri_redacted ) ); uri_redacted.user = NULL; uri_redacted.password = NULL; - uri_redacted.query = NULL; - uri_redacted.fragment = NULL; + uri_redacted.equery = NULL; + uri_redacted.efragment = NULL; uri_string_redacted = format_uri_alloc ( &uri_redacted ); if ( ! uri_string_redacted ) { rc = -ENOMEM; |