mirror of
https://github.com/arduino-libraries/ArduinoHttpClient.git
synced 2025-04-19 21:22:15 +03:00
Merge pull request #173 from andreagilardoni/url-parser
Added Url parser
This commit is contained in:
commit
2143747e7d
@ -4,4 +4,4 @@
|
|||||||
ignore-words-list = ,
|
ignore-words-list = ,
|
||||||
check-filenames =
|
check-filenames =
|
||||||
check-hidden =
|
check-hidden =
|
||||||
skip = ./.git
|
skip = ./.git,./src/utility/URLParser
|
||||||
|
29
examples/ParseURL/ParseURL.ino
Normal file
29
examples/ParseURL/ParseURL.ino
Normal file
@ -0,0 +1,29 @@
|
|||||||
|
#include "URLParser.h"
|
||||||
|
|
||||||
|
// Parses one fixed sample URL and prints each of its components, quoted,
// on the serial port.
void setup() {
  Serial.begin(9600);
  while (!Serial) {
    // spin until Serial evaluates to true
  }

  Serial.println("starting");

  ParsedUrl url("https://www.google.com/search?q=arduino");

  Serial.print("parsed URL schema: \"");
  Serial.print(url.schema());

  Serial.print("\"\nparsed URL host: \"");
  Serial.print(url.host());

  Serial.print("\"\nparsed URL path: \"");
  Serial.print(url.path());

  Serial.print("\"\nparsed URL query: \"");
  Serial.print(url.query());

  Serial.print("\"\nparsed URL userinfo: \"");
  Serial.print(url.userinfo());

  Serial.println("\"");
}
|
||||||
|
|
||||||
|
// Intentionally empty: this sketch has no repeated work to do.
void loop() {
}
|
108
src/URLParser.h
Normal file
108
src/URLParser.h
Normal file
@ -0,0 +1,108 @@
|
|||||||
|
/*
|
||||||
|
* PackageLicenseDeclared: Apache-2.0
|
||||||
|
* Copyright (c) 2017 ARM Limited
|
||||||
|
*
|
||||||
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
* you may not use this file except in compliance with the License.
|
||||||
|
* You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
/*
|
||||||
|
 * The class below is also provided by the mbed libraries (e.g. for STM32H7); when their header is available, include it instead of defining the class here
|
||||||
|
*/
|
||||||
|
#if defined __has_include
|
||||||
|
# if __has_include(<utility/http_parsed_url.h>)
|
||||||
|
# include <utility/http_parsed_url.h>
|
||||||
|
# else
|
||||||
|
# define NO_HTTP_PARSED
|
||||||
|
# endif
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#ifdef NO_HTTP_PARSED
|
||||||
|
#ifndef _MBED_HTTP_PARSED_URL_H_
|
||||||
|
#define _MBED_HTTP_PARSED_URL_H_
|
||||||
|
|
||||||
|
#include "utility/URLParser/http_parser.h"
|
||||||
|
|
||||||
|
class ParsedUrl {
|
||||||
|
public:
|
||||||
|
ParsedUrl(const char* url) {
|
||||||
|
struct http_parser_url parsed_url;
|
||||||
|
http_parser_parse_url(url, strlen(url), false, &parsed_url);
|
||||||
|
|
||||||
|
for (size_t ix = 0; ix < UF_MAX; ix++) {
|
||||||
|
char* value;
|
||||||
|
if (parsed_url.field_set & (1 << ix)) {
|
||||||
|
value = (char*)calloc(parsed_url.field_data[ix].len + 1, 1);
|
||||||
|
memcpy(value, url + parsed_url.field_data[ix].off,
|
||||||
|
parsed_url.field_data[ix].len);
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
value = (char*)calloc(1, 1);
|
||||||
|
}
|
||||||
|
|
||||||
|
switch ((http_parser_url_fields)ix) {
|
||||||
|
case UF_SCHEMA: _schema = value; break;
|
||||||
|
case UF_HOST: _host = value; break;
|
||||||
|
case UF_PATH: _path = value; break;
|
||||||
|
case UF_QUERY: _query = value; break;
|
||||||
|
case UF_USERINFO: _userinfo = value; break;
|
||||||
|
default:
|
||||||
|
// PORT is already parsed, FRAGMENT is not relevant for HTTP requests
|
||||||
|
free(value);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
_port = parsed_url.port;
|
||||||
|
if (!_port) {
|
||||||
|
if (strcmp(_schema, "https") == 0 || strcmp(_schema, "wss") == 0) {
|
||||||
|
_port = 443;
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
_port = 80;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (strcmp(_path, "") == 0) {
|
||||||
|
free(_path);
|
||||||
|
_path = (char*)calloc(2, 1);
|
||||||
|
_path[0] = '/';
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
~ParsedUrl() {
|
||||||
|
if (_schema) free(_schema);
|
||||||
|
if (_host) free(_host);
|
||||||
|
if (_path) free(_path);
|
||||||
|
if (_query) free(_query);
|
||||||
|
if (_userinfo) free(_userinfo);
|
||||||
|
}
|
||||||
|
|
||||||
|
uint16_t port() const { return _port; }
|
||||||
|
char* schema() const { return _schema; }
|
||||||
|
char* host() const { return _host; }
|
||||||
|
char* path() const { return _path; }
|
||||||
|
char* query() const { return _query; }
|
||||||
|
char* userinfo() const { return _userinfo; }
|
||||||
|
|
||||||
|
private:
|
||||||
|
uint16_t _port;
|
||||||
|
char* _schema;
|
||||||
|
char* _host;
|
||||||
|
char* _path;
|
||||||
|
char* _query;
|
||||||
|
char* _userinfo;
|
||||||
|
};
|
||||||
|
|
||||||
|
#endif // _MBED_HTTP_PARSED_URL_H_
|
||||||
|
#endif // NO_HTTP_PARSED
|
||||||
|
#undef NO_HTTP_PARSED
|
23
src/utility/URLParser/LICENSE
Normal file
23
src/utility/URLParser/LICENSE
Normal file
@ -0,0 +1,23 @@
|
|||||||
|
http_parser.c is based on src/http/ngx_http_parse.c from NGINX copyright
|
||||||
|
Igor Sysoev.
|
||||||
|
|
||||||
|
Additional changes are licensed under the same terms as NGINX and
|
||||||
|
copyright Joyent, Inc. and other Node contributors. All rights reserved.
|
||||||
|
|
||||||
|
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||||
|
of this software and associated documentation files (the "Software"), to
|
||||||
|
deal in the Software without restriction, including without limitation the
|
||||||
|
rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
|
||||||
|
sell copies of the Software, and to permit persons to whom the Software is
|
||||||
|
furnished to do so, subject to the following conditions:
|
||||||
|
|
||||||
|
The above copyright notice and this permission notice shall be included in
|
||||||
|
all copies or substantial portions of the Software.
|
||||||
|
|
||||||
|
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||||
|
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||||
|
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||||
|
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||||
|
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||||
|
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
|
||||||
|
IN THE SOFTWARE.
|
5
src/utility/URLParser/README.md
Normal file
5
src/utility/URLParser/README.md
Normal file
@ -0,0 +1,5 @@
|
|||||||
|
# http_parser library
|
||||||
|
|
||||||
|
This code is imported from: https://github.com/arduino/ArduinoCore-mbed/tree/4.1.1/libraries/SocketWrapper/src/utility/http_parser
|
||||||
|
|
||||||
|
The code has been reduced in size by removing everything that is not related to URL parsing.
|
591
src/utility/URLParser/http_parser.c
Normal file
591
src/utility/URLParser/http_parser.c
Normal file
@ -0,0 +1,591 @@
|
|||||||
|
#if defined __has_include
|
||||||
|
# if ! __has_include(<utility/http_parser/http_parser.h>) && ! __has_include(<http_parser.h>)
|
||||||
|
# define NO_HTTP_PARSER
|
||||||
|
# endif
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#ifdef NO_HTTP_PARSER
|
||||||
|
/* Based on src/http/ngx_http_parse.c from NGINX copyright Igor Sysoev
|
||||||
|
*
|
||||||
|
* Additional changes are licensed under the same terms as NGINX and
|
||||||
|
* copyright Joyent, Inc. and other Node contributors. All rights reserved.
|
||||||
|
*
|
||||||
|
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||||
|
* of this software and associated documentation files (the "Software"), to
|
||||||
|
* deal in the Software without restriction, including without limitation the
|
||||||
|
* rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
|
||||||
|
* sell copies of the Software, and to permit persons to whom the Software is
|
||||||
|
* furnished to do so, subject to the following conditions:
|
||||||
|
*
|
||||||
|
* The above copyright notice and this permission notice shall be included in
|
||||||
|
* all copies or substantial portions of the Software.
|
||||||
|
*
|
||||||
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||||
|
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||||
|
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||||
|
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||||
|
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||||
|
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
|
||||||
|
* IN THE SOFTWARE.
|
||||||
|
*/
|
||||||
|
#include "http_parser.h"
|
||||||
|
#include <assert.h>
|
||||||
|
#include <stddef.h>
|
||||||
|
#include <ctype.h>
|
||||||
|
#include <stdlib.h>
|
||||||
|
#include <string.h>
|
||||||
|
#include <limits.h>
|
||||||
|
|
||||||
|
/* BIT_AT(a, i): evaluates to 1 when bit number `i` of the byte array `a` is
 * set and to 0 otherwise. Bit `i` lives in byte `i >> 3`, at position `i & 7`
 * counted from the least significant bit. */
#ifndef BIT_AT
# define BIT_AT(a, i) \
  (!!((unsigned int) (a)[(unsigned int) (i) >> 3] & (1 << ((unsigned int) (i) & 7))))
#endif

/* SET_ERRNO(e): stores `e` in the http_errno member of whatever the identifier
 * `parser` points to at the place where the macro is expanded. */
#define SET_ERRNO(e) do { parser->http_errno = (e); } while(0)
|
||||||
|
|
||||||
|
/* T(v) marks a bit that is only accepted outside strict mode: it expands to 0
 * when HTTP_PARSER_STRICT is non-zero and to `v` otherwise. */
#if HTTP_PARSER_STRICT
# define T(v) 0
#else
# define T(v) v
#endif

/* Bitmap with one bit per character code: element k covers codes 8*k .. 8*k+7,
 * least significant bit first. Only the first 16 elements are listed, the
 * remaining ones are zero-initialised. */
static const uint8_t normal_url_char[32] = {
  0x00,           /*   0 -   7: nul soh stx etx eot enq ack bel - none set          */
  T(2) | T(16),   /*   8 -  15: ht (9) and np (12) are set via T() only             */
  0x00,           /*  16 -  23: dle dc1 dc2 dc3 dc4 nak syn etb - none set          */
  0x00,           /*  24 -  31: can em sub esc fs gs rs us - none set               */
  0xF6,           /*  32 -  39: ! " $ % & ' set; sp (32) and # (35) clear           */
  0xFF,           /*  40 -  47: ( ) * + , - . /                                     */
  0xFF,           /*  48 -  55: 0 1 2 3 4 5 6 7                                     */
  0x7F,           /*  56 -  63: 8 9 : ; < = > set; ? (63) clear                     */
  0xFF,           /*  64 -  71: @ A B C D E F G                                     */
  0xFF,           /*  72 -  79: H I J K L M N O                                     */
  0xFF,           /*  80 -  87: P Q R S T U V W                                     */
  0xFF,           /*  88 -  95: X Y Z [ \ ] ^ _                                     */
  0xFF,           /*  96 - 103: ` a b c d e f g                                     */
  0xFF,           /* 104 - 111: h i j k l m n o                                     */
  0xFF,           /* 112 - 119: p q r s t u v w                                     */
  0x7F,           /* 120 - 127: x y z { | } ~ set; del (127) clear                  */
};

#undef T
|
||||||
|
|
||||||
|
/* States of the URL state machine driven by parse_url_char(). Values are
 * consecutive, starting at 1. */
enum state {
  s_dead = 1,               /* important that this is > 0 */

  s_start_req,

  s_req_spaces_before_url,
  s_req_schema,
  s_req_schema_slash,
  s_req_schema_slash_slash,
  s_req_server_start,
  s_req_server,
  s_req_server_with_at,
  s_req_path,
  s_req_query_string_start,
  s_req_query_string,
  s_req_fragment_start,
  s_req_fragment,
  s_headers_done
};
|
||||||
|
|
||||||
|
/* States of the host state machine driven by http_parse_host_char(). Values
 * are consecutive, starting at 1. */
enum http_host_state {
  s_http_host_dead = 1,
  s_http_userinfo_start,
  s_http_userinfo,
  s_http_host_start,
  s_http_host_v6_start,
  s_http_host,
  s_http_host_v6,
  s_http_host_v6_end,
  s_http_host_v6_zone_start,
  s_http_host_v6_zone,
  s_http_host_port_start,
  s_http_host_port
};
|
||||||
|
|
||||||
|
/* Macros for character classes; IS_URL_CHAR and IS_HOST_CHAR depend on
 * HTTP_PARSER_STRICT. Every argument is parenthesized so that compound
 * expressions can be passed safely; arguments may be evaluated several times,
 * so they must not have side effects. */

/* ASCII letter folded to lower case by setting bit 0x20. */
#define LOWER(c)            ((unsigned char)((c) | 0x20))
#define IS_ALPHA(c)         (LOWER(c) >= 'a' && LOWER(c) <= 'z')
#define IS_NUM(c)           ((c) >= '0' && (c) <= '9')
#define IS_ALPHANUM(c)      (IS_ALPHA(c) || IS_NUM(c))
#define IS_HEX(c)           (IS_NUM(c) || (LOWER(c) >= 'a' && LOWER(c) <= 'f'))
#define IS_MARK(c)          ((c) == '-' || (c) == '_' || (c) == '.' || \
  (c) == '!' || (c) == '~' || (c) == '*' || (c) == '\'' || (c) == '(' || \
  (c) == ')')
#define IS_USERINFO_CHAR(c) (IS_ALPHANUM(c) || IS_MARK(c) || (c) == '%' || \
  (c) == ';' || (c) == ':' || (c) == '&' || (c) == '=' || (c) == '+' || \
  (c) == '$' || (c) == ',')

#if HTTP_PARSER_STRICT
#define IS_URL_CHAR(c)      (BIT_AT(normal_url_char, (unsigned char)(c)))
#define IS_HOST_CHAR(c)     (IS_ALPHANUM(c) || (c) == '.' || (c) == '-')
#else
/* Outside strict mode any byte with the high bit set is accepted as well,
 * and '_' is allowed in host names. */
#define IS_URL_CHAR(c)                                                         \
  (BIT_AT(normal_url_char, (unsigned char)(c)) || ((c) & 0x80))
#define IS_HOST_CHAR(c)                                                        \
  (IS_ALPHANUM(c) || (c) == '.' || (c) == '-' || (c) == '_')
#endif
|
||||||
|
|
||||||
|
/* Our URL parser.
|
||||||
|
*
|
||||||
|
* This is designed to be shared by http_parser_execute() for URL validation,
|
||||||
|
* hence it has a state transition + byte-for-byte interface. In addition, it
|
||||||
|
* is meant to be embedded in http_parser_parse_url(), which does the dirty
|
||||||
|
* work of turning state transitions URL components for its API.
|
||||||
|
*
|
||||||
|
* This function should only be invoked with non-space characters. It is
|
||||||
|
* assumed that the caller cares about (and can detect) the transition between
|
||||||
|
* URL and non-URL states by looking for these.
|
||||||
|
*/
|
||||||
|
static enum state
|
||||||
|
parse_url_char(enum state s, const char ch)
|
||||||
|
{
|
||||||
|
if (ch == ' ' || ch == '\r' || ch == '\n') {
|
||||||
|
return s_dead;
|
||||||
|
}
|
||||||
|
|
||||||
|
#if HTTP_PARSER_STRICT
|
||||||
|
if (ch == '\t' || ch == '\f') {
|
||||||
|
return s_dead;
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
|
switch (s) {
|
||||||
|
case s_req_spaces_before_url:
|
||||||
|
/* Proxied requests are followed by scheme of an absolute URI (alpha).
|
||||||
|
* All methods except CONNECT are followed by '/' or '*'.
|
||||||
|
*/
|
||||||
|
|
||||||
|
if (ch == '/' || ch == '*') {
|
||||||
|
return s_req_path;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (IS_ALPHA(ch)) {
|
||||||
|
return s_req_schema;
|
||||||
|
}
|
||||||
|
|
||||||
|
break;
|
||||||
|
|
||||||
|
case s_req_schema:
|
||||||
|
if (IS_ALPHA(ch)) {
|
||||||
|
return s;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (ch == ':') {
|
||||||
|
return s_req_schema_slash;
|
||||||
|
}
|
||||||
|
|
||||||
|
break;
|
||||||
|
|
||||||
|
case s_req_schema_slash:
|
||||||
|
if (ch == '/') {
|
||||||
|
return s_req_schema_slash_slash;
|
||||||
|
}
|
||||||
|
|
||||||
|
break;
|
||||||
|
|
||||||
|
case s_req_schema_slash_slash:
|
||||||
|
if (ch == '/') {
|
||||||
|
return s_req_server_start;
|
||||||
|
}
|
||||||
|
|
||||||
|
break;
|
||||||
|
|
||||||
|
case s_req_server_with_at:
|
||||||
|
if (ch == '@') {
|
||||||
|
return s_dead;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* FALLTHROUGH */
|
||||||
|
case s_req_server_start:
|
||||||
|
case s_req_server:
|
||||||
|
if (ch == '/') {
|
||||||
|
return s_req_path;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (ch == '?') {
|
||||||
|
return s_req_query_string_start;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (ch == '@') {
|
||||||
|
return s_req_server_with_at;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (IS_USERINFO_CHAR(ch) || ch == '[' || ch == ']') {
|
||||||
|
return s_req_server;
|
||||||
|
}
|
||||||
|
|
||||||
|
break;
|
||||||
|
|
||||||
|
case s_req_path:
|
||||||
|
if (IS_URL_CHAR(ch)) {
|
||||||
|
return s;
|
||||||
|
}
|
||||||
|
|
||||||
|
switch (ch) {
|
||||||
|
case '?':
|
||||||
|
return s_req_query_string_start;
|
||||||
|
|
||||||
|
case '#':
|
||||||
|
return s_req_fragment_start;
|
||||||
|
}
|
||||||
|
|
||||||
|
break;
|
||||||
|
|
||||||
|
case s_req_query_string_start:
|
||||||
|
case s_req_query_string:
|
||||||
|
if (IS_URL_CHAR(ch)) {
|
||||||
|
return s_req_query_string;
|
||||||
|
}
|
||||||
|
|
||||||
|
switch (ch) {
|
||||||
|
case '?':
|
||||||
|
/* allow extra '?' in query string */
|
||||||
|
return s_req_query_string;
|
||||||
|
|
||||||
|
case '#':
|
||||||
|
return s_req_fragment_start;
|
||||||
|
}
|
||||||
|
|
||||||
|
break;
|
||||||
|
|
||||||
|
case s_req_fragment_start:
|
||||||
|
if (IS_URL_CHAR(ch)) {
|
||||||
|
return s_req_fragment;
|
||||||
|
}
|
||||||
|
|
||||||
|
switch (ch) {
|
||||||
|
case '?':
|
||||||
|
return s_req_fragment;
|
||||||
|
|
||||||
|
case '#':
|
||||||
|
return s;
|
||||||
|
}
|
||||||
|
|
||||||
|
break;
|
||||||
|
|
||||||
|
case s_req_fragment:
|
||||||
|
if (IS_URL_CHAR(ch)) {
|
||||||
|
return s;
|
||||||
|
}
|
||||||
|
|
||||||
|
switch (ch) {
|
||||||
|
case '?':
|
||||||
|
case '#':
|
||||||
|
return s;
|
||||||
|
}
|
||||||
|
|
||||||
|
break;
|
||||||
|
|
||||||
|
default:
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* We should never fall out of the switch above unless there's an error */
|
||||||
|
return s_dead;
|
||||||
|
}
|
||||||
|
|
||||||
|
static enum http_host_state
|
||||||
|
http_parse_host_char(enum http_host_state s, const char ch) {
|
||||||
|
switch(s) {
|
||||||
|
case s_http_userinfo:
|
||||||
|
case s_http_userinfo_start:
|
||||||
|
if (ch == '@') {
|
||||||
|
return s_http_host_start;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (IS_USERINFO_CHAR(ch)) {
|
||||||
|
return s_http_userinfo;
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
|
||||||
|
case s_http_host_start:
|
||||||
|
if (ch == '[') {
|
||||||
|
return s_http_host_v6_start;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (IS_HOST_CHAR(ch)) {
|
||||||
|
return s_http_host;
|
||||||
|
}
|
||||||
|
|
||||||
|
break;
|
||||||
|
|
||||||
|
case s_http_host:
|
||||||
|
if (IS_HOST_CHAR(ch)) {
|
||||||
|
return s_http_host;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* FALLTHROUGH */
|
||||||
|
case s_http_host_v6_end:
|
||||||
|
if (ch == ':') {
|
||||||
|
return s_http_host_port_start;
|
||||||
|
}
|
||||||
|
|
||||||
|
break;
|
||||||
|
|
||||||
|
case s_http_host_v6:
|
||||||
|
if (ch == ']') {
|
||||||
|
return s_http_host_v6_end;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* FALLTHROUGH */
|
||||||
|
case s_http_host_v6_start:
|
||||||
|
if (IS_HEX(ch) || ch == ':' || ch == '.') {
|
||||||
|
return s_http_host_v6;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (s == s_http_host_v6 && ch == '%') {
|
||||||
|
return s_http_host_v6_zone_start;
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
|
||||||
|
case s_http_host_v6_zone:
|
||||||
|
if (ch == ']') {
|
||||||
|
return s_http_host_v6_end;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* FALLTHROUGH */
|
||||||
|
case s_http_host_v6_zone_start:
|
||||||
|
/* RFC 6874 Zone ID consists of 1*( unreserved / pct-encoded) */
|
||||||
|
if (IS_ALPHANUM(ch) || ch == '%' || ch == '.' || ch == '-' || ch == '_' ||
|
||||||
|
ch == '~') {
|
||||||
|
return s_http_host_v6_zone;
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
|
||||||
|
case s_http_host_port:
|
||||||
|
case s_http_host_port_start:
|
||||||
|
if (IS_NUM(ch)) {
|
||||||
|
return s_http_host_port;
|
||||||
|
}
|
||||||
|
|
||||||
|
break;
|
||||||
|
|
||||||
|
default:
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
return s_http_host_dead;
|
||||||
|
}
|
||||||
|
|
||||||
|
static int
|
||||||
|
http_parse_host(const char * buf, struct http_parser_url *u, int found_at) {
|
||||||
|
enum http_host_state s;
|
||||||
|
|
||||||
|
const char *p;
|
||||||
|
uint32_t buflen = u->field_data[UF_HOST].off + u->field_data[UF_HOST].len;
|
||||||
|
|
||||||
|
assert(u->field_set & (1 << UF_HOST));
|
||||||
|
|
||||||
|
u->field_data[UF_HOST].len = 0;
|
||||||
|
|
||||||
|
s = found_at ? s_http_userinfo_start : s_http_host_start;
|
||||||
|
|
||||||
|
for (p = buf + u->field_data[UF_HOST].off; p < buf + buflen; p++) {
|
||||||
|
enum http_host_state new_s = http_parse_host_char(s, *p);
|
||||||
|
|
||||||
|
if (new_s == s_http_host_dead) {
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
switch(new_s) {
|
||||||
|
case s_http_host:
|
||||||
|
if (s != s_http_host) {
|
||||||
|
u->field_data[UF_HOST].off = p - buf;
|
||||||
|
}
|
||||||
|
u->field_data[UF_HOST].len++;
|
||||||
|
break;
|
||||||
|
|
||||||
|
case s_http_host_v6:
|
||||||
|
if (s != s_http_host_v6) {
|
||||||
|
u->field_data[UF_HOST].off = p - buf;
|
||||||
|
}
|
||||||
|
u->field_data[UF_HOST].len++;
|
||||||
|
break;
|
||||||
|
|
||||||
|
case s_http_host_v6_zone_start:
|
||||||
|
case s_http_host_v6_zone:
|
||||||
|
u->field_data[UF_HOST].len++;
|
||||||
|
break;
|
||||||
|
|
||||||
|
case s_http_host_port:
|
||||||
|
if (s != s_http_host_port) {
|
||||||
|
u->field_data[UF_PORT].off = p - buf;
|
||||||
|
u->field_data[UF_PORT].len = 0;
|
||||||
|
u->field_set |= (1 << UF_PORT);
|
||||||
|
}
|
||||||
|
u->field_data[UF_PORT].len++;
|
||||||
|
break;
|
||||||
|
|
||||||
|
case s_http_userinfo:
|
||||||
|
if (s != s_http_userinfo) {
|
||||||
|
u->field_data[UF_USERINFO].off = p - buf ;
|
||||||
|
u->field_data[UF_USERINFO].len = 0;
|
||||||
|
u->field_set |= (1 << UF_USERINFO);
|
||||||
|
}
|
||||||
|
u->field_data[UF_USERINFO].len++;
|
||||||
|
break;
|
||||||
|
|
||||||
|
default:
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
s = new_s;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Make sure we don't end somewhere unexpected */
|
||||||
|
switch (s) {
|
||||||
|
case s_http_host_start:
|
||||||
|
case s_http_host_v6_start:
|
||||||
|
case s_http_host_v6:
|
||||||
|
case s_http_host_v6_zone_start:
|
||||||
|
case s_http_host_v6_zone:
|
||||||
|
case s_http_host_port_start:
|
||||||
|
case s_http_userinfo:
|
||||||
|
case s_http_userinfo_start:
|
||||||
|
return 1;
|
||||||
|
default:
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
void
|
||||||
|
http_parser_url_init(struct http_parser_url *u) {
|
||||||
|
memset(u, 0, sizeof(*u));
|
||||||
|
}
|
||||||
|
|
||||||
|
int
|
||||||
|
http_parser_parse_url(const char *buf, uint32_t buflen, int is_connect,
|
||||||
|
struct http_parser_url *u)
|
||||||
|
{
|
||||||
|
enum state s;
|
||||||
|
const char *p;
|
||||||
|
enum http_parser_url_fields uf, old_uf;
|
||||||
|
int found_at = 0;
|
||||||
|
|
||||||
|
u->port = u->field_set = 0;
|
||||||
|
s = is_connect ? s_req_server_start : s_req_spaces_before_url;
|
||||||
|
old_uf = UF_MAX;
|
||||||
|
|
||||||
|
for (p = buf; p < buf + buflen; p++) {
|
||||||
|
s = parse_url_char(s, *p);
|
||||||
|
|
||||||
|
/* Figure out the next field that we're operating on */
|
||||||
|
switch (s) {
|
||||||
|
case s_dead:
|
||||||
|
return 1;
|
||||||
|
|
||||||
|
/* Skip delimeters */
|
||||||
|
case s_req_schema_slash:
|
||||||
|
case s_req_schema_slash_slash:
|
||||||
|
case s_req_server_start:
|
||||||
|
case s_req_query_string_start:
|
||||||
|
case s_req_fragment_start:
|
||||||
|
continue;
|
||||||
|
|
||||||
|
case s_req_schema:
|
||||||
|
uf = UF_SCHEMA;
|
||||||
|
break;
|
||||||
|
|
||||||
|
case s_req_server_with_at:
|
||||||
|
found_at = 1;
|
||||||
|
|
||||||
|
/* FALLTROUGH */
|
||||||
|
case s_req_server:
|
||||||
|
uf = UF_HOST;
|
||||||
|
break;
|
||||||
|
|
||||||
|
case s_req_path:
|
||||||
|
uf = UF_PATH;
|
||||||
|
break;
|
||||||
|
|
||||||
|
case s_req_query_string:
|
||||||
|
uf = UF_QUERY;
|
||||||
|
break;
|
||||||
|
|
||||||
|
case s_req_fragment:
|
||||||
|
uf = UF_FRAGMENT;
|
||||||
|
break;
|
||||||
|
|
||||||
|
default:
|
||||||
|
assert(!"Unexpected state");
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Nothing's changed; soldier on */
|
||||||
|
if (uf == old_uf) {
|
||||||
|
u->field_data[uf].len++;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
u->field_data[uf].off = p - buf;
|
||||||
|
u->field_data[uf].len = 1;
|
||||||
|
|
||||||
|
u->field_set |= (1 << uf);
|
||||||
|
old_uf = uf;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* host must be present if there is a schema */
|
||||||
|
/* parsing http:///toto will fail */
|
||||||
|
if ((u->field_set & (1 << UF_SCHEMA)) &&
|
||||||
|
(u->field_set & (1 << UF_HOST)) == 0) {
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (u->field_set & (1 << UF_HOST)) {
|
||||||
|
if (http_parse_host(buf, u, found_at) != 0) {
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/* CONNECT requests can only contain "hostname:port" */
|
||||||
|
if (is_connect && u->field_set != ((1 << UF_HOST)|(1 << UF_PORT))) {
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (u->field_set & (1 << UF_PORT)) {
|
||||||
|
/* Don't bother with endp; we've already validated the string */
|
||||||
|
unsigned long v = strtoul(buf + u->field_data[UF_PORT].off, NULL, 10);
|
||||||
|
|
||||||
|
/* Ports have a max value of 2^16 */
|
||||||
|
if (v > 0xffff) {
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
u->port = (uint16_t) v;
|
||||||
|
}
|
||||||
|
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
unsigned long
|
||||||
|
http_parser_version(void) {
|
||||||
|
return HTTP_PARSER_VERSION_MAJOR * 0x10000 |
|
||||||
|
HTTP_PARSER_VERSION_MINOR * 0x00100 |
|
||||||
|
HTTP_PARSER_VERSION_PATCH * 0x00001;
|
||||||
|
}
|
||||||
|
|
||||||
|
#endif // NO_HTTP_PARSER
|
96
src/utility/URLParser/http_parser.h
Normal file
96
src/utility/URLParser/http_parser.h
Normal file
@ -0,0 +1,96 @@
|
|||||||
|
/* Copyright Joyent, Inc. and other Node contributors. All rights reserved.
|
||||||
|
*
|
||||||
|
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||||
|
* of this software and associated documentation files (the "Software"), to
|
||||||
|
* deal in the Software without restriction, including without limitation the
|
||||||
|
* rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
|
||||||
|
* sell copies of the Software, and to permit persons to whom the Software is
|
||||||
|
* furnished to do so, subject to the following conditions:
|
||||||
|
*
|
||||||
|
* The above copyright notice and this permission notice shall be included in
|
||||||
|
* all copies or substantial portions of the Software.
|
||||||
|
*
|
||||||
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||||
|
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||||
|
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||||
|
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||||
|
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||||
|
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
|
||||||
|
* IN THE SOFTWARE.
|
||||||
|
*/
|
||||||
|
#ifndef http_parser_h
|
||||||
|
#define http_parser_h
|
||||||
|
|
||||||
|
#ifdef __cplusplus
|
||||||
|
extern "C" {
|
||||||
|
#endif
|
||||||
|
|
||||||
|
/* Also update SONAME in the Makefile whenever you change these. */
|
||||||
|
#define HTTP_PARSER_VERSION_MAJOR 2
|
||||||
|
#define HTTP_PARSER_VERSION_MINOR 7
|
||||||
|
#define HTTP_PARSER_VERSION_PATCH 1
|
||||||
|
|
||||||
|
#include <stdint.h>
|
||||||
|
|
||||||
|
/* Compile with -DHTTP_PARSER_STRICT=0 to make fewer checks, but run
|
||||||
|
* faster
|
||||||
|
*/
|
||||||
|
#ifndef HTTP_PARSER_STRICT
|
||||||
|
# define HTTP_PARSER_STRICT 1
|
||||||
|
#endif
|
||||||
|
|
||||||
|
|
||||||
|
/* Indices of the URL components. Each value is used both as an index into
 * http_parser_url.field_data[] and as a bit position in
 * http_parser_url.field_set. */
enum http_parser_url_fields {
  UF_SCHEMA   = 0,
  UF_HOST     = 1,
  UF_PORT     = 2,
  UF_PATH     = 3,
  UF_QUERY    = 4,
  UF_FRAGMENT = 5,
  UF_USERINFO = 6,
  UF_MAX      = 7   /* number of components, not a component itself */
};
|
||||||
|
|
||||||
|
|
||||||
|
/* Result structure for http_parser_parse_url().
|
||||||
|
*
|
||||||
|
* Callers should index into field_data[] with UF_* values iff field_set
|
||||||
|
* has the relevant (1 << UF_*) bit set. As a courtesy to clients (and
|
||||||
|
* because we probably have padding left over), we convert any port to
|
||||||
|
* a uint16_t.
|
||||||
|
*/
|
||||||
|
struct http_parser_url {
|
||||||
|
uint16_t field_set; /* Bitmask of (1 << UF_*) values */
|
||||||
|
uint16_t port; /* Converted UF_PORT string */
|
||||||
|
|
||||||
|
struct {
|
||||||
|
uint16_t off; /* Offset into buffer in which field starts */
|
||||||
|
uint16_t len; /* Length of run in buffer */
|
||||||
|
} field_data[UF_MAX];
|
||||||
|
};
|
||||||
|
|
||||||
|
|
||||||
|
/* Returns the library version as a single number: bits 16-23 hold the major
 * version, bits 8-15 the minor version and bits 0-7 the patch level.
 *
 * Example:
 *
 *   unsigned long version = http_parser_version();
 *   unsigned major = (version >> 16) & 255;
 *   unsigned minor = (version >> 8) & 255;
 *   unsigned patch = version & 255;
 *   printf("http_parser v%u.%u.%u\n", major, minor, patch);
 */
unsigned long http_parser_version(void);

/* Sets every member of `*u` to 0. */
void http_parser_url_init(struct http_parser_url *u);

/* Parses the `buflen` bytes at `buf` as a URL and stores the result in `*u`.
 * A non-zero `is_connect` selects parsing of a CONNECT target
 * ("hostname:port"). Returns nonzero on failure. */
int http_parser_parse_url(const char *buf, uint32_t buflen,
                          int is_connect,
                          struct http_parser_url *u);
|
||||||
|
|
||||||
|
#ifdef __cplusplus
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
#endif
|
Loading…
x
Reference in New Issue
Block a user