module yu.tools.http1xparser.url;

import yu.tools.http1xparser.default_;
import yu.traits;

@trusted :

/**
 * Index of each URL component inside ParserdUrl.fieldData, and the bit
 * position (1 << UF_*) used for it in ParserdUrl.fieldSet.
 * UF_MAX is the number of components, not a component itself.
 */
enum URLFieldsType : ushort
{
    UF_SCHEMA = 0,
    UF_HOST = 1,
    UF_PORT = 2,
    UF_PATH = 3,
    UF_QUERY = 4,
    UF_FRAGMENT = 5,
    UF_USERINFO = 6,
    UF_MAX = 7
}

/* Result structure for httpParserURL().
 *
 * Callers should index into fieldData[] with UF_* values iff fieldSet
 * has the relevant (1 << UF_*) bit set. As a courtesy to clients (and
 * because we probably have padding left over), we convert any port to
 * a ushort.
 */
struct ParserdUrl
{
    ushort fieldSet; /* Bitmask of (1 << UF_*) values */
    ushort port; /* Converted UF_PORT string */

    struct Field
    {
        ushort off; /* Offset into buffer in which field starts */
        ushort len; /* Length of run in buffer */
    }

    Field[URLFieldsType.UF_MAX] fieldData;

    /// Returns true when the bit for `type` is set in fieldSet.
    pragma(inline, true)
    bool hasField(URLFieldsType type) nothrow @nogc
    {
        return (fieldSet & (1 << type)) > 0;
    }

    /**
     * Returns the slice of `url` covered by the field `type`.
     *
     * Params:
     *   url  = the buffer that was handed to httpParserURL()
     *   type = the component to extract
     * Returns: the sub-slice of `url`, or an empty (init) slice when the
     *   field is not set or its recorded range does not fit inside `url`.
     */
    auto getField(CHAR)(CHAR[] url, URLFieldsType type) @nogc nothrow
    {
        /* Check the bit first so that a field that is not set (UF_MAX
         * included) never indexes fieldData. */
        if (!hasField(type))
            return (CHAR[]).init;

        const size_t start = fieldData[type].off;
        const size_t max = start + fieldData[type].len;
        if (max > url.length)
            return (CHAR[]).init;
        return url[start .. max];
    }
}

/**
 * Splits `url` into its components and records them in `u`.
 *
 * When `strict` is true (the "is_connect" mode of the C original) the
 * input must consist of exactly "hostname:port"; anything else makes
 * httpParserURL fail.
 *
 * Params:
 *   url = the URL text; it does not need to be NUL-terminated
 *   u   = receives the field bitmask, offsets/lengths and numeric port
 * Returns: true on success, false when the URL is malformed, when it is
 *   longer than ushort.max bytes (offsets are stored as ushort), or when
 *   the port does not fit into a ushort.
 */
bool httpParserURL(bool strict = false, CHAR)(CHAR[] url, out ParserdUrl u) @nogc nothrow
        if (isCharByte!CHAR)
{
    const ubyte[] data = cast(const ubyte[])(url);
    HTTPParserState s;
    size_t p;
    URLFieldsType uf, old_uf;
    bool found_at = false;

    /* Field offsets and lengths are ushort; a longer input would wrap
     * them silently and produce bogus ranges, so reject it up front. */
    if (data.length > ushort.max)
        return false;

    s = strict ? HTTPParserState.s_req_server_start : HTTPParserState.s_req_spaces_before_url;
    old_uf = URLFieldsType.UF_MAX;
    for (p = 0; p < data.length; p++) with (URLFieldsType)
    {
        const ubyte ch = data[p];
        s = parseURLchar(s, ch);
        /* Figure out the next field that we're operating on */
        switch (s) with (HTTPParserState)
        {
        case s_dead:
            return false;

            /* Skip delimiters */
        case s_req_schema_slash:
        case s_req_schema_slash_slash:
        case s_req_server_start:
        case s_req_query_string_start:
        case s_req_fragment_start:
            continue;

        case s_req_schema:
            uf = UF_SCHEMA;
            break;

        case s_req_server_with_at:
            found_at = true;
            goto case;
            /* FALLTHROUGH */
        case s_req_server:
            uf = UF_HOST;
            break;

        case s_req_path:
            uf = UF_PATH;
            break;

        case s_req_query_string:
            uf = UF_QUERY;
            break;

        case s_req_fragment:
            uf = UF_FRAGMENT;
            break;

        default:
            assert(!"Unexpected state");
            return false;
        }

        /* Nothing's changed; soldier on */
        if (uf == old_uf)
        {
            u.fieldData[uf].len++;
            continue;
        }

        u.fieldData[uf].off = cast(ushort) p;
        u.fieldData[uf].len = 1;

        u.fieldSet |= (1 << uf);
        old_uf = uf;
    }

    /* host must be present if there is a schema */
    /* parsing http:///toto will fail */
    if (u.hasField(URLFieldsType.UF_SCHEMA) && (!u.hasField(URLFieldsType.UF_HOST)))
    {
        return false;
    }
    if (u.hasField(URLFieldsType.UF_HOST))
    {
        /* Re-scan the server part to split userinfo / host / port */
        if (!parserHost(data, u, found_at))
        {
            return false;
        }
    }
    /* CONNECT requests can only contain "hostname:port" */
    if (strict && u.fieldSet != ((1 << URLFieldsType.UF_HOST) | (1 << URLFieldsType.UF_PORT)))
    {
        return false;
    }

    if (u.hasField(URLFieldsType.UF_PORT))
    {
        /* parserHost() only adds bytes to the port field when they are
         * decimal digits, so convert exactly those bytes. The slice is
         * not NUL-terminated, which is why a C routine such as strtoul
         * must not be used here: it could read past the buffer when the
         * port is the last thing in the URL. */
        const size_t portStart = u.fieldData[URLFieldsType.UF_PORT].off;
        const size_t portEnd = portStart + u.fieldData[URLFieldsType.UF_PORT].len;
        uint v = 0;
        foreach (const ubyte digit; data[portStart .. portEnd])
        {
            v = v * 10 + (digit - '0');

            /* Ports have a max value of 2^16 - 1 */
            if (v > ushort.max)
                return false;
        }

        u.port = cast(ushort) v;
    }

    return true;
}

package:

/**
 * Single step of the host state machine used by parserHost().
 *
 * Params:
 *   s  = current host-parsing state
 *   ch = next byte of the server part
 * Returns: the next state, or s_http_host_dead when `ch` is not allowed
 *   in state `s`.
 */
HTTPParserHostState parserHostChar(HTTPParserHostState s, ubyte ch) @nogc nothrow
{
    switch (s) with (HTTPParserHostState)
    {
    case s_http_userinfo:
    case s_http_userinfo_start:
        if (ch == '@')
        {
            return s_http_host_start;
        }

        if (IS_USERINFO_CHAR2(ch))
        {
            return s_http_userinfo;
        }
        break;

    case s_http_host_start:
        if (ch == '[')
        {
            return s_http_host_v6_start;
        }

        if (IS_HOST_CHAR(ch))
        {
            return s_http_host;
        }

        break;

    case s_http_host:
        if (IS_HOST_CHAR(ch))
        {
            return s_http_host;
        }
        goto case;
        /* FALLTHROUGH */
    case s_http_host_v6_end:
        if (ch == ':')
        {
            return s_http_host_port_start;
        }

        break;

    case s_http_host_v6:
        if (ch == ']')
        {
            return s_http_host_v6_end;
        }
        goto case;
        /* FALLTHROUGH */
    case s_http_host_v6_start:
        if (IS_HEX(ch) || ch == ':' || ch == '.')
        {
            return s_http_host_v6;
        }

        /* A zone id may only start after at least one address byte */
        if (s == s_http_host_v6 && ch == '%')
        {
            return s_http_host_v6_zone_start;
        }
        break;

    case s_http_host_v6_zone:
        if (ch == ']')
        {
            return s_http_host_v6_end;
        }
        goto case;
        /* FALLTHROUGH */
    case s_http_host_v6_zone_start:
        /* RFC 6874 Zone ID consists of 1*( unreserved / pct-encoded) */
        if (IS_ALPHANUM(ch) || ch == '%' || ch == '.' || ch == '-' || ch == '_' || ch == '~')
        {
            return s_http_host_v6_zone;
        }
        break;

    case s_http_host_port:
    case s_http_host_port_start:
        if (mixin(IS_NUM("ch")))
        {
            return s_http_host_port;
        }
        break;
    default:
        break;
    }
    return HTTPParserHostState.s_http_host_dead;
}

/**
 * Re-scans the bytes recorded as UF_HOST and splits them into
 * UF_USERINFO, UF_HOST and UF_PORT, updating `u` in place.
 *
 * Params:
 *   data     = the complete URL buffer
 *   u        = parse result; its UF_HOST field must already be set
 *   found_at = true when an '@' was seen in the server part, i.e. the
 *              scan has to start in the userinfo state
 * Returns: false when the server part is malformed or ends in an
 *   incomplete state, true otherwise.
 */
bool parserHost(const ubyte[] data, ref ParserdUrl u, bool found_at) @nogc nothrow
{
    HTTPParserHostState s;

    size_t p;
    size_t buflen = u.fieldData[URLFieldsType.UF_HOST].off + u.fieldData[URLFieldsType.UF_HOST].len;

    assert(u.fieldSet & (1 << URLFieldsType.UF_HOST));

    /* The host length is rebuilt below from the bytes that really
     * belong to the host name. */
    u.fieldData[URLFieldsType.UF_HOST].len = 0;

    s = found_at ? HTTPParserHostState.s_http_userinfo_start : HTTPParserHostState.s_http_host_start;

    for (p = u.fieldData[URLFieldsType.UF_HOST].off; p < buflen; p++) with (HTTPParserHostState)
    {
        const ubyte ch = data[p];
        const HTTPParserHostState new_s = parserHostChar(s, ch);

        if (new_s == s_http_host_dead)
        {
            return false;
        }

        switch (new_s) with (URLFieldsType)
        {
        case s_http_host:
            if (s != s_http_host)
            {
                u.fieldData[UF_HOST].off = cast(ushort) p;
            }
            u.fieldData[UF_HOST].len++;
            break;

        case s_http_host_v6:
            if (s != s_http_host_v6)
            {
                u.fieldData[UF_HOST].off = cast(ushort) p;
            }
            u.fieldData[UF_HOST].len++;
            break;

        case s_http_host_v6_zone_start:
        case s_http_host_v6_zone:
            u.fieldData[UF_HOST].len++;
            break;

        case s_http_host_port:
            if (s != s_http_host_port)
            {
                u.fieldData[UF_PORT].off = cast(ushort) p;
                u.fieldData[UF_PORT].len = 0;
                u.fieldSet |= (1 << UF_PORT);
            }
            u.fieldData[UF_PORT].len++;
            break;

        case s_http_userinfo:
            if (s != s_http_userinfo)
            {
                u.fieldData[UF_USERINFO].off = cast(ushort) p;
                u.fieldData[UF_USERINFO].len = 0;
                u.fieldSet |= (1 << UF_USERINFO);
            }
            u.fieldData[UF_USERINFO].len++;
            break;

        default:
            break;
        }
        s = new_s;
    }

    /* Make sure we don't end somewhere unexpected */
    switch (s) with (HTTPParserHostState)
    {
    case s_http_host_start:
    case s_http_host_v6_start:
    case s_http_host_v6:
    case s_http_host_v6_zone_start:
    case s_http_host_v6_zone:
    case s_http_host_port_start:
    case s_http_userinfo:
    case s_http_userinfo_start:
        return false;
    default:
        break;
    }

    return true;
}

/**
 * Single step of the URL state machine.
 *
 * Params:
 *   s  = current URL-parsing state
 *   ch = next byte of the URL
 * Returns: the next state, or HTTPParserState.s_dead when `ch` is not
 *   valid in state `s`.
 */
HTTPParserState parseURLchar(HTTPParserState s, ubyte ch) @nogc nothrow
{
    if (ch == ' ' || ch == '\r' || ch == '\n')
        return HTTPParserState.s_dead;

    version (HTTP_PARSER_STRICT)
    {
        /* Fully qualified: no `with (HTTPParserState)` is active here */
        if (ch == '\t' || ch == '\f')
            return HTTPParserState.s_dead;
    }

    switch (s) with (HTTPParserState)
    {
    case s_req_spaces_before_url:
        /* Proxied requests are followed by scheme of an absolute URI (alpha).
         * All methods except CONNECT are followed by '/' or '*'.
         */
        if (ch == '/' || ch == '*')
            return s_req_path;

        if (mixin(IS_ALPHA("ch")))
            return s_req_schema;
        break;

    case s_req_schema:
        if (mixin(IS_ALPHA("ch")))
            return s;

        if (ch == ':')
            return s_req_schema_slash;
        break;

    case s_req_schema_slash:
        if (ch == '/')
            return s_req_schema_slash_slash;
        break;

    case s_req_schema_slash_slash:
        if (ch == '/')
            return s_req_server_start;
        break;

    case s_req_server_with_at:
        /* A second '@' in the server part is an error */
        if (ch == '@')
        {
            return s_dead;
        }
        goto case;
        /* FALLTHROUGH */
    case s_req_server_start:
    case s_req_server:
        {
            if (ch == '/')
                return s_req_path;

            if (ch == '?')
                return s_req_query_string_start;

            if (ch == '@')
                return s_req_server_with_at;

            if (IS_USERINFO_CHAR2(ch) || ch == '[' || ch == ']')
                return s_req_server;
        }
        break;

    case s_req_path:
        {
            if (mixin(IS_URL_CHAR("ch")))
                return s;

            switch (ch)
            {
            case '?':
                return s_req_query_string_start;

            case '#':
                return s_req_fragment_start;
            default:
                break;
            }
        }
        break;
    case s_req_query_string_start:
    case s_req_query_string:
        {
            if (mixin(IS_URL_CHAR("ch")))
            {
                return s_req_query_string;
            }

            switch (ch)
            {
            case '?':
                /* allow extra '?' in query string */
                return s_req_query_string;

            case '#':
                return s_req_fragment_start;
            default:
                break;
            }
            break;
        }

    case s_req_fragment_start:
        {
            if (mixin(IS_URL_CHAR("ch")))
                return s_req_fragment;
            switch (ch)
            {
            case '?':
                return s_req_fragment;

            case '#':
                return s;
            default:
                break;
            }
        }
        break;
    case s_req_fragment:
        {
            if (mixin(IS_URL_CHAR("ch")))
                return s;
            switch (ch)
            {
            case '?':
            case '#':
                return s;
            default:
                break;
            }
        }
        break;
    default:
        break;
    }
    /* We should never fall out of the switch above unless there's an error */
    return HTTPParserState.s_dead;
}

/// True for '0'..'9', 'a'..'f' and 'A'..'F'.
pragma(inline, true) bool IS_HEX(ubyte c) nothrow @nogc
{
    bool sum = mixin(IS_NUM("c"));
    c = c | 0x20; /* fold ASCII upper case onto lower case */
    return (sum || (c >= 'a' && c <= 'f'));
}

/// True for characters accepted in a host name: alphanumerics, '.' and '-'.
pragma(inline, true) bool IS_HOST_CHAR(ubyte c) nothrow @nogc
{
    return (IS_ALPHANUM(c) || (c) == '.' || (c) == '-');
}

/// True when `c` satisfies IS_ALPHA or IS_NUM.
pragma(inline, true) bool IS_ALPHANUM(ubyte c) nothrow @nogc
{
    bool alpha = mixin(IS_ALPHA("c"));
    bool sum = mixin(IS_NUM("c"));
    return (sum || alpha);
}

/// True for characters accepted in the userinfo part of a URL:
/// alphanumerics, the "mark" characters and % ; : & = + $ ,
pragma(inline, true) bool IS_USERINFO_CHAR2(ubyte c) nothrow @nogc
{
    bool b1 = (c == '%' || c == ';' || c == ':' || c == '&' || c == '='
            || c == '+' || c == '$' || c == ',');
    bool b2 = (c == '-' || '_' == c || '.' == c || '!' == c || '~' == c || '*' == c
            || '\'' == c || '(' == c || ')' == c);
    return (b2 || b1 || IS_ALPHANUM(c));
}

/// Throws Http1xParserExcetion(HPE_STRICT) when `istrue` is true.
pragma(inline, true)
void STRICT_CHECK(bool istrue)
{
    if (istrue)
        throw new Http1xParserExcetion(HTTPParserErrno.HPE_STRICT);
}
// string IS_MARK(string c) { return "(" ~ c ~ " == '-' || " ~ c ~ " == '_' || "~ c ~ " == '.' || " ~ c ~ " == '!' || " ~ c ~ " == '~' || " ~ c ~ " == '*' || " ~ c ~ " == '\'' || " ~ c ~ " == '(' || " ~ c ~ " == ')')";}

/// Builds the source text of an expression testing that `c` is '0'..'9'.
/// Intended for use with mixin().
string IS_NUM(string c)
{
    return "(" ~ c ~ " >= '0' && " ~ c ~ " <= '9')";
}

/// Builds the source text of an expression testing that `c`, with bit
/// 0x20 set, lies in 'a'..'z'. Intended for use with mixin().
string IS_ALPHA(string c)
{
    return "((" ~ c ~ "| 0x20) >= 'a' && (" ~ c ~ " | 0x20) <= 'z')";
}

/// Builds the source text of an expression that looks `c` up in the
/// normal_url_char bit table (one bit per byte value). Intended for use
/// with mixin().
string IS_URL_CHAR(string c)
{
    return "(!!(cast(uint) (normal_url_char[cast(uint) (" ~ c
        ~ ") >> 3] ) &
            (1 << (cast(uint)" ~ c ~ " & 7))))";
}

/// Source text of the expression selecting the parser state for the next message.
enum NEW_MESSAGE = "httpShouldKeepAlive() ? (type == HTTPType.REQUEST ? HTTPParserState.s_start_req : HTTPParserState.s_start_res) : HTTPParserState.s_dead";

/// Generates code that invokes the `_on<code>` callback (if set) and
/// throws Http1xParserStopExcetion with position `p + 1` when
/// `handleIng` is false afterwards.
string CALLBACK_NOTIFY(string code)
{
    string _s = " {if (_on" ~ code ~ " !is null){
               _on" ~ code ~ "(this);
               if(!handleIng)
               throw new Http1xParserStopExcetion(HTTPParserErrno.HPE_CB_" ~ code ~ ", p + 1);
               } }";
    return _s;
}

/// Same as CALLBACK_NOTIFY, but the generated code reports position `p`.
string CALLBACK_NOTIFY_NOADVANCE(string code)
{
    string _s = " {if (_on" ~ code ~ " != null){
                   _on" ~ code ~ "(this);
                   if(!handleIng)
                   throw new Http1xParserStopExcetion(HTTPParserErrno.HPE_CB_" ~ code ~ ", p);
                   }}";
    return _s;
}

/// Generates code that passes `data[m<code>Mark .. p]` to the
/// `_on<code>` callback (with `true` as last argument) when the mark is
/// set and the range is non-empty, throws Http1xParserStopExcetion with
/// position `p + 1` when `handleIng` is false afterwards, and finally
/// resets the mark to size_t.max.
string CALLBACK_DATA(string code)
{
    string _s = "{ if( m" ~ code ~ "Mark != size_t.max && _on" ~ code
        ~ " !is null){
                ulong len = (p - m" ~ code ~ "Mark) ;

                if(len > 0) {
                /* writeln(\"CALLBACK_DATA at \",__LINE__, \" " ~ code ~ "\");*/
                ubyte[] _data =  data[m" ~ code ~ "Mark..p];
                _on"
        ~ code ~ "(this,_data,true);
                if(!handleIng)
                throw new Http1xParserStopExcetion(HTTPParserErrno.HPE_CB_"
        ~ code ~ ", p + 1);
                } } m" ~ code ~ "Mark = size_t.max;}";
    return _s;
}

/// Same as CALLBACK_DATA, but the generated code passes `false` as the
/// last callback argument and reports position `p`.
string CALLBACK_DATA_NOADVANCE(string code)
{
    string _s = "{ if(m" ~ code ~ "Mark != size_t.max && _on" ~ code ~ " !is null){
                ulong len = (p - m" ~ code ~ "Mark) ;
                if(len > 0) {
                /*writeln(\"CALLBACK_DATA_NOADVANCE at \",__LINE__, \" " ~ code ~ "\");*/
                ubyte[] _data = data[m" ~ code
        ~ "Mark..p];
                _on" ~ code ~ "(this,_data,false);
                if(!handleIng)
                throw new Http1xParserStopExcetion(HTTPParserErrno.HPE_CB_" ~ code ~ ", p);
                }}m" ~ code
        ~ "Mark = size_t.max;}";
    return _s;
}

@nogc nothrow unittest{
    string testUrl1 = "http://aa:werwer@www.hostname.co:8086/test?a=b#dadsas";
    ParserdUrl url;
    assert(httpParserURL(testUrl1,url));
    assert(url.hasField(URLFieldsType.UF_SCHEMA));
    assert(url.hasField(URLFieldsType.UF_HOST));
    string host = url.getField(testUrl1,URLFieldsType.UF_HOST);
    assert(host == "www.hostname.co");
    assert(url.port == 8086);
    assert(url.hasField(URLFieldsType.UF_FRAGMENT));
    string str = url.getField(testUrl1,URLFieldsType.UF_FRAGMENT);
    assert(str == "dadsas");
    str = url.getField(testUrl1,URLFieldsType.UF_QUERY);
    assert(str == "a=b" );
    str = url.getField(testUrl1,URLFieldsType.UF_USERINFO);
    assert(str == "aa:werwer" );
}