qemu/util/uri.c
<<
>>
Prefs
   1/**
   2 * uri.c: set of generic URI related routines
   3 *
   4 * Reference: RFCs 3986, 2732 and 2373
   5 *
   6 * Copyright (C) 1998-2003 Daniel Veillard.  All Rights Reserved.
   7 *
   8 * Permission is hereby granted, free of charge, to any person obtaining a copy
   9 * of this software and associated documentation files (the "Software"), to deal
  10 * in the Software without restriction, including without limitation the rights
  11 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
  12 * copies of the Software, and to permit persons to whom the Software is
  13 * furnished to do so, subject to the following conditions:
  14 *
  15 * The above copyright notice and this permission notice shall be included in
  16 * all copies or substantial portions of the Software.
  17 *
  18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  19 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  20 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
  21 * DANIEL VEILLARD BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
  22 * IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
  23 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  24 *
  25 * Except as contained in this notice, the name of Daniel Veillard shall not
  26 * be used in advertising or otherwise to promote the sale, use or other
  27 * dealings in this Software without prior written authorization from him.
  28 *
  29 * daniel@veillard.com
  30 *
  31 **
  32 *
  33 * Copyright (C) 2007, 2009-2010 Red Hat, Inc.
  34 *
  35 * This library is free software; you can redistribute it and/or
  36 * modify it under the terms of the GNU Lesser General Public
  37 * License as published by the Free Software Foundation; either
  38 * version 2.1 of the License, or (at your option) any later version.
  39 *
  40 * This library is distributed in the hope that it will be useful,
  41 * but WITHOUT ANY WARRANTY; without even the implied warranty of
  42 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  43 * Lesser General Public License for more details.
  44 *
  45 * You should have received a copy of the GNU Lesser General Public
  46 * License along with this library; if not, write to the Free Software
  47 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307  USA
  48 *
  49 * Authors:
  50 *    Richard W.M. Jones <rjones@redhat.com>
  51 *
  52 */
  53
  54#include "qemu/osdep.h"
  55#include <glib.h>
  56
  57#include "qemu/uri.h"
  58
  59static void uri_clean(URI *uri);
  60
  61/*
  62 * Old rule from 2396 used in legacy handling code
  63 * alpha    = lowalpha | upalpha
  64 */
  65#define IS_ALPHA(x) (IS_LOWALPHA(x) || IS_UPALPHA(x))
  66
  67
  68/*
  69 * lowalpha = "a" | "b" | "c" | "d" | "e" | "f" | "g" | "h" | "i" | "j" |
  70 *            "k" | "l" | "m" | "n" | "o" | "p" | "q" | "r" | "s" | "t" |
  71 *            "u" | "v" | "w" | "x" | "y" | "z"
  72 */
  73
  74#define IS_LOWALPHA(x) (((x) >= 'a') && ((x) <= 'z'))
  75
  76/*
  77 * upalpha = "A" | "B" | "C" | "D" | "E" | "F" | "G" | "H" | "I" | "J" |
  78 *           "K" | "L" | "M" | "N" | "O" | "P" | "Q" | "R" | "S" | "T" |
  79 *           "U" | "V" | "W" | "X" | "Y" | "Z"
  80 */
  81#define IS_UPALPHA(x) (((x) >= 'A') && ((x) <= 'Z'))
  82
  83#ifdef IS_DIGIT
  84#undef IS_DIGIT
  85#endif
  86/*
  87 * digit = "0" | "1" | "2" | "3" | "4" | "5" | "6" | "7" | "8" | "9"
  88 */
  89#define IS_DIGIT(x) (((x) >= '0') && ((x) <= '9'))
  90
  91/*
  92 * alphanum = alpha | digit
  93 */
  94
  95#define IS_ALPHANUM(x) (IS_ALPHA(x) || IS_DIGIT(x))
  96
  97/*
  98 * mark = "-" | "_" | "." | "!" | "~" | "*" | "'" | "(" | ")"
  99 */
 100
 101#define IS_MARK(x) (((x) == '-') || ((x) == '_') || ((x) == '.') ||     \
 102    ((x) == '!') || ((x) == '~') || ((x) == '*') || ((x) == '\'') ||    \
 103    ((x) == '(') || ((x) == ')'))
 104
 105/*
 106 * unwise = "{" | "}" | "|" | "\" | "^" | "`"
 107 */
 108
 109#define IS_UNWISE(p)                                                    \
 110      (((*(p) == '{')) || ((*(p) == '}')) || ((*(p) == '|')) ||         \
 111       ((*(p) == '\\')) || ((*(p) == '^')) || ((*(p) == '[')) ||        \
 112       ((*(p) == ']')) || ((*(p) == '`')))
 113/*
 114 * reserved = ";" | "/" | "?" | ":" | "@" | "&" | "=" | "+" | "$" | "," |
 115 *            "[" | "]"
 116 */
 117
 118#define IS_RESERVED(x) (((x) == ';') || ((x) == '/') || ((x) == '?') || \
 119        ((x) == ':') || ((x) == '@') || ((x) == '&') || ((x) == '=') || \
 120        ((x) == '+') || ((x) == '$') || ((x) == ',') || ((x) == '[') || \
 121        ((x) == ']'))
 122
 123/*
 124 * unreserved = alphanum | mark
 125 */
 126
 127#define IS_UNRESERVED(x) (IS_ALPHANUM(x) || IS_MARK(x))
 128
 129/*
 130 * Skip to next pointer char, handle escaped sequences
 131 */
 132
 133#define NEXT(p) ((*p == '%')? p += 3 : p++)
 134
 135/*
 136 * Productions from the spec.
 137 *
 138 *    authority     = server | reg_name
 139 *    reg_name      = 1*( unreserved | escaped | "$" | "," |
 140 *                        ";" | ":" | "@" | "&" | "=" | "+" )
 141 *
 142 * path          = [ abs_path | opaque_part ]
 143 */
 144
 145
 146/************************************************************************
 147 *                                                                      *
 148 *                         RFC 3986 parser                              *
 149 *                                                                      *
 150 ************************************************************************/
 151
 152#define ISA_DIGIT(p) ((*(p) >= '0') && (*(p) <= '9'))
 153#define ISA_ALPHA(p) (((*(p) >= 'a') && (*(p) <= 'z')) ||               \
 154                      ((*(p) >= 'A') && (*(p) <= 'Z')))
 155#define ISA_HEXDIG(p)                                                   \
 156       (ISA_DIGIT(p) || ((*(p) >= 'a') && (*(p) <= 'f')) ||             \
 157        ((*(p) >= 'A') && (*(p) <= 'F')))
 158
 159/*
 160 *    sub-delims    = "!" / "$" / "&" / "'" / "(" / ")"
 161 *                     / "*" / "+" / "," / ";" / "="
 162 */
 163#define ISA_SUB_DELIM(p)                                                \
 164      (((*(p) == '!')) || ((*(p) == '$')) || ((*(p) == '&')) ||         \
 165       ((*(p) == '(')) || ((*(p) == ')')) || ((*(p) == '*')) ||         \
 166       ((*(p) == '+')) || ((*(p) == ',')) || ((*(p) == ';')) ||         \
 167       ((*(p) == '=')) || ((*(p) == '\'')))
 168
 169/*
 170 *    gen-delims    = ":" / "/" / "?" / "#" / "[" / "]" / "@"
 171 */
 172#define ISA_GEN_DELIM(p)                                                \
 173      (((*(p) == ':')) || ((*(p) == '/')) || ((*(p) == '?')) ||         \
 174       ((*(p) == '#')) || ((*(p) == '[')) || ((*(p) == ']')) ||         \
 175       ((*(p) == '@')))
 176
 177/*
 178 *    reserved      = gen-delims / sub-delims
 179 */
 180#define ISA_RESERVED(p) (ISA_GEN_DELIM(p) || (ISA_SUB_DELIM(p)))
 181
 182/*
 183 *    unreserved    = ALPHA / DIGIT / "-" / "." / "_" / "~"
 184 */
 185#define ISA_UNRESERVED(p)                                               \
 186      ((ISA_ALPHA(p)) || (ISA_DIGIT(p)) || ((*(p) == '-')) ||           \
 187       ((*(p) == '.')) || ((*(p) == '_')) || ((*(p) == '~')))
 188
 189/*
 190 *    pct-encoded   = "%" HEXDIG HEXDIG
 191 */
 192#define ISA_PCT_ENCODED(p)                                              \
 193     ((*(p) == '%') && (ISA_HEXDIG(p + 1)) && (ISA_HEXDIG(p + 2)))
 194
 195/*
 196 *    pchar         = unreserved / pct-encoded / sub-delims / ":" / "@"
 197 */
 198#define ISA_PCHAR(p)                                                    \
 199     (ISA_UNRESERVED(p) || ISA_PCT_ENCODED(p) || ISA_SUB_DELIM(p) ||    \
 200      ((*(p) == ':')) || ((*(p) == '@')))
 201
 202/**
 203 * rfc3986_parse_scheme:
 204 * @uri:  pointer to an URI structure
 205 * @str:  pointer to the string to analyze
 206 *
 207 * Parse an URI scheme
 208 *
 209 * ALPHA *( ALPHA / DIGIT / "+" / "-" / "." )
 210 *
 211 * Returns 0 or the error code
 212 */
 213static int
 214rfc3986_parse_scheme(URI *uri, const char **str) {
 215    const char *cur;
 216
 217    if (str == NULL)
 218        return(-1);
 219
 220    cur = *str;
 221    if (!ISA_ALPHA(cur))
 222        return(2);
 223    cur++;
 224    while (ISA_ALPHA(cur) || ISA_DIGIT(cur) ||
 225           (*cur == '+') || (*cur == '-') || (*cur == '.')) cur++;
 226    if (uri != NULL) {
 227        g_free(uri->scheme);
 228        uri->scheme = g_strndup(*str, cur - *str);
 229    }
 230    *str = cur;
 231    return(0);
 232}
 233
 234/**
 235 * rfc3986_parse_fragment:
 236 * @uri:  pointer to an URI structure
 237 * @str:  pointer to the string to analyze
 238 *
 239 * Parse the query part of an URI
 240 *
 241 * fragment      = *( pchar / "/" / "?" )
 242 * NOTE: the strict syntax as defined by 3986 does not allow '[' and ']'
 243 *       in the fragment identifier but this is used very broadly for
 244 *       xpointer scheme selection, so we are allowing it here to not break
 245 *       for example all the DocBook processing chains.
 246 *
 247 * Returns 0 or the error code
 248 */
 249static int
 250rfc3986_parse_fragment(URI *uri, const char **str)
 251{
 252    const char *cur;
 253
 254    if (str == NULL)
 255        return (-1);
 256
 257    cur = *str;
 258
 259    while ((ISA_PCHAR(cur)) || (*cur == '/') || (*cur == '?') ||
 260           (*cur == '[') || (*cur == ']') ||
 261           ((uri != NULL) && (uri->cleanup & 1) && (IS_UNWISE(cur))))
 262        NEXT(cur);
 263    if (uri != NULL) {
 264        g_free(uri->fragment);
 265        if (uri->cleanup & 2)
 266            uri->fragment = g_strndup(*str, cur - *str);
 267        else
 268            uri->fragment = uri_string_unescape(*str, cur - *str, NULL);
 269    }
 270    *str = cur;
 271    return (0);
 272}
 273
 274/**
 275 * rfc3986_parse_query:
 276 * @uri:  pointer to an URI structure
 277 * @str:  pointer to the string to analyze
 278 *
 279 * Parse the query part of an URI
 280 *
 281 * query = *uric
 282 *
 283 * Returns 0 or the error code
 284 */
 285static int
 286rfc3986_parse_query(URI *uri, const char **str)
 287{
 288    const char *cur;
 289
 290    if (str == NULL)
 291        return (-1);
 292
 293    cur = *str;
 294
 295    while ((ISA_PCHAR(cur)) || (*cur == '/') || (*cur == '?') ||
 296           ((uri != NULL) && (uri->cleanup & 1) && (IS_UNWISE(cur))))
 297        NEXT(cur);
 298    if (uri != NULL) {
 299        g_free(uri->query);
 300        uri->query = g_strndup (*str, cur - *str);
 301    }
 302    *str = cur;
 303    return (0);
 304}
 305
 306/**
 307 * rfc3986_parse_port:
 308 * @uri:  pointer to an URI structure
 309 * @str:  the string to analyze
 310 *
 311 * Parse a port  part and fills in the appropriate fields
 312 * of the @uri structure
 313 *
 314 * port          = *DIGIT
 315 *
 316 * Returns 0 or the error code
 317 */
 318static int
 319rfc3986_parse_port(URI *uri, const char **str)
 320{
 321    const char *cur = *str;
 322    int port = 0;
 323
 324    if (ISA_DIGIT(cur)) {
 325        while (ISA_DIGIT(cur)) {
 326            port = port * 10 + (*cur - '0');
 327            if (port > 65535) {
 328                return 1;
 329            }
 330            cur++;
 331        }
 332        if (uri) {
 333            uri->port = port;
 334        }
 335        *str = cur;
 336        return 0;
 337    }
 338    return 1;
 339}
 340
 341/**
 342 * rfc3986_parse_user_info:
 343 * @uri:  pointer to an URI structure
 344 * @str:  the string to analyze
 345 *
 346 * Parse an user informations part and fills in the appropriate fields
 347 * of the @uri structure
 348 *
 349 * userinfo      = *( unreserved / pct-encoded / sub-delims / ":" )
 350 *
 351 * Returns 0 or the error code
 352 */
 353static int
 354rfc3986_parse_user_info(URI *uri, const char **str)
 355{
 356    const char *cur;
 357
 358    cur = *str;
 359    while (ISA_UNRESERVED(cur) || ISA_PCT_ENCODED(cur) ||
 360           ISA_SUB_DELIM(cur) || (*cur == ':'))
 361        NEXT(cur);
 362    if (*cur == '@') {
 363        if (uri != NULL) {
 364            g_free(uri->user);
 365            if (uri->cleanup & 2)
 366                uri->user = g_strndup(*str, cur - *str);
 367            else
 368                uri->user = uri_string_unescape(*str, cur - *str, NULL);
 369        }
 370        *str = cur;
 371        return(0);
 372    }
 373    return(1);
 374}
 375
 376/**
 377 * rfc3986_parse_dec_octet:
 378 * @str:  the string to analyze
 379 *
 380 *    dec-octet     = DIGIT                 ; 0-9
 381 *                  / %x31-39 DIGIT         ; 10-99
 382 *                  / "1" 2DIGIT            ; 100-199
 383 *                  / "2" %x30-34 DIGIT     ; 200-249
 384 *                  / "25" %x30-35          ; 250-255
 385 *
 386 * Skip a dec-octet.
 387 *
 388 * Returns 0 if found and skipped, 1 otherwise
 389 */
 390static int
 391rfc3986_parse_dec_octet(const char **str) {
 392    const char *cur = *str;
 393
 394    if (!(ISA_DIGIT(cur)))
 395        return(1);
 396    if (!ISA_DIGIT(cur+1))
 397        cur++;
 398    else if ((*cur != '0') && (ISA_DIGIT(cur + 1)) && (!ISA_DIGIT(cur+2)))
 399        cur += 2;
 400    else if ((*cur == '1') && (ISA_DIGIT(cur + 1)) && (ISA_DIGIT(cur + 2)))
 401        cur += 3;
 402    else if ((*cur == '2') && (*(cur + 1) >= '0') &&
 403             (*(cur + 1) <= '4') && (ISA_DIGIT(cur + 2)))
 404        cur += 3;
 405    else if ((*cur == '2') && (*(cur + 1) == '5') &&
 406             (*(cur + 2) >= '0') && (*(cur + 1) <= '5'))
 407        cur += 3;
 408    else
 409        return(1);
 410    *str = cur;
 411    return(0);
 412}
 413/**
 414 * rfc3986_parse_host:
 415 * @uri:  pointer to an URI structure
 416 * @str:  the string to analyze
 417 *
 418 * Parse an host part and fills in the appropriate fields
 419 * of the @uri structure
 420 *
 421 * host          = IP-literal / IPv4address / reg-name
 422 * IP-literal    = "[" ( IPv6address / IPvFuture  ) "]"
 423 * IPv4address   = dec-octet "." dec-octet "." dec-octet "." dec-octet
 424 * reg-name      = *( unreserved / pct-encoded / sub-delims )
 425 *
 426 * Returns 0 or the error code
 427 */
 428static int
 429rfc3986_parse_host(URI *uri, const char **str)
 430{
 431    const char *cur = *str;
 432    const char *host;
 433
 434    host = cur;
 435    /*
 436     * IPv6 and future addressing scheme are enclosed between brackets
 437     */
 438    if (*cur == '[') {
 439        cur++;
 440        while ((*cur != ']') && (*cur != 0))
 441            cur++;
 442        if (*cur != ']')
 443            return(1);
 444        cur++;
 445        goto found;
 446    }
 447    /*
 448     * try to parse an IPv4
 449     */
 450    if (ISA_DIGIT(cur)) {
 451        if (rfc3986_parse_dec_octet(&cur) != 0)
 452            goto not_ipv4;
 453        if (*cur != '.')
 454            goto not_ipv4;
 455        cur++;
 456        if (rfc3986_parse_dec_octet(&cur) != 0)
 457            goto not_ipv4;
 458        if (*cur != '.')
 459            goto not_ipv4;
 460        if (rfc3986_parse_dec_octet(&cur) != 0)
 461            goto not_ipv4;
 462        if (*cur != '.')
 463            goto not_ipv4;
 464        if (rfc3986_parse_dec_octet(&cur) != 0)
 465            goto not_ipv4;
 466        goto found;
 467not_ipv4:
 468        cur = *str;
 469    }
 470    /*
 471     * then this should be a hostname which can be empty
 472     */
 473    while (ISA_UNRESERVED(cur) || ISA_PCT_ENCODED(cur) || ISA_SUB_DELIM(cur))
 474        NEXT(cur);
 475found:
 476    if (uri != NULL) {
 477        g_free(uri->authority);
 478        uri->authority = NULL;
 479        g_free(uri->server);
 480        if (cur != host) {
 481            if (uri->cleanup & 2)
 482                uri->server = g_strndup(host, cur - host);
 483            else
 484                uri->server = uri_string_unescape(host, cur - host, NULL);
 485        } else
 486            uri->server = NULL;
 487    }
 488    *str = cur;
 489    return(0);
 490}
 491
 492/**
 493 * rfc3986_parse_authority:
 494 * @uri:  pointer to an URI structure
 495 * @str:  the string to analyze
 496 *
 497 * Parse an authority part and fills in the appropriate fields
 498 * of the @uri structure
 499 *
 500 * authority     = [ userinfo "@" ] host [ ":" port ]
 501 *
 502 * Returns 0 or the error code
 503 */
 504static int
 505rfc3986_parse_authority(URI *uri, const char **str)
 506{
 507    const char *cur;
 508    int ret;
 509
 510    cur = *str;
 511    /*
 512     * try to parse an userinfo and check for the trailing @
 513     */
 514    ret = rfc3986_parse_user_info(uri, &cur);
 515    if ((ret != 0) || (*cur != '@'))
 516        cur = *str;
 517    else
 518        cur++;
 519    ret = rfc3986_parse_host(uri, &cur);
 520    if (ret != 0) return(ret);
 521    if (*cur == ':') {
 522        cur++;
 523        ret = rfc3986_parse_port(uri, &cur);
 524        if (ret != 0) return(ret);
 525    }
 526    *str = cur;
 527    return(0);
 528}
 529
 530/**
 531 * rfc3986_parse_segment:
 532 * @str:  the string to analyze
 533 * @forbid: an optional forbidden character
 534 * @empty: allow an empty segment
 535 *
 536 * Parse a segment and fills in the appropriate fields
 537 * of the @uri structure
 538 *
 539 * segment       = *pchar
 540 * segment-nz    = 1*pchar
 541 * segment-nz-nc = 1*( unreserved / pct-encoded / sub-delims / "@" )
 542 *               ; non-zero-length segment without any colon ":"
 543 *
 544 * Returns 0 or the error code
 545 */
 546static int
 547rfc3986_parse_segment(const char **str, char forbid, int empty)
 548{
 549    const char *cur;
 550
 551    cur = *str;
 552    if (!ISA_PCHAR(cur)) {
 553        if (empty)
 554            return(0);
 555        return(1);
 556    }
 557    while (ISA_PCHAR(cur) && (*cur != forbid))
 558        NEXT(cur);
 559    *str = cur;
 560    return (0);
 561}
 562
 563/**
 564 * rfc3986_parse_path_ab_empty:
 565 * @uri:  pointer to an URI structure
 566 * @str:  the string to analyze
 567 *
 568 * Parse an path absolute or empty and fills in the appropriate fields
 569 * of the @uri structure
 570 *
 571 * path-abempty  = *( "/" segment )
 572 *
 573 * Returns 0 or the error code
 574 */
 575static int
 576rfc3986_parse_path_ab_empty(URI *uri, const char **str)
 577{
 578    const char *cur;
 579    int ret;
 580
 581    cur = *str;
 582
 583    while (*cur == '/') {
 584        cur++;
 585        ret = rfc3986_parse_segment(&cur, 0, 1);
 586        if (ret != 0) return(ret);
 587    }
 588    if (uri != NULL) {
 589        g_free(uri->path);
 590        if (*str != cur) {
 591            if (uri->cleanup & 2)
 592                uri->path = g_strndup(*str, cur - *str);
 593            else
 594                uri->path = uri_string_unescape(*str, cur - *str, NULL);
 595        } else {
 596            uri->path = NULL;
 597        }
 598    }
 599    *str = cur;
 600    return (0);
 601}
 602
 603/**
 604 * rfc3986_parse_path_absolute:
 605 * @uri:  pointer to an URI structure
 606 * @str:  the string to analyze
 607 *
 608 * Parse an path absolute and fills in the appropriate fields
 609 * of the @uri structure
 610 *
 611 * path-absolute = "/" [ segment-nz *( "/" segment ) ]
 612 *
 613 * Returns 0 or the error code
 614 */
 615static int
 616rfc3986_parse_path_absolute(URI *uri, const char **str)
 617{
 618    const char *cur;
 619    int ret;
 620
 621    cur = *str;
 622
 623    if (*cur != '/')
 624        return(1);
 625    cur++;
 626    ret = rfc3986_parse_segment(&cur, 0, 0);
 627    if (ret == 0) {
 628        while (*cur == '/') {
 629            cur++;
 630            ret = rfc3986_parse_segment(&cur, 0, 1);
 631            if (ret != 0) return(ret);
 632        }
 633    }
 634    if (uri != NULL) {
 635        g_free(uri->path);
 636        if (cur != *str) {
 637            if (uri->cleanup & 2)
 638                uri->path = g_strndup(*str, cur - *str);
 639            else
 640                uri->path = uri_string_unescape(*str, cur - *str, NULL);
 641        } else {
 642            uri->path = NULL;
 643        }
 644    }
 645    *str = cur;
 646    return (0);
 647}
 648
 649/**
 650 * rfc3986_parse_path_rootless:
 651 * @uri:  pointer to an URI structure
 652 * @str:  the string to analyze
 653 *
 654 * Parse an path without root and fills in the appropriate fields
 655 * of the @uri structure
 656 *
 657 * path-rootless = segment-nz *( "/" segment )
 658 *
 659 * Returns 0 or the error code
 660 */
 661static int
 662rfc3986_parse_path_rootless(URI *uri, const char **str)
 663{
 664    const char *cur;
 665    int ret;
 666
 667    cur = *str;
 668
 669    ret = rfc3986_parse_segment(&cur, 0, 0);
 670    if (ret != 0) return(ret);
 671    while (*cur == '/') {
 672        cur++;
 673        ret = rfc3986_parse_segment(&cur, 0, 1);
 674        if (ret != 0) return(ret);
 675    }
 676    if (uri != NULL) {
 677        g_free(uri->path);
 678        if (cur != *str) {
 679            if (uri->cleanup & 2)
 680                uri->path = g_strndup(*str, cur - *str);
 681            else
 682                uri->path = uri_string_unescape(*str, cur - *str, NULL);
 683        } else {
 684            uri->path = NULL;
 685        }
 686    }
 687    *str = cur;
 688    return (0);
 689}
 690
 691/**
 692 * rfc3986_parse_path_no_scheme:
 693 * @uri:  pointer to an URI structure
 694 * @str:  the string to analyze
 695 *
 696 * Parse an path which is not a scheme and fills in the appropriate fields
 697 * of the @uri structure
 698 *
 699 * path-noscheme = segment-nz-nc *( "/" segment )
 700 *
 701 * Returns 0 or the error code
 702 */
 703static int
 704rfc3986_parse_path_no_scheme(URI *uri, const char **str)
 705{
 706    const char *cur;
 707    int ret;
 708
 709    cur = *str;
 710
 711    ret = rfc3986_parse_segment(&cur, ':', 0);
 712    if (ret != 0) return(ret);
 713    while (*cur == '/') {
 714        cur++;
 715        ret = rfc3986_parse_segment(&cur, 0, 1);
 716        if (ret != 0) return(ret);
 717    }
 718    if (uri != NULL) {
 719        g_free(uri->path);
 720        if (cur != *str) {
 721            if (uri->cleanup & 2)
 722                uri->path = g_strndup(*str, cur - *str);
 723            else
 724                uri->path = uri_string_unescape(*str, cur - *str, NULL);
 725        } else {
 726            uri->path = NULL;
 727        }
 728    }
 729    *str = cur;
 730    return (0);
 731}
 732
 733/**
 734 * rfc3986_parse_hier_part:
 735 * @uri:  pointer to an URI structure
 736 * @str:  the string to analyze
 737 *
 738 * Parse an hierarchical part and fills in the appropriate fields
 739 * of the @uri structure
 740 *
 741 * hier-part     = "//" authority path-abempty
 742 *                / path-absolute
 743 *                / path-rootless
 744 *                / path-empty
 745 *
 746 * Returns 0 or the error code
 747 */
 748static int
 749rfc3986_parse_hier_part(URI *uri, const char **str)
 750{
 751    const char *cur;
 752    int ret;
 753
 754    cur = *str;
 755
 756    if ((*cur == '/') && (*(cur + 1) == '/')) {
 757        cur += 2;
 758        ret = rfc3986_parse_authority(uri, &cur);
 759        if (ret != 0) return(ret);
 760        ret = rfc3986_parse_path_ab_empty(uri, &cur);
 761        if (ret != 0) return(ret);
 762        *str = cur;
 763        return(0);
 764    } else if (*cur == '/') {
 765        ret = rfc3986_parse_path_absolute(uri, &cur);
 766        if (ret != 0) return(ret);
 767    } else if (ISA_PCHAR(cur)) {
 768        ret = rfc3986_parse_path_rootless(uri, &cur);
 769        if (ret != 0) return(ret);
 770    } else {
 771        /* path-empty is effectively empty */
 772        if (uri != NULL) {
 773            g_free(uri->path);
 774            uri->path = NULL;
 775        }
 776    }
 777    *str = cur;
 778    return (0);
 779}
 780
 781/**
 782 * rfc3986_parse_relative_ref:
 783 * @uri:  pointer to an URI structure
 784 * @str:  the string to analyze
 785 *
 786 * Parse an URI string and fills in the appropriate fields
 787 * of the @uri structure
 788 *
 789 * relative-ref  = relative-part [ "?" query ] [ "#" fragment ]
 790 * relative-part = "//" authority path-abempty
 791 *               / path-absolute
 792 *               / path-noscheme
 793 *               / path-empty
 794 *
 795 * Returns 0 or the error code
 796 */
 797static int
 798rfc3986_parse_relative_ref(URI *uri, const char *str) {
 799    int ret;
 800
 801    if ((*str == '/') && (*(str + 1) == '/')) {
 802        str += 2;
 803        ret = rfc3986_parse_authority(uri, &str);
 804        if (ret != 0) return(ret);
 805        ret = rfc3986_parse_path_ab_empty(uri, &str);
 806        if (ret != 0) return(ret);
 807    } else if (*str == '/') {
 808        ret = rfc3986_parse_path_absolute(uri, &str);
 809        if (ret != 0) return(ret);
 810    } else if (ISA_PCHAR(str)) {
 811        ret = rfc3986_parse_path_no_scheme(uri, &str);
 812        if (ret != 0) return(ret);
 813    } else {
 814        /* path-empty is effectively empty */
 815        if (uri != NULL) {
 816            g_free(uri->path);
 817            uri->path = NULL;
 818        }
 819    }
 820
 821    if (*str == '?') {
 822        str++;
 823        ret = rfc3986_parse_query(uri, &str);
 824        if (ret != 0) return(ret);
 825    }
 826    if (*str == '#') {
 827        str++;
 828        ret = rfc3986_parse_fragment(uri, &str);
 829        if (ret != 0) return(ret);
 830    }
 831    if (*str != 0) {
 832        uri_clean(uri);
 833        return(1);
 834    }
 835    return(0);
 836}
 837
 838
 839/**
 840 * rfc3986_parse:
 841 * @uri:  pointer to an URI structure
 842 * @str:  the string to analyze
 843 *
 844 * Parse an URI string and fills in the appropriate fields
 845 * of the @uri structure
 846 *
 847 * scheme ":" hier-part [ "?" query ] [ "#" fragment ]
 848 *
 849 * Returns 0 or the error code
 850 */
 851static int
 852rfc3986_parse(URI *uri, const char *str) {
 853    int ret;
 854
 855    ret = rfc3986_parse_scheme(uri, &str);
 856    if (ret != 0) return(ret);
 857    if (*str != ':') {
 858        return(1);
 859    }
 860    str++;
 861    ret = rfc3986_parse_hier_part(uri, &str);
 862    if (ret != 0) return(ret);
 863    if (*str == '?') {
 864        str++;
 865        ret = rfc3986_parse_query(uri, &str);
 866        if (ret != 0) return(ret);
 867    }
 868    if (*str == '#') {
 869        str++;
 870        ret = rfc3986_parse_fragment(uri, &str);
 871        if (ret != 0) return(ret);
 872    }
 873    if (*str != 0) {
 874        uri_clean(uri);
 875        return(1);
 876    }
 877    return(0);
 878}
 879
 880/**
 881 * rfc3986_parse_uri_reference:
 882 * @uri:  pointer to an URI structure
 883 * @str:  the string to analyze
 884 *
 885 * Parse an URI reference string and fills in the appropriate fields
 886 * of the @uri structure
 887 *
 888 * URI-reference = URI / relative-ref
 889 *
 890 * Returns 0 or the error code
 891 */
 892static int
 893rfc3986_parse_uri_reference(URI *uri, const char *str) {
 894    int ret;
 895
 896    if (str == NULL)
 897        return(-1);
 898    uri_clean(uri);
 899
 900    /*
 901     * Try first to parse absolute refs, then fallback to relative if
 902     * it fails.
 903     */
 904    ret = rfc3986_parse(uri, str);
 905    if (ret != 0) {
 906        uri_clean(uri);
 907        ret = rfc3986_parse_relative_ref(uri, str);
 908        if (ret != 0) {
 909            uri_clean(uri);
 910            return(ret);
 911        }
 912    }
 913    return(0);
 914}
 915
 916/**
 917 * uri_parse:
 918 * @str:  the URI string to analyze
 919 *
 920 * Parse an URI based on RFC 3986
 921 *
 922 * URI-reference = [ absoluteURI | relativeURI ] [ "#" fragment ]
 923 *
 924 * Returns a newly built URI or NULL in case of error
 925 */
 926URI *
 927uri_parse(const char *str) {
 928    URI *uri;
 929    int ret;
 930
 931    if (str == NULL)
 932        return(NULL);
 933    uri = uri_new();
 934    ret = rfc3986_parse_uri_reference(uri, str);
 935    if (ret) {
 936        uri_free(uri);
 937        return(NULL);
 938    }
 939    return(uri);
 940}
 941
 942/**
 943 * uri_parse_into:
 944 * @uri:  pointer to an URI structure
 945 * @str:  the string to analyze
 946 *
 947 * Parse an URI reference string based on RFC 3986 and fills in the
 948 * appropriate fields of the @uri structure
 949 *
 950 * URI-reference = URI / relative-ref
 951 *
 952 * Returns 0 or the error code
 953 */
 954int
 955uri_parse_into(URI *uri, const char *str) {
 956    return(rfc3986_parse_uri_reference(uri, str));
 957}
 958
 959/**
 960 * uri_parse_raw:
 961 * @str:  the URI string to analyze
 962 * @raw:  if 1 unescaping of URI pieces are disabled
 963 *
 964 * Parse an URI but allows to keep intact the original fragments.
 965 *
 966 * URI-reference = URI / relative-ref
 967 *
 968 * Returns a newly built URI or NULL in case of error
 969 */
 970URI *
 971uri_parse_raw(const char *str, int raw) {
 972    URI *uri;
 973    int ret;
 974
 975    if (str == NULL)
 976        return(NULL);
 977    uri = uri_new();
 978    if (raw) {
 979        uri->cleanup |= 2;
 980    }
 981    ret = uri_parse_into(uri, str);
 982    if (ret) {
 983        uri_free(uri);
 984        return(NULL);
 985    }
 986    return(uri);
 987}
 988
 989/************************************************************************
 990 *                                                                      *
 991 *                      Generic URI structure functions                 *
 992 *                                                                      *
 993 ************************************************************************/
 994
 995/**
 996 * uri_new:
 997 *
 998 * Simply creates an empty URI
 999 *
1000 * Returns the new structure or NULL in case of error
1001 */
1002URI *
1003uri_new(void) {
1004    URI *ret;
1005
1006    ret = g_new0(URI, 1);
1007    return(ret);
1008}
1009
/**
 * realloc2n:
 * @ret:  the buffer to grow
 * @max:  in/out capacity of the buffer, not counting the terminator
 *
 * Helper used while saving an URI: doubles the capacity recorded in
 * *@max and resizes @ret to that capacity plus one extra byte.
 *
 * No upper bound is enforced on the resulting size.
 * NOTE(review): the doubling is done in int arithmetic and is not
 * guarded against overflow for very large capacities.
 *
 * Returns the resized buffer
 */
static char *
realloc2n(char *ret, int *max)
{
    const int grown = *max * 2;
    char *bigger = g_realloc(ret, grown + 1);

    *max = grown;
    return bigger;
}
1026
1027/**
1028 * uri_to_string:
1029 * @uri:  pointer to an URI
1030 *
1031 * Save the URI as an escaped string
1032 *
1033 * Returns a new string (to be deallocated by caller)
1034 */
1035char *
1036uri_to_string(URI *uri) {
1037    char *ret = NULL;
1038    char *temp;
1039    const char *p;
1040    int len;
1041    int max;
1042
1043    if (uri == NULL) return(NULL);
1044
1045
1046    max = 80;
1047    ret = g_malloc(max + 1);
1048    len = 0;
1049
1050    if (uri->scheme != NULL) {
1051        p = uri->scheme;
1052        while (*p != 0) {
1053            if (len >= max) {
1054                temp = realloc2n(ret, &max);
1055                ret = temp;
1056            }
1057            ret[len++] = *p++;
1058        }
1059        if (len >= max) {
1060            temp = realloc2n(ret, &max);
1061            ret = temp;
1062        }
1063        ret[len++] = ':';
1064    }
1065    if (uri->opaque != NULL) {
1066        p = uri->opaque;
1067        while (*p != 0) {
1068            if (len + 3 >= max) {
1069                temp = realloc2n(ret, &max);
1070                ret = temp;
1071            }
1072            if (IS_RESERVED(*(p)) || IS_UNRESERVED(*(p)))
1073                ret[len++] = *p++;
1074            else {
1075                int val = *(unsigned char *)p++;
1076                int hi = val / 0x10, lo = val % 0x10;
1077                ret[len++] = '%';
1078                ret[len++] = hi + (hi > 9? 'A'-10 : '0');
1079                ret[len++] = lo + (lo > 9? 'A'-10 : '0');
1080            }
1081        }
1082    } else {
1083        if (uri->server != NULL) {
1084            if (len + 3 >= max) {
1085                temp = realloc2n(ret, &max);
1086                ret = temp;
1087            }
1088            ret[len++] = '/';
1089            ret[len++] = '/';
1090            if (uri->user != NULL) {
1091                p = uri->user;
1092                while (*p != 0) {
1093                    if (len + 3 >= max) {
1094                        temp = realloc2n(ret, &max);
1095                        ret = temp;
1096                    }
1097                    if ((IS_UNRESERVED(*(p))) ||
1098                        ((*(p) == ';')) || ((*(p) == ':')) ||
1099                        ((*(p) == '&')) || ((*(p) == '=')) ||
1100                        ((*(p) == '+')) || ((*(p) == '$')) ||
1101                        ((*(p) == ',')))
1102                        ret[len++] = *p++;
1103                    else {
1104                        int val = *(unsigned char *)p++;
1105                        int hi = val / 0x10, lo = val % 0x10;
1106                        ret[len++] = '%';
1107                        ret[len++] = hi + (hi > 9? 'A'-10 : '0');
1108                        ret[len++] = lo + (lo > 9? 'A'-10 : '0');
1109                    }
1110                }
1111                if (len + 3 >= max) {
1112                    temp = realloc2n(ret, &max);
1113                    ret = temp;
1114                }
1115                ret[len++] = '@';
1116            }
1117            p = uri->server;
1118            while (*p != 0) {
1119                if (len >= max) {
1120                    temp = realloc2n(ret, &max);
1121                    ret = temp;
1122                }
1123                ret[len++] = *p++;
1124            }
1125            if (uri->port > 0) {
1126                if (len + 10 >= max) {
1127                    temp = realloc2n(ret, &max);
1128                    ret = temp;
1129                }
1130                len += snprintf(&ret[len], max - len, ":%d", uri->port);
1131            }
1132        } else if (uri->authority != NULL) {
1133            if (len + 3 >= max) {
1134                temp = realloc2n(ret, &max);
1135                ret = temp;
1136            }
1137            ret[len++] = '/';
1138            ret[len++] = '/';
1139            p = uri->authority;
1140            while (*p != 0) {
1141                if (len + 3 >= max) {
1142                    temp = realloc2n(ret, &max);
1143                    ret = temp;
1144                }
1145                if ((IS_UNRESERVED(*(p))) ||
1146                    ((*(p) == '$')) || ((*(p) == ',')) || ((*(p) == ';')) ||
1147                    ((*(p) == ':')) || ((*(p) == '@')) || ((*(p) == '&')) ||
1148                    ((*(p) == '=')) || ((*(p) == '+')))
1149                    ret[len++] = *p++;
1150                else {
1151                    int val = *(unsigned char *)p++;
1152                    int hi = val / 0x10, lo = val % 0x10;
1153                    ret[len++] = '%';
1154                    ret[len++] = hi + (hi > 9? 'A'-10 : '0');
1155                    ret[len++] = lo + (lo > 9? 'A'-10 : '0');
1156                }
1157            }
1158        } else if (uri->scheme != NULL) {
1159            if (len + 3 >= max) {
1160                temp = realloc2n(ret, &max);
1161                ret = temp;
1162            }
1163            ret[len++] = '/';
1164            ret[len++] = '/';
1165        }
1166        if (uri->path != NULL) {
1167            p = uri->path;
1168            /*
1169             * the colon in file:///d: should not be escaped or
1170             * Windows accesses fail later.
1171             */
1172            if ((uri->scheme != NULL) &&
1173                (p[0] == '/') &&
1174                (((p[1] >= 'a') && (p[1] <= 'z')) ||
1175                 ((p[1] >= 'A') && (p[1] <= 'Z'))) &&
1176                (p[2] == ':') &&
1177                (!strcmp(uri->scheme, "file"))) {
1178                if (len + 3 >= max) {
1179                    temp = realloc2n(ret, &max);
1180                    ret = temp;
1181                }
1182                ret[len++] = *p++;
1183                ret[len++] = *p++;
1184                ret[len++] = *p++;
1185            }
1186            while (*p != 0) {
1187                if (len + 3 >= max) {
1188                    temp = realloc2n(ret, &max);
1189                    ret = temp;
1190                }
1191                if ((IS_UNRESERVED(*(p))) || ((*(p) == '/')) ||
1192                    ((*(p) == ';')) || ((*(p) == '@')) || ((*(p) == '&')) ||
1193                    ((*(p) == '=')) || ((*(p) == '+')) || ((*(p) == '$')) ||
1194                    ((*(p) == ',')))
1195                    ret[len++] = *p++;
1196                else {
1197                    int val = *(unsigned char *)p++;
1198                    int hi = val / 0x10, lo = val % 0x10;
1199                    ret[len++] = '%';
1200                    ret[len++] = hi + (hi > 9? 'A'-10 : '0');
1201                    ret[len++] = lo + (lo > 9? 'A'-10 : '0');
1202                }
1203            }
1204        }
1205        if (uri->query != NULL) {
1206            if (len + 1 >= max) {
1207                temp = realloc2n(ret, &max);
1208                ret = temp;
1209            }
1210            ret[len++] = '?';
1211            p = uri->query;
1212            while (*p != 0) {
1213                if (len + 1 >= max) {
1214                    temp = realloc2n(ret, &max);
1215                    ret = temp;
1216                }
1217                ret[len++] = *p++;
1218            }
1219        }
1220    }
1221    if (uri->fragment != NULL) {
1222        if (len + 3 >= max) {
1223            temp = realloc2n(ret, &max);
1224            ret = temp;
1225        }
1226        ret[len++] = '#';
1227        p = uri->fragment;
1228        while (*p != 0) {
1229            if (len + 3 >= max) {
1230                temp = realloc2n(ret, &max);
1231                ret = temp;
1232            }
1233            if ((IS_UNRESERVED(*(p))) || (IS_RESERVED(*(p))))
1234                ret[len++] = *p++;
1235            else {
1236                int val = *(unsigned char *)p++;
1237                int hi = val / 0x10, lo = val % 0x10;
1238                ret[len++] = '%';
1239                ret[len++] = hi + (hi > 9? 'A'-10 : '0');
1240                ret[len++] = lo + (lo > 9? 'A'-10 : '0');
1241            }
1242        }
1243    }
1244    if (len >= max) {
1245        temp = realloc2n(ret, &max);
1246        ret = temp;
1247    }
1248    ret[len] = 0;
1249    return(ret);
1250}
1251
1252/**
1253 * uri_clean:
1254 * @uri:  pointer to an URI
1255 *
1256 * Make sure the URI struct is free of content
1257 */
1258static void
1259uri_clean(URI *uri) {
1260    if (uri == NULL) return;
1261
1262    g_free(uri->scheme);
1263    uri->scheme = NULL;
1264    g_free(uri->server);
1265    uri->server = NULL;
1266    g_free(uri->user);
1267    uri->user = NULL;
1268    g_free(uri->path);
1269    uri->path = NULL;
1270    g_free(uri->fragment);
1271    uri->fragment = NULL;
1272    g_free(uri->opaque);
1273    uri->opaque = NULL;
1274    g_free(uri->authority);
1275    uri->authority = NULL;
1276    g_free(uri->query);
1277    uri->query = NULL;
1278}
1279
1280/**
1281 * uri_free:
1282 * @uri:  pointer to an URI
1283 *
1284 * Free up the URI struct
1285 */
1286void
1287uri_free(URI *uri) {
1288    uri_clean(uri);
1289    g_free(uri);
1290}
1291
1292/************************************************************************
1293 *                                                                      *
1294 *                      Helper functions                                *
1295 *                                                                      *
1296 ************************************************************************/
1297
/**
 * normalize_uri_path:
 * @path:  pointer to the path string
 *
 * Applies the 5 normalization steps to a path string--that is, RFC 2396
 * Section 5.2, steps 6.c through 6.g:
 *   - "./" segments and a trailing "." segment are dropped,
 *   - runs of '/' inside the path are collapsed to a single '/',
 *   - "<segment>/.." pairs are removed iteratively,
 *   - leading "/.." segments of an absolute path are discarded.
 * Leading slashes are left untouched.
 *
 * Normalization occurs directly on the string, no new allocation is done
 * (the result is never longer than the input).
 *
 * Returns 0 on success, or -1 if @path is NULL
 */
static int
normalize_uri_path(char *path)
{
    char *cur, *out;

    if (path == NULL) {
        return -1;
    }

    /*
     * Skip all initial "/" chars.  We want to get to the beginning of the
     * first non-empty segment.
     */
    cur = path;
    while (cur[0] == '/') {
        ++cur;
    }
    if (cur[0] == '\0') {
        return 0;
    }

    /* Keep everything we've seen so far.  */
    out = cur;

    /*
     * Analyze each segment in sequence for cases (c) and (d).
     */
    while (cur[0] != '\0') {
        /*
         * c) All occurrences of "./", where "." is a complete path segment,
         *    are removed from the buffer string.
         */
        if ((cur[0] == '.') && (cur[1] == '/')) {
            cur += 2;
            /* '//' normalization should be done at this point too */
            while (cur[0] == '/') {
                cur++;
            }
            continue;
        }

        /*
         * d) If the buffer string ends with "." as a complete path segment,
         *    that "." is removed.
         */
        if ((cur[0] == '.') && (cur[1] == '\0')) {
            break;
        }

        /* Otherwise keep the segment.  */
        while (cur[0] != '/') {
            if (cur[0] == '\0') {
                goto done_cd;
            }
            *out++ = *cur++;
        }
        /* normalize "//": skip all but the last slash of a run */
        while ((cur[0] == '/') && (cur[1] == '/')) {
            cur++;
        }

        *out++ = *cur++;
    }
 done_cd:
    out[0] = '\0';

    /* Reset to the beginning of the first segment for the next sequence.  */
    cur = path;
    while (cur[0] == '/') {
        ++cur;
    }
    if (cur[0] == '\0') {
        return 0;
    }

    /*
     * Analyze each segment in sequence for cases (e) and (f).
     *
     * e) All occurrences of "<segment>/../", where <segment> is a
     *    complete path segment not equal to "..", are removed from the
     *    buffer string.  Removal of these path segments is performed
     *    iteratively, removing the leftmost matching pattern on each
     *    iteration, until no matching pattern remains.
     *
     * f) If the buffer string ends with "<segment>/..", where <segment>
     *    is a complete path segment not equal to "..", that
     *    "<segment>/.." is removed.
     *
     * To satisfy the "iterative" clause in (e), we need to collapse the
     * string every time we find something that needs to be removed.  Thus,
     * we don't need to keep two pointers into the string: we only need a
     * "current position" pointer.
     */
    for (;;) {
        char *segp, *tmp;

        /*
         * At the beginning of each iteration of this loop, "cur" points to
         * the first character of the segment we want to examine.
         */

        /* Find the end of the current segment.  */
        segp = cur;
        while ((segp[0] != '/') && (segp[0] != '\0')) {
            ++segp;
        }

        /*
         * If this is the last segment, we're done (we need at least two
         * segments to meet the criteria for the (e) and (f) cases).
         */
        if (segp[0] == '\0') {
            break;
        }

        /*
         * If the first segment is "..", or if the next segment _isn't_ "..",
         * keep this segment and try the next one.
         */
        ++segp;
        if (((cur[0] == '.') && (cur[1] == '.') && (segp == cur + 3))
            || ((segp[0] != '.') || (segp[1] != '.')
                || ((segp[2] != '/') && (segp[2] != '\0')))) {
            cur = segp;
            continue;
        }

        /*
         * If we get here, remove this segment and the next one and back up
         * to the previous segment (if there is one), to implement the
         * "iteratively" clause.  It's pretty much impossible to back up
         * while maintaining two pointers into the buffer, so just compact
         * the whole buffer now.
         */

        /* If this is the end of the buffer, we're done.  */
        if (segp[2] == '\0') {
            cur[0] = '\0';
            break;
        }
        /* The two regions overlap, so copy by hand rather than strcpy(). */
        tmp = cur;
        segp += 3;
        while ((*tmp++ = *segp++) != 0) {
            ;
        }

        /*
         * Look for the end of a previous segment.  Landing on "path" only
         * means "no previous segment" if we never moved, or if the string
         * starts with '/'.  Otherwise path[0] is itself a one-character
         * segment (as in "a/b/../../c") and we must fall through so that
         * it gets re-examined.
         */
        segp = cur;
        while ((segp > path) && ((--segp)[0] == '/')) {
            ;
        }
        if ((segp == path) && ((cur == path) || (path[0] == '/'))) {
            continue;
        }

        /*
         * "segp" is pointing to the end of a previous segment; find its
         * start.  We need to back up to the previous segment and start
         * over with that to handle things like "foo/bar/../..".  If we
         * don't do this, then on the first pass we'll remove the "bar/..",
         * but be pointing at the second ".." so we won't realize we can also
         * remove the "foo/..".
         */
        cur = segp;
        while ((cur > path) && (cur[-1] != '/')) {
            --cur;
        }
    }

    /*
     * g) If the resulting buffer string still begins with one or more
     *    complete path segments of "..", then the reference is
     *    considered to be in error. Implementations may handle this
     *    error by retaining these components in the resolved path (i.e.,
     *    treating them as part of the final URI), by removing them from
     *    the resolved path (i.e., discarding relative levels above the
     *    root), or by avoiding traversal of the reference.
     *
     * We discard them from the final path (absolute paths only).
     */
    if (path[0] == '/') {
        cur = path;
        while ((cur[0] == '/') && (cur[1] == '.') && (cur[2] == '.')
               && ((cur[3] == '/') || (cur[3] == '\0'))) {
            cur += 3;
        }

        if (cur != path) {
            out = path;
            while (cur[0] != '\0') {
                *out++ = *cur++;
            }
            out[0] = 0;
        }
    }

    return 0;
}
1486
/* Returns 1 if @c is an ASCII hexadecimal digit (0-9, a-f, A-F), else 0. */
static int
is_hex(char c)
{
    int is_digit = (c >= '0') && (c <= '9');
    int is_lower = (c >= 'a') && (c <= 'f');
    int is_upper = (c >= 'A') && (c <= 'F');

    return (is_digit || is_lower || is_upper) ? 1 : 0;
}
1494
1495
/*
 * Numeric value (0-15) of one hexadecimal digit.  Only meaningful for
 * characters that is_hex() accepts.
 */
static int
uri_hex_digit_value(char c)
{
    if ((c >= '0') && (c <= '9')) {
        return c - '0';
    }
    if ((c >= 'a') && (c <= 'f')) {
        return (c - 'a') + 10;
    }
    return (c - 'A') + 10;
}

/**
 * uri_string_unescape:
 * @str:  the string to unescape
 * @len:   the length in bytes to unescape (or <= 0 to indicate full string)
 * @target:  optional destination buffer
 *
 * Unescaping routine, which does not check that the string is an URI.
 * Each "%XX" sequence with two hexadecimal digits becomes the single byte
 * with that value; everything else, including a '%' that is not followed
 * by two hex digits within @len, is copied unchanged.  The output is
 * never longer than the input, so when @target is given it must have room
 * for @len bytes plus the terminator.
 *
 * Returns @target when one was supplied, otherwise a newly allocated
 * string holding the unescaped copy; NULL only in case of error
 */
char *
uri_string_unescape(const char *str, int len, char *target)
{
    char *ret, *out;
    const char *in;

    if (str == NULL) {
        return NULL;
    }
    if (len <= 0) {
        len = strlen(str);
    }
    if (len < 0) {
        return NULL;
    }

    if (target != NULL) {
        ret = target;
    } else {
        ret = g_malloc(len + 1);
    }

    for (in = str, out = ret; len > 0; out++) {
        if ((len > 2) && (in[0] == '%') && is_hex(in[1]) && is_hex(in[2])) {
            /* Fold the two digits into one byte and skip the whole triplet. */
            *out = (char)(uri_hex_digit_value(in[1]) * 16 +
                          uri_hex_digit_value(in[2]));
            in += 3;
            len -= 3;
        } else {
            *out = *in++;
            len--;
        }
    }
    *out = 0;
    return ret;
}
1553
/**
 * uri_string_escape:
 * @str:  string to escape
 * @list: exception list string of chars not to escape (may be NULL)
 *
 * This routine escapes a string to %XX hex form (upper-case digits).
 * Unreserved characters, the '@' character and every character found in
 * the exception list are copied unchanged; all other bytes are escaped.
 *
 * Returns a new escaped string (to be freed by the caller), or NULL if
 * @str is NULL.
 */
char *
uri_string_escape(const char *str, const char *list)
{
    static const char hex[] = "0123456789ABCDEF";
    const char *in;
    char *ret;
    int len, out;

    if (str == NULL) {
        return NULL;
    }
    if (str[0] == 0) {
        return g_strdup(str);
    }

    len = strlen(str) + 20;
    ret = g_malloc(len);
    out = 0;

    for (in = str; *in != 0; in++) {
        /*
         * Work on the byte as unsigned: with a plain (possibly signed)
         * char, shifting a byte >= 0x80 would yield a negative value and
         * a bogus hex digit.
         */
        unsigned char ch = (unsigned char)*in;

        /* Keep room for a full "%XX" triplet plus the terminator. */
        if (len - out <= 3) {
            ret = realloc2n(ret, &len);
        }

        if ((ch != '@') && (!IS_UNRESERVED(ch)) &&
            ((list == NULL) || (strchr(list, ch) == NULL))) {
            ret[out++] = '%';
            ret[out++] = hex[ch >> 4];
            ret[out++] = hex[ch & 0xF];
        } else {
            ret[out++] = (char)ch;
        }
    }
    ret[out] = 0;
    return ret;
}
1612
1613/************************************************************************
1614 *                                                                      *
1615 *                      Public functions                                *
1616 *                                                                      *
1617 ************************************************************************/
1618
1619/**
1620 * uri_resolve:
1621 * @URI:  the URI instance found in the document
1622 * @base:  the base value
1623 *
1624 * Computes he final URI of the reference done by checking that
1625 * the given URI is valid, and building the final URI using the
1626 * base URI. This is processed according to section 5.2 of the
1627 * RFC 2396
1628 *
1629 * 5.2. Resolving Relative References to Absolute Form
1630 *
1631 * Returns a new URI string (to be freed by the caller) or NULL in case
1632 *         of error.
1633 */
1634char *
1635uri_resolve(const char *uri, const char *base) {
1636    char *val = NULL;
1637    int ret, len, indx, cur, out;
1638    URI *ref = NULL;
1639    URI *bas = NULL;
1640    URI *res = NULL;
1641
1642    /*
1643     * 1) The URI reference is parsed into the potential four components and
1644     *    fragment identifier, as described in Section 4.3.
1645     *
1646     *    NOTE that a completely empty URI is treated by modern browsers
1647     *    as a reference to "." rather than as a synonym for the current
1648     *    URI.  Should we do that here?
1649     */
1650    if (uri == NULL)
1651        ret = -1;
1652    else {
1653        if (*uri) {
1654            ref = uri_new();
1655            ret = uri_parse_into(ref, uri);
1656        }
1657        else
1658            ret = 0;
1659    }
1660    if (ret != 0)
1661        goto done;
1662    if ((ref != NULL) && (ref->scheme != NULL)) {
1663        /*
1664         * The URI is absolute don't modify.
1665         */
1666        val = g_strdup(uri);
1667        goto done;
1668    }
1669    if (base == NULL)
1670        ret = -1;
1671    else {
1672        bas = uri_new();
1673        ret = uri_parse_into(bas, base);
1674    }
1675    if (ret != 0) {
1676        if (ref)
1677            val = uri_to_string(ref);
1678        goto done;
1679    }
1680    if (ref == NULL) {
1681        /*
1682         * the base fragment must be ignored
1683         */
1684        g_free(bas->fragment);
1685        bas->fragment = NULL;
1686        val = uri_to_string(bas);
1687        goto done;
1688    }
1689
1690    /*
1691     * 2) If the path component is empty and the scheme, authority, and
1692     *    query components are undefined, then it is a reference to the
1693     *    current document and we are done.  Otherwise, the reference URI's
1694     *    query and fragment components are defined as found (or not found)
1695     *    within the URI reference and not inherited from the base URI.
1696     *
1697     *    NOTE that in modern browsers, the parsing differs from the above
1698     *    in the following aspect:  the query component is allowed to be
1699     *    defined while still treating this as a reference to the current
1700     *    document.
1701     */
1702    res = uri_new();
1703    if ((ref->scheme == NULL) && (ref->path == NULL) &&
1704        ((ref->authority == NULL) && (ref->server == NULL))) {
1705        res->scheme = g_strdup(bas->scheme);
1706        if (bas->authority != NULL)
1707            res->authority = g_strdup(bas->authority);
1708        else if (bas->server != NULL) {
1709            res->server = g_strdup(bas->server);
1710            res->user = g_strdup(bas->user);
1711            res->port = bas->port;
1712        }
1713        res->path = g_strdup(bas->path);
1714        if (ref->query != NULL) {
1715            res->query = g_strdup (ref->query);
1716        } else {
1717            res->query = g_strdup(bas->query);
1718        }
1719        res->fragment = g_strdup(ref->fragment);
1720        goto step_7;
1721    }
1722
1723    /*
1724     * 3) If the scheme component is defined, indicating that the reference
1725     *    starts with a scheme name, then the reference is interpreted as an
1726     *    absolute URI and we are done.  Otherwise, the reference URI's
1727     *    scheme is inherited from the base URI's scheme component.
1728     */
1729    if (ref->scheme != NULL) {
1730        val = uri_to_string(ref);
1731        goto done;
1732    }
1733    res->scheme = g_strdup(bas->scheme);
1734
1735    res->query = g_strdup(ref->query);
1736    res->fragment = g_strdup(ref->fragment);
1737
1738    /*
1739     * 4) If the authority component is defined, then the reference is a
1740     *    network-path and we skip to step 7.  Otherwise, the reference
1741     *    URI's authority is inherited from the base URI's authority
1742     *    component, which will also be undefined if the URI scheme does not
1743     *    use an authority component.
1744     */
1745    if ((ref->authority != NULL) || (ref->server != NULL)) {
1746        if (ref->authority != NULL)
1747            res->authority = g_strdup(ref->authority);
1748        else {
1749            res->server = g_strdup(ref->server);
1750            res->user = g_strdup(ref->user);
1751            res->port = ref->port;
1752        }
1753        res->path = g_strdup(ref->path);
1754        goto step_7;
1755    }
1756    if (bas->authority != NULL)
1757        res->authority = g_strdup(bas->authority);
1758    else if (bas->server != NULL) {
1759        res->server = g_strdup(bas->server);
1760        res->user = g_strdup(bas->user);
1761        res->port = bas->port;
1762    }
1763
1764    /*
1765     * 5) If the path component begins with a slash character ("/"), then
1766     *    the reference is an absolute-path and we skip to step 7.
1767     */
1768    if ((ref->path != NULL) && (ref->path[0] == '/')) {
1769        res->path = g_strdup(ref->path);
1770        goto step_7;
1771    }
1772
1773
1774    /*
1775     * 6) If this step is reached, then we are resolving a relative-path
1776     *    reference.  The relative path needs to be merged with the base
1777     *    URI's path.  Although there are many ways to do this, we will
1778     *    describe a simple method using a separate string buffer.
1779     *
1780     * Allocate a buffer large enough for the result string.
1781     */
1782    len = 2; /* extra / and 0 */
1783    if (ref->path != NULL)
1784        len += strlen(ref->path);
1785    if (bas->path != NULL)
1786        len += strlen(bas->path);
1787    res->path = g_malloc(len);
1788    res->path[0] = 0;
1789
1790    /*
1791     * a) All but the last segment of the base URI's path component is
1792     *    copied to the buffer.  In other words, any characters after the
1793     *    last (right-most) slash character, if any, are excluded.
1794     */
1795    cur = 0;
1796    out = 0;
1797    if (bas->path != NULL) {
1798        while (bas->path[cur] != 0) {
1799            while ((bas->path[cur] != 0) && (bas->path[cur] != '/'))
1800                cur++;
1801            if (bas->path[cur] == 0)
1802                break;
1803
1804            cur++;
1805            while (out < cur) {
1806                res->path[out] = bas->path[out];
1807                out++;
1808            }
1809        }
1810    }
1811    res->path[out] = 0;
1812
1813    /*
1814     * b) The reference's path component is appended to the buffer
1815     *    string.
1816     */
1817    if (ref->path != NULL && ref->path[0] != 0) {
1818        indx = 0;
1819        /*
1820         * Ensure the path includes a '/'
1821         */
1822        if ((out == 0) && (bas->server != NULL))
1823            res->path[out++] = '/';
1824        while (ref->path[indx] != 0) {
1825            res->path[out++] = ref->path[indx++];
1826        }
1827    }
1828    res->path[out] = 0;
1829
1830    /*
1831     * Steps c) to h) are really path normalization steps
1832     */
1833    normalize_uri_path(res->path);
1834
1835step_7:
1836
1837    /*
1838     * 7) The resulting URI components, including any inherited from the
1839     *    base URI, are recombined to give the absolute form of the URI
1840     *    reference.
1841     */
1842    val = uri_to_string(res);
1843
1844done:
1845    if (ref != NULL)
1846        uri_free(ref);
1847    if (bas != NULL)
1848        uri_free(bas);
1849    if (res != NULL)
1850        uri_free(res);
1851    return(val);
1852}
1853
1854/**
1855 * uri_resolve_relative:
1856 * @URI:  the URI reference under consideration
1857 * @base:  the base value
1858 *
1859 * Expresses the URI of the reference in terms relative to the
1860 * base.  Some examples of this operation include:
1861 *     base = "http://site1.com/docs/book1.html"
1862 *        URI input                        URI returned
1863 *     docs/pic1.gif                    pic1.gif
1864 *     docs/img/pic1.gif                img/pic1.gif
1865 *     img/pic1.gif                     ../img/pic1.gif
1866 *     http://site1.com/docs/pic1.gif   pic1.gif
1867 *     http://site2.com/docs/pic1.gif   http://site2.com/docs/pic1.gif
1868 *
1869 *     base = "docs/book1.html"
1870 *        URI input                        URI returned
1871 *     docs/pic1.gif                    pic1.gif
1872 *     docs/img/pic1.gif                img/pic1.gif
1873 *     img/pic1.gif                     ../img/pic1.gif
1874 *     http://site1.com/docs/pic1.gif   http://site1.com/docs/pic1.gif
1875 *
1876 *
1877 * Note: if the URI reference is really weird or complicated, it may be
1878 *       worthwhile to first convert it into a "nice" one by calling
1879 *       uri_resolve (using 'base') before calling this routine,
1880 *       since this routine (for reasonable efficiency) assumes URI has
1881 *       already been through some validation.
1882 *
1883 * Returns a new URI string (to be freed by the caller) or NULL in case
1884 * error.
1885 */
1886char *
1887uri_resolve_relative (const char *uri, const char * base)
1888{
1889    char *val = NULL;
1890    int ret;
1891    int ix;
1892    int pos = 0;
1893    int nbslash = 0;
1894    int len;
1895    URI *ref = NULL;
1896    URI *bas = NULL;
1897    char *bptr, *uptr, *vptr;
1898    int remove_path = 0;
1899
1900    if ((uri == NULL) || (*uri == 0))
1901        return NULL;
1902
1903    /*
1904     * First parse URI into a standard form
1905     */
1906    ref = uri_new ();
1907    /* If URI not already in "relative" form */
1908    if (uri[0] != '.') {
1909        ret = uri_parse_into (ref, uri);
1910        if (ret != 0)
1911            goto done;          /* Error in URI, return NULL */
1912    } else
1913        ref->path = g_strdup(uri);
1914
1915    /*
1916     * Next parse base into the same standard form
1917     */
1918    if ((base == NULL) || (*base == 0)) {
1919        val = g_strdup (uri);
1920        goto done;
1921    }
1922    bas = uri_new ();
1923    if (base[0] != '.') {
1924        ret = uri_parse_into (bas, base);
1925        if (ret != 0)
1926            goto done;          /* Error in base, return NULL */
1927    } else
1928        bas->path = g_strdup(base);
1929
1930    /*
1931     * If the scheme / server on the URI differs from the base,
1932     * just return the URI
1933     */
1934    if ((ref->scheme != NULL) &&
1935        ((bas->scheme == NULL) ||
1936         (strcmp (bas->scheme, ref->scheme)) ||
1937         (strcmp (bas->server, ref->server)))) {
1938        val = g_strdup (uri);
1939        goto done;
1940    }
1941    if (bas->path == ref->path ||
1942        (bas->path && ref->path && !strcmp(bas->path, ref->path))) {
1943        val = g_strdup("");
1944        goto done;
1945    }
1946    if (bas->path == NULL) {
1947        val = g_strdup(ref->path);
1948        goto done;
1949    }
1950    if (ref->path == NULL) {
1951        ref->path = (char *) "/";
1952        remove_path = 1;
1953    }
1954
1955    /*
1956     * At this point (at last!) we can compare the two paths
1957     *
1958     * First we take care of the special case where either of the
1959     * two path components may be missing (bug 316224)
1960     */
1961    if (bas->path == NULL) {
1962        if (ref->path != NULL) {
1963            uptr = ref->path;
1964            if (*uptr == '/')
1965                uptr++;
1966            /* exception characters from uri_to_string */
1967            val = uri_string_escape(uptr, "/;&=+$,");
1968        }
1969        goto done;
1970    }
1971    bptr = bas->path;
1972    if (ref->path == NULL) {
1973        for (ix = 0; bptr[ix] != 0; ix++) {
1974            if (bptr[ix] == '/')
1975                nbslash++;
1976        }
1977        uptr = NULL;
1978        len = 1;        /* this is for a string terminator only */
1979    } else {
1980    /*
1981     * Next we compare the two strings and find where they first differ
1982     */
1983        if ((ref->path[pos] == '.') && (ref->path[pos+1] == '/'))
1984            pos += 2;
1985        if ((*bptr == '.') && (bptr[1] == '/'))
1986            bptr += 2;
1987        else if ((*bptr == '/') && (ref->path[pos] != '/'))
1988            bptr++;
1989        while ((bptr[pos] == ref->path[pos]) && (bptr[pos] != 0))
1990            pos++;
1991
1992        if (bptr[pos] == ref->path[pos]) {
1993            val = g_strdup("");
1994            goto done;          /* (I can't imagine why anyone would do this) */
1995        }
1996
1997        /*
1998         * In URI, "back up" to the last '/' encountered.  This will be the
1999         * beginning of the "unique" suffix of URI
2000         */
2001        ix = pos;
2002        if ((ref->path[ix] == '/') && (ix > 0))
2003            ix--;
2004        else if ((ref->path[ix] == 0) && (ix > 1) && (ref->path[ix - 1] == '/'))
2005            ix -= 2;
2006        for (; ix > 0; ix--) {
2007            if (ref->path[ix] == '/')
2008                break;
2009        }
2010        if (ix == 0) {
2011            uptr = ref->path;
2012        } else {
2013            ix++;
2014            uptr = &ref->path[ix];
2015        }
2016
2017        /*
2018         * In base, count the number of '/' from the differing point
2019         */
2020        if (bptr[pos] != ref->path[pos]) {/* check for trivial URI == base */
2021            for (; bptr[ix] != 0; ix++) {
2022                if (bptr[ix] == '/')
2023                    nbslash++;
2024            }
2025        }
2026        len = strlen (uptr) + 1;
2027    }
2028
2029    if (nbslash == 0) {
2030        if (uptr != NULL)
2031            /* exception characters from uri_to_string */
2032            val = uri_string_escape(uptr, "/;&=+$,");
2033        goto done;
2034    }
2035
2036    /*
2037     * Allocate just enough space for the returned string -
2038     * length of the remainder of the URI, plus enough space
2039     * for the "../" groups, plus one for the terminator
2040     */
2041    val = g_malloc (len + 3 * nbslash);
2042    vptr = val;
2043    /*
2044     * Put in as many "../" as needed
2045     */
2046    for (; nbslash>0; nbslash--) {
2047        *vptr++ = '.';
2048        *vptr++ = '.';
2049        *vptr++ = '/';
2050    }
2051    /*
2052     * Finish up with the end of the URI
2053     */
2054    if (uptr != NULL) {
2055        if ((vptr > val) && (len > 0) &&
2056            (uptr[0] == '/') && (vptr[-1] == '/')) {
2057            memcpy (vptr, uptr + 1, len - 1);
2058            vptr[len - 2] = 0;
2059        } else {
2060            memcpy (vptr, uptr, len);
2061            vptr[len - 1] = 0;
2062        }
2063    } else {
2064        vptr[len - 1] = 0;
2065    }
2066
2067    /* escape the freshly-built path */
2068    vptr = val;
2069        /* exception characters from uri_to_string */
2070    val = uri_string_escape(vptr, "/;&=+$,");
2071    g_free(vptr);
2072
2073done:
2074    /*
2075     * Free the working variables
2076     */
2077    if (remove_path != 0)
2078        ref->path = NULL;
2079    if (ref != NULL)
2080        uri_free (ref);
2081    if (bas != NULL)
2082        uri_free (bas);
2083
2084    return val;
2085}
2086
2087/*
2088 * Utility functions to help parse and assemble query strings.
2089 */
2090
2091struct QueryParams *
2092query_params_new (int init_alloc)
2093{
2094    struct QueryParams *ps;
2095
2096    if (init_alloc <= 0) init_alloc = 1;
2097
2098    ps = g_new(QueryParams, 1);
2099    ps->n = 0;
2100    ps->alloc = init_alloc;
2101    ps->p = g_new(QueryParam, ps->alloc);
2102
2103    return ps;
2104}
2105
2106/* Ensure there is space to store at least one more parameter
2107 * at the end of the set.
2108 */
2109static int
2110query_params_append (struct QueryParams *ps,
2111               const char *name, const char *value)
2112{
2113    if (ps->n >= ps->alloc) {
2114        ps->p = g_renew(QueryParam, ps->p, ps->alloc * 2);
2115        ps->alloc *= 2;
2116    }
2117
2118    ps->p[ps->n].name = g_strdup(name);
2119    ps->p[ps->n].value = g_strdup(value);
2120    ps->p[ps->n].ignore = 0;
2121    ps->n++;
2122
2123    return 0;
2124}
2125
2126void
2127query_params_free (struct QueryParams *ps)
2128{
2129    int i;
2130
2131    for (i = 0; i < ps->n; ++i) {
2132        g_free (ps->p[i].name);
2133        g_free (ps->p[i].value);
2134    }
2135    g_free (ps->p);
2136    g_free (ps);
2137}
2138
2139struct QueryParams *
2140query_params_parse (const char *query)
2141{
2142    struct QueryParams *ps;
2143    const char *end, *eq;
2144
2145    ps = query_params_new (0);
2146    if (!query || query[0] == '\0') return ps;
2147
2148    while (*query) {
2149        char *name = NULL, *value = NULL;
2150
2151        /* Find the next separator, or end of the string. */
2152        end = strchr (query, '&');
2153        if (!end)
2154            end = strchr (query, ';');
2155        if (!end)
2156            end = query + strlen (query);
2157
2158        /* Find the first '=' character between here and end. */
2159        eq = strchr (query, '=');
2160        if (eq && eq >= end) eq = NULL;
2161
2162        /* Empty section (eg. "&&"). */
2163        if (end == query)
2164            goto next;
2165
2166        /* If there is no '=' character, then we have just "name"
2167         * and consistent with CGI.pm we assume value is "".
2168         */
2169        else if (!eq) {
2170            name = uri_string_unescape (query, end - query, NULL);
2171            value = NULL;
2172        }
2173        /* Or if we have "name=" here (works around annoying
2174         * problem when calling uri_string_unescape with len = 0).
2175         */
2176        else if (eq+1 == end) {
2177            name = uri_string_unescape (query, eq - query, NULL);
2178            value = g_new0(char, 1);
2179        }
2180        /* If the '=' character is at the beginning then we have
2181         * "=value" and consistent with CGI.pm we _ignore_ this.
2182         */
2183        else if (query == eq)
2184            goto next;
2185
2186        /* Otherwise it's "name=value". */
2187        else {
2188            name = uri_string_unescape (query, eq - query, NULL);
2189            value = uri_string_unescape (eq+1, end - (eq+1), NULL);
2190        }
2191
2192        /* Append to the parameter set. */
2193        query_params_append (ps, name, value);
2194        g_free(name);
2195        g_free(value);
2196
2197    next:
2198        query = end;
2199        if (*query) query ++; /* skip '&' separator */
2200    }
2201
2202    return ps;
2203}
2204