Module URI
In: uri.rb
uri/mailto.rb
uri/ldap.rb
uri/https.rb
uri/http.rb
uri/generic.rb
uri/ftp.rb
uri/common.rb

uri/common.rb

Author:Akira Yamada <akira@ruby-lang.org>
Revision:$Id: common.rb,v 1.11.2.7 2005/06/24 04:15:20 akira Exp $
License:You can redistribute it and/or modify it under the same terms as Ruby.

Methods

extract   join   parse   regexp   split  

Included Modules

REGEXP

Classes and Modules

Module URI::Escape
Module URI::REGEXP
Class URI::BadURIError
Class URI::Error
Class URI::FTP
Class URI::Generic
Class URI::HTTP
Class URI::HTTPS
Class URI::InvalidComponentError
Class URI::InvalidURIError
Class URI::LDAP
Class URI::MailTo

Public Class methods

Synopsis

  URI::extract(str[, schemes][,&blk])

Args

str:String to extract URIs from.
schemes:Limit URI matching to specific schemes.

Description

Extracts URIs from a string. If a block is given, iterates through all matched URIs and returns nil; otherwise returns an array of the matches.

Usage

  require "uri"

  URI.extract("text here http://foo.example.org/bla and here mailto:test@example.com and here also.")
  # => ["http://foo.example.org/bla", "mailto:test@example.com"]

[Source]

# File uri/common.rb, line 547
  # Scans +str+ for substrings matching the pattern returned by
  # regexp(schemes).
  #
  # With a block, each whole match is yielded in turn and nil is returned.
  # Without a block, an Array of the whole matches is returned.
  def self.extract(str, schemes = nil, &block)
    pattern = regexp(schemes)

    unless block_given?
      found = []
      # The whole match is read from the last-match data, not from scan's
      # block argument.
      str.scan(pattern) { found << Regexp.last_match(0) }
      return found
    end

    str.scan(pattern) { yield Regexp.last_match(0) }
    nil
  end

Synopsis

  URI::join(str[, str, ...])

Args

str:String(s) to work with

Description

Joins URIs.

Usage

  require 'uri'

  p URI.join("http://localhost/","main.rbx")
  # => #<URI::HTTP:0x2022ac02 URL:http://localhost/main.rbx>

[Source]

# File uri/common.rb, line 515
  # Parses the first string with parse, then merges every remaining
  # argument into the running result, in order, and returns the final
  # merged object.
  def self.join(*str)
    base = self.parse(str[0])
    str[1..-1].inject(base) { |merged, ref| merged.merge(ref) }
  end

Synopsis

  URI::parse(uri_str)

Args

uri_str:String with URI.

Description

Creates an instance of one of URI’s subclasses from the string.

Raises

URI::InvalidURIError

  Raised if the given URI is not valid.

Usage

  require 'uri'

  uri = URI.parse("http://www.ruby-lang.org/")
  p uri
  # => #<URI::HTTP:0x202281be URL:http://www.ruby-lang.org/>
  p uri.scheme
  # => "http"
  p uri.host
  # => "www.ruby-lang.org"

[Source]

# File uri/common.rb, line 479
  # Splits +uri+ into its components with split and builds an object from
  # them.
  #
  # When the scheme, upcased, is a key of @@schemes, the class stored under
  # that key is instantiated; otherwise Generic is used. Either way the
  # constructor receives the components in the order split returns them:
  # scheme, userinfo, host, port, registry, path, opaque, query, fragment.
  def self.parse(uri)
    components = self.split(uri)
    scheme = components[0]
    key = scheme && scheme.upcase

    klass =
      if key && @@schemes.include?(key)
        @@schemes[key]
      else
        Generic
      end

    klass.new(*components)
  end

Synopsis

  URI::regexp([match_schemes])

Args

match_schemes:Array of schemes. If given, the resulting regexp matches only URIs whose scheme is one of the match_schemes.

Description

Returns a Regexp object which matches URI-like strings. The Regexp object returned by this method includes an arbitrary number of capture groups (parentheses). Never rely on their number.

Usage

  require 'uri'

  # extract first URI from html_string
  html_string.slice(URI.regexp)

  # remove ftp URIs
  html_string.sub(URI.regexp(['ftp']), '')

  # You should not rely on the number of parentheses
  html_string.scan(URI.regexp) do |*matches|
    p $&
  end

[Source]

# File uri/common.rb, line 589
  # Returns the pattern used to recognise URI-like strings.
  #
  # With no +schemes+ (nil or false) the ABS_URI_REF constant is returned
  # as is. Otherwise a new Regexp is built from PATTERN::X_ABS_URI, prefixed
  # with a lookahead requiring one of +schemes+ followed by ":".
  def self.regexp(schemes = nil)
    return ABS_URI_REF unless schemes

    /(?=#{Regexp.union(*schemes)}:)#{PATTERN::X_ABS_URI}/xn
  end

Synopsis

  URI::split(uri)

Args

uri:String with URI.

Description

Splits the string into the following parts and returns an array with the result:

  * Scheme
  * Userinfo
  * Host
  * Port
  * Registry
  * Path
  * Opaque
  * Query
  * Fragment

Usage

  require 'uri'

  p URI.split("http://www.ruby-lang.org/")
  # => ["http", nil, "www.ruby-lang.org", nil, nil, "/", nil, nil, nil]

[Source]

# File uri/common.rb, line 376
  # Breaks +uri+ into a nine-element Array:
  #
  #   [scheme, userinfo, host, port, registry, path, opaque, query, fragment]
  #
  # An empty string gives all-nil components except for an empty path.
  # Raises InvalidURIError when +uri+ matches neither ABS_URI nor REL_URI,
  # or when an ABS_URI match has no scheme, or has none of opaque, path,
  # host and registry.
  def self.split(uri)
    case uri
    when ''
      # Null URI: nothing to capture; path is defaulted after the case.

    when ABS_URI
      # URI-reference = [ absoluteURI | relativeURI ] [ "#" fragment ]
      #
      # absoluteURI   = scheme ":" ( hier_part | opaque_part )
      # hier_part     = ( net_path | abs_path ) [ "?" query ]
      # opaque_part   = uric_no_slash *uric
      #
      # abs_path      = "/"  path_segments
      # net_path      = "//" authority [ abs_path ]
      #
      # authority     = server | reg_name
      # server        = [ [ userinfo "@" ] hostport ]
      scheme, opaque, userinfo, host, port,
        registry, path, query, fragment = Regexp.last_match.captures

      unless scheme
        raise InvalidURIError,
          "bad URI(absolute but no scheme): #{uri}"
      end
      unless opaque || path || host || registry
        raise InvalidURIError,
          "bad URI(absolute but no path): #{uri}"
      end

    when REL_URI
      # URI-reference = [ absoluteURI | relativeURI ] [ "#" fragment ]
      #
      # relativeURI   = ( net_path | abs_path | rel_path ) [ "?" query ]
      #
      # net_path      = "//" authority [ abs_path ]
      # abs_path      = "/"  path_segments
      # rel_path      = rel_segment [ abs_path ]
      #
      # authority     = server | reg_name
      # server        = [ [ userinfo "@" ] hostport ]
      userinfo, host, port, registry,
        rel_segment, abs_path, query, fragment = Regexp.last_match.captures

      # A relative reference carries neither a scheme nor an opaque part.
      scheme = opaque = nil

      # The path is whichever of rel_segment / abs_path was captured,
      # concatenated when both are present, nil when neither is.
      path =
        if rel_segment && abs_path
          rel_segment + abs_path
        else
          rel_segment || abs_path
        end

    else
      raise InvalidURIError, "bad URI(is not URI?): #{uri}"
    end

    # (see RFC2396 Section 5.2)
    path = '' unless path || opaque

    [scheme, userinfo, host, port, registry, path, opaque, query, fragment]
  end

[Validate]