VBA – Extract Base URL From A Full URL

Ever needed to get the Base URL (also referred to as Origin) from a full URL?

So given a URL like:

http://office.microsoft.com/en-us/templates/CT101527321033.aspx?av=ZAC

get/return:

http://office.microsoft.com

Well, it truly isn’t hard once you know how. Below are a couple different approaches that you can employ to get the job done:

 

The Solution

Option 1 – Microsoft HTML Object Library

'---------------------------------------------------------------------------------------
' Procedure : MSHTML_GetBaseURL
' Author    : Daniel Pineault, CARDA Consultants Inc.
' Website   : http://www.cardaconsultants.com
' Purpose   : Extract the base URL from a full URL
' Copyright : The following is release as Attribution-ShareAlike 4.0 International
'             (CC BY-SA 4.0) - https://creativecommons.org/licenses/by-sa/4.0/
' Req'd Refs: Late Binding  -> None required
'             Early Binding -> Microsoft HTML Object Library
'
' Input Variables:
' ~~~~~~~~~~~~~~~~
' sURL      : Full URL to extract the base URL from
'
' Usage:
' ~~~~~~
' ? MSHTML_GetBaseURL("http://office.microsoft.com/en-us/templates/CT101527321033.aspx?av=ZAC")
'   Returns -> http://office.microsoft.com
'
' Revision History:
' Rev       Date(yyyy-mm-dd)        Description
' **************************************************************************************
' 1         2023-03-23
'---------------------------------------------------------------------------------------
Function MSHTML_GetBaseURL(ByVal sURL As String) As String
    On Error GoTo Error_Handler
    #Const HF_EarlyBind = False     'True => Early Binding / False => Late Binding
    #If HF_EarlyBind = True Then
        Dim oHTMLFile         As MSHTML.HTMLDocument
        Dim oElem             As MSHTML.HTMLGenericElement
        Dim oLink             As MSHTML.HTMLLinkElement

        Set oHTMLFile = New MSHTML.HTMLDocument
    #Else
        Dim oHTMLFile         As Object
        Dim oElem             As Object
        Dim oLink             As Object

        Set oHTMLFile = CreateObject("HTMLFile")
    #End If
    Dim bAppendPrefix         As Boolean
    Dim sOutput               As String

    If InStr(sURL, "//") = 0 Then
        bAppendPrefix = True
        sURL = "https://" & sURL
    End If

    oHTMLFile.Body.innerHTML = ""
    Set oLink = oHTMLFile.createElement("a")
    Call oLink.setAttribute("href", sURL)
    Set oElem = oHTMLFile.createElement("p")
    Call oElem.appendChild(oLink)
    Call oHTMLFile.Body.appendChild(oElem)

    '    http://office.microsoft.com/en-us/templates/CT101527321033.aspx?av=ZAC
    '    Debug.Print oLink.protocol => http: | https: | ftp:
    '    Debug.Print oLink.host     => office.microsoft.com
    '    Debug.Print oLink.hostname => office.microsoft.com
    '    Debug.Print oLink.PathName => /en-us/templates/CT101527321033.aspx
    '    Debug.Print oLink.Search   => ?av=ZAC
    '    Debug.Print oLink.Port     => 80

    sOutput = oLink.protocol & "//" & oLink.hostname
    If bAppendPrefix Then sOutput = Mid(sOutput, 9)
    MSHTML_GetBaseURL = sOutput

Error_Handler_Exit:
    On Error Resume Next
    Set oElem = Nothing
    Set oLink = Nothing
    Set oHTMLFile = Nothing
    Exit Function

Error_Handler:
    MsgBox "The following error has occurred" & vbCrLf & vbCrLf & _
           "Error Source: MSHTML_GetBaseURL" & vbCrLf & _
           "Error Number: " & Err.Number & vbCrLf & _
           "Error Description: " & Err.Description & _
           Switch(Erl = 0, "", Erl <> 0, vbCrLf & "Line No: " & Erl) _
           , vbOKOnly + vbCritical, "An Error has Occurred!"
    Resume Error_Handler_Exit
End Function

Notice in the code a commented section, to show you a few other elements you could extract if required.
 

Option 2 – Microsoft VBScript Regular Expressions (RegEx)

'---------------------------------------------------------------------------------------
' Procedure : RegEx_GetBaseURL
' Author    : Daniel Pineault, CARDA Consultants Inc.
' Website   : http://www.cardaconsultants.com
' Purpose   : Extract the base URL from a full URL
' Copyright : The following is release as Attribution-ShareAlike 4.0 International
'             (CC BY-SA 4.0) - https://creativecommons.org/licenses/by-sa/4.0/
' Req'd Refs: Early Binding -> Microsoft VBScript Regular Expressions X.X
'             Late Binding  -> None required
'
' Input Variables:
' ~~~~~~~~~~~~~~~~
' sURL      : Full URL to extract the base URL from
'
' Usage:
' ~~~~~~
' ? RegEx_GetBaseURL("http://office.microsoft.com/en-us/templates/CT101527321033.aspx?av=ZAC")
'   Returns -> http://office.microsoft.com
'
' Revision History:
' Rev       Date(yyyy-mm-dd)        Description
' **************************************************************************************
' 1         2023-03-23
'---------------------------------------------------------------------------------------
Function RegEx_GetBaseURL(ByVal sURL As String) As String
    On Error GoTo Error_Handler
    #Const RegEx_EarlyBind = False   'True => Early Binding / False => Late Binding
    #If RegEx_EarlyBind = True Then
        Dim oRegEx            As VBScript_RegExp_55.RegExp
        Dim oMatches          As VBScript_RegExp_55.MatchCollection

        Set oRegEx = New VBScript_RegExp_55.RegExp
    #Else
        Dim oRegEx            As Object
        Dim oMatches          As Object

        Set oRegEx = CreateObject("VBScript.RegExp")
    #End If
    Dim bAppendPrefix         As Boolean
    Dim sOutput               As String

    If InStr(sURL, "//") = 0 Then
        bAppendPrefix = True
        sURL = "https://" & sURL
    End If

    With oRegEx
        .Pattern = "^https?:\/\/[^\/]+"
        .Global = True
        .IgnoreCase = True
        .MultiLine = True
        Set oMatches = .Execute(sURL)
    End With
    If Not oMatches Is Nothing Then
        If oMatches.Count <> 0 Then
            sOutput = oMatches(0)
        End If
    End If
    
    If bAppendPrefix Then sOutput = Mid(sOutput, 9)
    RegEx_GetBaseURL = sOutput

Error_Handler_Exit:
    On Error Resume Next
    Set oMatches = Nothing
    Set oRegEx = Nothing
    Exit Function

Error_Handler:
    MsgBox "The following error has occurred" & vbCrLf & vbCrLf & _
           "Error Number: " & Err.Number & vbCrLf & _
           "Error Source: RegEx_GetBaseURL" & vbCrLf & _
           "Error Description: " & Err.Description & _
           Switch(Erl = 0, "", Erl <> 0, vbCrLf & "Line No: " & Erl) _
           , vbOKOnly + vbCritical, "An Error has Occurred!"
    Resume Error_Handler_Exit
End Function

 

Option 3 – Microsoft Script Control

'---------------------------------------------------------------------------------------
' Procedure : SC_GetBaseURL
' Author    : Daniel Pineault, CARDA Consultants Inc.
' Website   : http://www.cardaconsultants.com
' Purpose   : Extract the base URL from a full URL
' Copyright : The following is release as Attribution-ShareAlike 4.0 International
'             (CC BY-SA 4.0) - https://creativecommons.org/licenses/by-sa/4.0/
' Req'd Refs: Early Binding -> Microsoft Script Control 1.0
'             Late Binding  -> None required
'
' Input Variables:
' ~~~~~~~~~~~~~~~~
' sURL      : Full URL to extract the base URL from
'
' Usage:
' ~~~~~~
' ? SC_GetBaseURL("http://office.microsoft.com/en-us/templates/CT101527321033.aspx?av=ZAC")
'   Returns -> http://office.microsoft.com
'
' Revision History:
' Rev       Date(yyyy-mm-dd)        Description
' **************************************************************************************
' 1         2023-03-23
'---------------------------------------------------------------------------------------
Public Function SC_GetBaseURL(ByVal sURL As String) As String
    On Error GoTo Error_Handler
    #Const ScriptControl_EarlyBind = False    'Should normally be in the Module header
    #If ScriptControl_EarlyBind = True Then
        Dim oSC               As MSScriptControl.ScriptControl

        Set oSC = New ScriptControl
    #Else
        Static oSC            As Object

        Set oSC = CreateObject("ScriptControl")
    #End If
    Dim bAppendPrefix         As Boolean
    Dim sOutput               As String

    If InStr(sURL, "//") = 0 Then
        bAppendPrefix = True
        sURL = "https://" & sURL
    End If

    If Not oSC Is Nothing Then
        With oSC
            .Language = "JScript"
            .AddCode "function hostnameFromURL() {" & _
                     "var url = '" & sURL & "';" & _
                     "var aUrlElements = url.split('/');" & _
                     "return aUrlElements[0] + '//' + aUrlElements[2];" & _
                     "}"
            sOutput = .Run("hostnameFromURL")
        End With
    End If
    
    If bAppendPrefix Then sOutput = Mid(sOutput, 9)
    SC_GetBaseURL = sOutput

Error_Handler_Exit:
    On Error Resume Next
    Set oSC = Nothing
    Exit Function

Error_Handler:
    MsgBox "The following error has occurred" & vbCrLf & vbCrLf & _
           "Error Source: SC_GetBaseURL" & vbCrLf & _
           "Error Number: " & Err.Number & vbCrLf & _
           "Error Description: " & Err.Description & _
           Switch(Erl = 0, "", Erl <> 0, vbCrLf & "Line No: " & Erl) _
           , vbOKOnly + vbCritical, "An Error has Occurred!"
    Resume Error_Handler_Exit
End Function

 

Option 4 – Plain VBA

Obvious, this is just a string manipulation, so we could just as easily do so using VBA string functions (Left, Right, InStr, …). One approach, amongst many others, could be:

'---------------------------------------------------------------------------------------
' Procedure : GetBaseURL
' Author    : Daniel Pineault, CARDA Consultants Inc.
' Website   : http://www.cardaconsultants.com
' Purpose   : Extract the base URL from a full URL
' Copyright : The following is release as Attribution-ShareAlike 4.0 International
'             (CC BY-SA 4.0) - https://creativecommons.org/licenses/by-sa/4.0/
' Req'd Refs: None required
'
' Input Variables:
' ~~~~~~~~~~~~~~~~
' sURL      : Full URL to extract the base URL from
'
' Usage:
' ~~~~~~
' ? GetBaseURL("http://office.microsoft.com/en-us/templates/CT101527321033.aspx?av=ZAC")
'   Returns -> http://office.microsoft.com
'
' Revision History:
' Rev       Date(yyyy-mm-dd)        Description
' **************************************************************************************
' 1         2023-03-23
'---------------------------------------------------------------------------------------
Public Function GetBaseURL(ByVal sURL As String) As String
    On Error GoTo Error_Handler
    Dim aUrlElements() As String
    Dim bAppendPrefix         As Boolean
    Dim sOutput               As String

    If InStr(sURL, "//") = 0 Then
        bAppendPrefix = True
        sURL = "https://" & sURL
    End If
    
    aUrlElements = Split(sURL, "/")
    sOutput = aUrlElements(0) & "//" & aUrlElements(2)
    If bAppendPrefix Then sOutput = Mid(sOutput, 9)
    GetBaseURL = sOutput

Error_Handler_Exit:
    On Error Resume Next
    Exit Function

Error_Handler:
    MsgBox "The following error has occurred" & vbCrLf & vbCrLf & _
           "Error Source: GetBaseURL" & vbCrLf & _
           "Error Number: " & Err.Number & vbCrLf & _
           "Error Description: " & Err.Description & _
           Switch(Erl = 0, "", Erl <> 0, vbCrLf & "Line No: " & Erl) _
           , vbOKOnly + vbCritical, "An Error has Occurred!"
    Resume Error_Handler_Exit
End Function

One response on “VBA – Extract Base URL From A Full URL

  1. Kent N Gorrell

    Nice to see you back. I was just getting into your writing about the new Browser control when you went for a sabbatical. Brilliant work. Thank you