Ever needed to get the Base URL (also referred to as Origin) from a full URL?
So given a URL like:
http://office.microsoft.com/en-us/templates/CT101527321033.aspx?av=ZAC
get/return:
http://office.microsoft.com
Well, it truly isn’t hard once you know how. Below are a couple different approaches that you can employ to get the job done:
- Microsoft HTML Object Library
- Microsoft VBScript Regular Expressions (RegEx)
- Microsoft Script Control
- Plain VBA
The Solution
Option 1 – Microsoft HTML Object Library
'---------------------------------------------------------------------------------------
' Procedure : MSHTML_GetBaseURL
' Author : Daniel Pineault, CARDA Consultants Inc.
' Website : http://www.cardaconsultants.com
' Purpose : Extract the base URL from a full URL
' Copyright : The following is release as Attribution-ShareAlike 4.0 International
' (CC BY-SA 4.0) - https://creativecommons.org/licenses/by-sa/4.0/
' Req'd Refs: Late Binding -> None required
' Early Binding -> Microsoft HTML Object Library
'
' Input Variables:
' ~~~~~~~~~~~~~~~~
' sURL : Full URL to extract the base URL from
'
' Usage:
' ~~~~~~
' ? MSHTML_GetBaseURL("http://office.microsoft.com/en-us/templates/CT101527321033.aspx?av=ZAC")
' Returns -> http://office.microsoft.com
'
' Revision History:
' Rev Date(yyyy-mm-dd) Description
' **************************************************************************************
' 1 2023-03-23
'---------------------------------------------------------------------------------------
Function MSHTML_GetBaseURL(ByVal sURL As String) As String
On Error GoTo Error_Handler
#Const HF_EarlyBind = False 'True => Early Binding / False => Late Binding
#If HF_EarlyBind = True Then
Dim oHTMLFile As MSHTML.HTMLDocument
Dim oElem As MSHTML.HTMLGenericElement
Dim oLink As MSHTML.HTMLLinkElement
Set oHTMLFile = New MSHTML.HTMLDocument
#Else
Dim oHTMLFile As Object
Dim oElem As Object
Dim oLink As Object
Set oHTMLFile = CreateObject("HTMLFile")
#End If
Dim bAppendPrefix As Boolean
Dim sOutput As String
If InStr(sURL, "//") = 0 Then
bAppendPrefix = True
sURL = "https://" & sURL
End If
oHTMLFile.Body.innerHTML = ""
Set oLink = oHTMLFile.createElement("a")
Call oLink.setAttribute("href", sURL)
Set oElem = oHTMLFile.createElement("p")
Call oElem.appendChild(oLink)
Call oHTMLFile.Body.appendChild(oElem)
' http://office.microsoft.com/en-us/templates/CT101527321033.aspx?av=ZAC
' Debug.Print oLink.protocol => http: | https: | ftp:
' Debug.Print oLink.host => office.microsoft.com
' Debug.Print oLink.hostname => office.microsoft.com
' Debug.Print oLink.PathName => /en-us/templates/CT101527321033.aspx
' Debug.Print oLink.Search => ?av=ZAC
' Debug.Print oLink.Port => 80
sOutput = oLink.protocol & "//" & oLink.hostname
If bAppendPrefix Then sOutput = Mid(sOutput, 9)
MSHTML_GetBaseURL = sOutput
Error_Handler_Exit:
On Error Resume Next
Set oElem = Nothing
Set oLink = Nothing
Set oHTMLFile = Nothing
Exit Function
Error_Handler:
MsgBox "The following error has occurred" & vbCrLf & vbCrLf & _
"Error Source: MSHTML_GetBaseURL" & vbCrLf & _
"Error Number: " & Err.Number & vbCrLf & _
"Error Description: " & Err.Description & _
Switch(Erl = 0, "", Erl <> 0, vbCrLf & "Line No: " & Erl) _
, vbOKOnly + vbCritical, "An Error has Occurred!"
Resume Error_Handler_Exit
End Function
Notice in the code a commented section, to show you a few other elements you could extract if required.
Option 2 – Microsoft VBScript Regular Expressions (RegEx)
'---------------------------------------------------------------------------------------
' Procedure : RegEx_GetBaseURL
' Author : Daniel Pineault, CARDA Consultants Inc.
' Website : http://www.cardaconsultants.com
' Purpose : Extract the base URL from a full URL
' Copyright : The following is release as Attribution-ShareAlike 4.0 International
' (CC BY-SA 4.0) - https://creativecommons.org/licenses/by-sa/4.0/
' Req'd Refs: Early Binding -> Microsoft VBScript Regular Expressions X.X
' Late Binding -> None required
'
' Input Variables:
' ~~~~~~~~~~~~~~~~
' sURL : Full URL to extract the base URL from
'
' Usage:
' ~~~~~~
' ? RegEx_GetBaseURL("http://office.microsoft.com/en-us/templates/CT101527321033.aspx?av=ZAC")
' Returns -> http://office.microsoft.com
'
' Revision History:
' Rev Date(yyyy-mm-dd) Description
' **************************************************************************************
' 1 2023-03-23
'---------------------------------------------------------------------------------------
Function RegEx_GetBaseURL(ByVal sURL As String) As String
On Error GoTo Error_Handler
#Const RegEx_EarlyBind = False 'True => Early Binding / False => Late Binding
#If RegEx_EarlyBind = True Then
Dim oRegEx As VBScript_RegExp_55.RegExp
Dim oMatches As VBScript_RegExp_55.MatchCollection
Set oRegEx = New VBScript_RegExp_55.RegExp
#Else
Dim oRegEx As Object
Dim oMatches As Object
Set oRegEx = CreateObject("VBScript.RegExp")
#End If
Dim bAppendPrefix As Boolean
Dim sOutput As String
If InStr(sURL, "//") = 0 Then
bAppendPrefix = True
sURL = "https://" & sURL
End If
With oRegEx
.Pattern = "^https?:\/\/[^\/]+"
.Global = True
.IgnoreCase = True
.MultiLine = True
Set oMatches = .Execute(sURL)
End With
If Not oMatches Is Nothing Then
If oMatches.Count <> 0 Then
sOutput = oMatches(0)
End If
End If
If bAppendPrefix Then sOutput = Mid(sOutput, 9)
RegEx_GetBaseURL = sOutput
Error_Handler_Exit:
On Error Resume Next
Set oMatches = Nothing
Set oRegEx = Nothing
Exit Function
Error_Handler:
MsgBox "The following error has occurred" & vbCrLf & vbCrLf & _
"Error Number: " & Err.Number & vbCrLf & _
"Error Source: RegEx_GetBaseURL" & vbCrLf & _
"Error Description: " & Err.Description & _
Switch(Erl = 0, "", Erl <> 0, vbCrLf & "Line No: " & Erl) _
, vbOKOnly + vbCritical, "An Error has Occurred!"
Resume Error_Handler_Exit
End Function
Option 3 – Microsoft Script Control
'---------------------------------------------------------------------------------------
' Procedure : SC_GetBaseURL
' Author : Daniel Pineault, CARDA Consultants Inc.
' Website : http://www.cardaconsultants.com
' Purpose : Extract the base URL from a full URL
' Copyright : The following is release as Attribution-ShareAlike 4.0 International
' (CC BY-SA 4.0) - https://creativecommons.org/licenses/by-sa/4.0/
' Req'd Refs: Early Binding -> Microsoft Script Control 1.0
' Late Binding -> None required
'
' Input Variables:
' ~~~~~~~~~~~~~~~~
' sURL : Full URL to extract the base URL from
'
' Usage:
' ~~~~~~
' ? SC_GetBaseURL("http://office.microsoft.com/en-us/templates/CT101527321033.aspx?av=ZAC")
' Returns -> http://office.microsoft.com
'
' Revision History:
' Rev Date(yyyy-mm-dd) Description
' **************************************************************************************
' 1 2023-03-23
'---------------------------------------------------------------------------------------
Public Function SC_GetBaseURL(ByVal sURL As String) As String
On Error GoTo Error_Handler
#Const ScriptControl_EarlyBind = False 'Should normally be in the Module header
#If ScriptControl_EarlyBind = True Then
Dim oSC As MSScriptControl.ScriptControl
Set oSC = New ScriptControl
#Else
Static oSC As Object
Set oSC = CreateObject("ScriptControl")
#End If
Dim bAppendPrefix As Boolean
Dim sOutput As String
If InStr(sURL, "//") = 0 Then
bAppendPrefix = True
sURL = "https://" & sURL
End If
If Not oSC Is Nothing Then
With oSC
.Language = "JScript"
.AddCode "function hostnameFromURL() {" & _
"var url = '" & sURL & "';" & _
"var aUrlElements = url.split('/');" & _
"return aUrlElements[0] + '//' + aUrlElements[2];" & _
"}"
sOutput = .Run("hostnameFromURL")
End With
End If
If bAppendPrefix Then sOutput = Mid(sOutput, 9)
SC_GetBaseURL = sOutput
Error_Handler_Exit:
On Error Resume Next
Set oSC = Nothing
Exit Function
Error_Handler:
MsgBox "The following error has occurred" & vbCrLf & vbCrLf & _
"Error Source: SC_GetBaseURL" & vbCrLf & _
"Error Number: " & Err.Number & vbCrLf & _
"Error Description: " & Err.Description & _
Switch(Erl = 0, "", Erl <> 0, vbCrLf & "Line No: " & Erl) _
, vbOKOnly + vbCritical, "An Error has Occurred!"
Resume Error_Handler_Exit
End Function
Option 4 – Plain VBA
Obvious, this is just a string manipulation, so we could just as easily do so using VBA string functions (Left, Right, InStr, …). One approach, amongst many others, could be:
'---------------------------------------------------------------------------------------
' Procedure : GetBaseURL
' Author : Daniel Pineault, CARDA Consultants Inc.
' Website : http://www.cardaconsultants.com
' Purpose : Extract the base URL from a full URL
' Copyright : The following is release as Attribution-ShareAlike 4.0 International
' (CC BY-SA 4.0) - https://creativecommons.org/licenses/by-sa/4.0/
' Req'd Refs: None required
'
' Input Variables:
' ~~~~~~~~~~~~~~~~
' sURL : Full URL to extract the base URL from
'
' Usage:
' ~~~~~~
' ? GetBaseURL("http://office.microsoft.com/en-us/templates/CT101527321033.aspx?av=ZAC")
' Returns -> http://office.microsoft.com
'
' Revision History:
' Rev Date(yyyy-mm-dd) Description
' **************************************************************************************
' 1 2023-03-23
'---------------------------------------------------------------------------------------
Public Function GetBaseURL(ByVal sURL As String) As String
On Error GoTo Error_Handler
Dim aUrlElements() As String
Dim bAppendPrefix As Boolean
Dim sOutput As String
If InStr(sURL, "//") = 0 Then
bAppendPrefix = True
sURL = "https://" & sURL
End If
aUrlElements = Split(sURL, "/")
sOutput = aUrlElements(0) & "//" & aUrlElements(2)
If bAppendPrefix Then sOutput = Mid(sOutput, 9)
GetBaseURL = sOutput
Error_Handler_Exit:
On Error Resume Next
Exit Function
Error_Handler:
MsgBox "The following error has occurred" & vbCrLf & vbCrLf & _
"Error Source: GetBaseURL" & vbCrLf & _
"Error Number: " & Err.Number & vbCrLf & _
"Error Description: " & Err.Description & _
Switch(Erl = 0, "", Erl <> 0, vbCrLf & "Line No: " & Erl) _
, vbOKOnly + vbCritical, "An Error has Occurred!"
Resume Error_Handler_Exit
End Function
Nice to see you back. I was just getting into your writing about the new Browser control when you went for a sabbatical. Brilliant work. Thank you