🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude <noreply@anthropic.com>
171 lines
4.7 KiB
TypeScript
Executable File
171 lines
4.7 KiB
TypeScript
Executable File
/**
 * Extract the hostname portion of a URL-like string using a single forward
 * scan over char codes (no regular expressions, no `URL` object).
 *
 * When `urlIsValidHostname` is `false` the function:
 *  1. ignores leading/trailing characters with char code <= 32 (spaces and
 *     control characters),
 *  2. rejects `data:` URLs,
 *  3. skips a leading `//` or a `scheme:/` prefix (rejecting schemes that
 *     contain characters other than letters, digits, '.', '-' and '+'),
 *  4. stops at the first '/', '?' or '#',
 *  5. drops any `user:password@` prefix and any `:port` suffix,
 *  6. returns the content of a bracketed IPv6 literal without the brackets.
 *
 * In every case trailing dots are removed, and the result is lower-cased if
 * an ASCII upper-case letter was seen while scanning.
 *
 * @param url - URL we want to extract a hostname from.
 * @param urlIsValidHostname - hint from caller; true if `url` is already a
 * valid hostname. In that case no parsing is performed: only trailing dots
 * are trimmed and the value is NOT lower-cased.
 * @returns The extracted hostname (possibly an empty string when `url`
 * contains no host), or `null` for `data:` URLs, invalid schemes and
 * malformed IPv6 literals.
 */
export default function extractHostname(
  url: string,
  urlIsValidHostname: boolean,
): string | null {
  let start = 0;
  let end: number = url.length;
  let hasUpper = false;

  // If url is not already a valid hostname, then try to extract hostname.
  if (!urlIsValidHostname) {
    // Trim leading spaces (and any other char with code <= 32).
    while (start < url.length && url.charCodeAt(start) <= 32) {
      start += 1;
    }

    // Special handling of data URLs. This is checked *after* skipping leading
    // whitespace so that '  data:...' is rejected the same way as 'data:...'.
    if (url.startsWith('data:', start)) {
      return null;
    }

    // Trim trailing spaces. At least one character is always kept.
    while (end > start + 1 && url.charCodeAt(end - 1) <= 32) {
      end -= 1;
    }

    // Skip scheme.
    if (
      url.charCodeAt(start) === 47 /* '/' */ &&
      url.charCodeAt(start + 1) === 47 /* '/' */
    ) {
      // Protocol-relative URL: '//host/...'
      start += 2;
    } else {
      const indexOfProtocol = url.indexOf(':/', start);
      if (indexOfProtocol !== -1) {
        // Implement fast-path for common protocols. We expect most protocols
        // should be one of these 4 and thus we will not need to perform the
        // more expansive validity check most of the time.
        const protocolSize = indexOfProtocol - start;
        const c0 = url.charCodeAt(start);
        const c1 = url.charCodeAt(start + 1);
        const c2 = url.charCodeAt(start + 2);
        const c3 = url.charCodeAt(start + 3);
        const c4 = url.charCodeAt(start + 4);

        const isCommonProtocol =
          // https
          (protocolSize === 5 &&
            c0 === 104 /* 'h' */ &&
            c1 === 116 /* 't' */ &&
            c2 === 116 /* 't' */ &&
            c3 === 112 /* 'p' */ &&
            c4 === 115) /* 's' */ ||
          // http
          (protocolSize === 4 &&
            c0 === 104 /* 'h' */ &&
            c1 === 116 /* 't' */ &&
            c2 === 116 /* 't' */ &&
            c3 === 112) /* 'p' */ ||
          // wss
          (protocolSize === 3 &&
            c0 === 119 /* 'w' */ &&
            c1 === 115 /* 's' */ &&
            c2 === 115) /* 's' */ ||
          // ws
          (protocolSize === 2 &&
            c0 === 119 /* 'w' */ &&
            c1 === 115) /* 's' */;

        if (!isCommonProtocol) {
          // Check that scheme is valid
          for (let i = start; i < indexOfProtocol; i += 1) {
            // Setting bit 5 folds 'A'-'Z' onto 'a'-'z'; digits, '.', '-' and
            // '+' already have that bit set and are left unchanged.
            const lowerCaseCode = url.charCodeAt(i) | 32;
            const isValidSchemeChar =
              (lowerCaseCode >= 97 && lowerCaseCode <= 122) || // [a, z]
              (lowerCaseCode >= 48 && lowerCaseCode <= 57) || // [0, 9]
              lowerCaseCode === 46 || // '.'
              lowerCaseCode === 45 || // '-'
              lowerCaseCode === 43; // '+'

            if (!isValidSchemeChar) {
              return null;
            }
          }
        }

        // Skip 0, 1 or more '/' after ':/'
        start = indexOfProtocol + 2;
        while (url.charCodeAt(start) === 47 /* '/' */) {
          start += 1;
        }
      }
    }

    // Detect first occurrence of '/', '?' or '#'. We also keep track of the
    // last occurrence of '@', ']' or ':' to speed-up subsequent parsing of
    // (respectively), identifier, ipv6 or port.
    let indexOfIdentifier = -1;
    let indexOfClosingBracket = -1;
    let indexOfPort = -1;
    for (let i = start; i < end; i += 1) {
      const code: number = url.charCodeAt(i);
      if (
        code === 35 || // '#'
        code === 47 || // '/'
        code === 63 // '?'
      ) {
        end = i;
        break;
      } else if (code === 64) {
        // '@'
        indexOfIdentifier = i;
      } else if (code === 93) {
        // ']'
        indexOfClosingBracket = i;
      } else if (code === 58) {
        // ':'
        indexOfPort = i;
      } else if (code >= 65 && code <= 90) {
        // 'A'-'Z': remember that the result needs lower-casing.
        hasUpper = true;
      }
    }

    // Detect identifier: '@'
    if (
      indexOfIdentifier !== -1 &&
      indexOfIdentifier > start &&
      indexOfIdentifier < end
    ) {
      start = indexOfIdentifier + 1;
    }

    // Handle ipv6 addresses
    if (url.charCodeAt(start) === 91 /* '[' */) {
      // The closing bracket must come after the opening one. A ']' seen
      // earlier (e.g. inside the credentials part) does not terminate this
      // literal, in which case the address is malformed.
      if (indexOfClosingBracket > start) {
        return url.slice(start + 1, indexOfClosingBracket).toLowerCase();
      }
      return null;
    } else if (indexOfPort !== -1 && indexOfPort > start && indexOfPort < end) {
      // Detect port: ':'
      end = indexOfPort;
    }
  }

  // Trim trailing dots. At least one character is always kept.
  while (end > start + 1 && url.charCodeAt(end - 1) === 46 /* '.' */) {
    end -= 1;
  }

  // Avoid allocating a new string when nothing was trimmed.
  const hostname: string =
    start !== 0 || end !== url.length ? url.slice(start, end) : url;

  if (hasUpper) {
    return hostname.toLowerCase();
  }

  return hostname;
}
|