0

Page HTML by URL

by
Published Nov 5, 2024

Returns the full HTML content of a webpage specified by the URL. The response is in plain text. Proxies and Chromium JavaScript rendering are used for page retrieval and processing.

Script webscrapingai Verified

The script

Submitted by hugo697 Bun
Verified 581 days ago
1
//native
2
/** Credentials resource for the api.webscraping.ai endpoint; `apiKey` is sent as the `api_key` query parameter. */
type Webscrapingai = { apiKey: string }
5
/**
 * Page HTML by URL
 * Returns the full HTML content of a webpage specified by the URL. The response is in plain text. Proxies and Chromium JavaScript rendering are used for page retrieval and processing.
 *
 * Issues a GET request to `https://api.webscraping.ai/html`. `auth.apiKey` is always
 * sent as `api_key`; every other scalar argument is forwarded under its own name
 * unless it is `undefined` or an empty string. `headers` is flattened by
 * `encodeParams` into bracket-notation parameters (e.g. `headers[Name]=value`).
 *
 * @returns The response body as text.
 * @throws Error with the message `<status> <body>` when the response is not ok.
 */
export async function main(
	auth: Webscrapingai,
	url: string | undefined,
	headers: any,
	timeout: string | undefined,
	js: string | undefined,
	js_timeout: string | undefined,
	wait_for: string | undefined,
	proxy: 'datacenter' | 'residential' | undefined,
	country: 'us' | 'gb' | 'de' | 'it' | 'fr' | 'ca' | 'es' | 'ru' | 'jp' | 'kr' | 'in' | undefined,
	custom_proxy: string | undefined,
	device: 'desktop' | 'mobile' | 'tablet' | undefined,
	error_on_404: string | undefined,
	error_on_redirect: string | undefined,
	js_script: string | undefined,
	return_script_result: string | undefined,
	format: 'json' | 'text' | undefined
) {
	const endpoint = new URL(`https://api.webscraping.ai/html`)
	const query = endpoint.searchParams

	query.append('api_key', auth.apiKey)

	// Name/value pairs in the order they are added to the query string.
	const scalarParams: [string, string | undefined][] = [
		['url', url],
		['timeout', timeout],
		['js', js],
		['js_timeout', js_timeout],
		['wait_for', wait_for],
		['proxy', proxy],
		['country', country],
		['custom_proxy', custom_proxy],
		['device', device],
		['error_on_404', error_on_404],
		['error_on_redirect', error_on_redirect],
		['js_script', js_script],
		['return_script_result', return_script_result],
		['format', format]
	]
	for (const [name, value] of scalarParams) {
		// Unset and blank arguments are left out of the request entirely.
		if (value === undefined || value === '') {
			continue
		}
		query.append(name, value)
	}

	// Nested header entries are appended after the scalar parameters.
	encodeParams({ headers }).forEach((value, name) => {
		if (value === undefined || value === '') {
			return
		}
		query.append(name, value)
	})

	const response = await fetch(endpoint, {
		method: 'GET',
		body: undefined
	})
	// The body is needed on both paths: as the result, or as the error detail.
	const payload = await response.text()
	if (!response.ok) {
		throw new Error(`${response.status} ${payload}`)
	}
	return payload
}
66

67
/**
 * Flattens a nested object into bracket-notation query parameters.
 *
 * Arrays contribute `key[]` entries and nested objects contribute `key[child]`
 * entries, so `{ headers: { A: 'x' } }` yields the pair `headers[A]` = `x`.
 *
 * Pairs are appended through the URLSearchParams API rather than being joined
 * into a raw string and re-parsed, so names and values containing `&`, `=`, `+`
 * or `%` are preserved exactly instead of being split or decoded.
 * `null` and `undefined` leaves are skipped rather than being serialized as the
 * literal strings "null" / "undefined".
 *
 * @param o Object whose own enumerable keys become the top-level parameter names.
 * @returns The flattened parameters in traversal order.
 */
function encodeParams(o: any): URLSearchParams {
	const params = new URLSearchParams()

	const visit = (value: unknown, path: string): void => {
		if (value === undefined || value === null) {
			return
		}
		if (Array.isArray(value)) {
			for (const item of value) {
				visit(item, `${path}[]`)
			}
			return
		}
		if (typeof value === 'object') {
			for (const [key, child] of Object.entries(value)) {
				visit(child, `${path}[${key}]`)
			}
			return
		}
		// Primitive leaf: append() stores the raw value and handles escaping on output.
		params.append(path, String(value))
	}

	for (const key of Object.keys(o)) {
		visit(o[key], key)
	}
	return params
}
91