0

HTML of a selected page area by URL and CSS selector

by
Published Nov 5, 2024

Returns HTML of a selected page area by URL and CSS selector. Useful if you don't want to do the HTML parsing on your side.

Script webscrapingai Verified

The script

Submitted by hugo697 Bun
Verified 581 days ago
1
//native
2
/** Credentials resource for the WebScraping.AI API; `apiKey` is sent as the `api_key` query parameter. */
type Webscrapingai = { apiKey: string }
5
/**
 * HTML of a selected page area by URL and CSS selector
 *
 * Issues a GET request to `https://api.webscraping.ai/selected`, passing the
 * API key plus every supplied argument as a query parameter. Arguments that
 * are `undefined` or the empty string are left out of the request. `headers`
 * is flattened by `encodeParams` into bracket-notation parameters
 * (e.g. `headers[Name]=value`).
 *
 * @returns The raw response body as text.
 * @throws Error carrying `"<status> <body>"` when the response status is not OK.
 */
export async function main(
	auth: Webscrapingai,
	selector: string | undefined,
	url: string | undefined,
	headers: any,
	timeout: string | undefined,
	js: string | undefined,
	js_timeout: string | undefined,
	wait_for: string | undefined,
	proxy: 'datacenter' | 'residential' | undefined,
	country: 'us' | 'gb' | 'de' | 'it' | 'fr' | 'ca' | 'es' | 'ru' | 'jp' | 'kr' | 'in' | undefined,
	custom_proxy: string | undefined,
	device: 'desktop' | 'mobile' | 'tablet' | undefined,
	error_on_404: string | undefined,
	error_on_redirect: string | undefined,
	js_script: string | undefined,
	format: 'json' | 'text' | undefined
) {
	const endpoint = new URL(`https://api.webscraping.ai/selected`)
	const query = endpoint.searchParams
	query.append('api_key', auth.apiKey)

	// Property order here is the order in which parameters appear in the query string.
	const scalarArgs: Record<string, string | undefined> = {
		selector,
		url,
		timeout,
		js,
		js_timeout,
		wait_for,
		proxy,
		country,
		custom_proxy,
		device,
		error_on_404,
		error_on_redirect,
		js_script,
		format
	}
	Object.entries(scalarArgs).forEach(([name, value]) => {
		if (value === undefined || value === '') {
			return
		}
		query.append(name, value)
	})

	// Nested `headers` object is flattened into bracket-notation parameters.
	const headerParams = encodeParams({ headers })
	headerParams.forEach((value, name) => {
		if (value === undefined || value === '') {
			return
		}
		query.append(name, value)
	})

	const response = await fetch(endpoint, {
		method: 'GET',
		body: undefined
	})

	// The body is needed on both paths: as the result, or as the error detail.
	const payload = await response.text()
	if (!response.ok) {
		throw new Error(`${response.status} ${payload}`)
	}
	return payload
}
66

67
/**
 * Flattens a (possibly nested) object into URL query parameters using
 * bracket notation: `{ a: { b: 1 } }` -> `a[b]=1`, `{ a: [1, 2] }` -> `a[]=1&a[]=2`.
 *
 * Top-level properties whose value is `undefined` are skipped. Leaf values are
 * converted with `String(...)`.
 *
 * Each pair is appended directly to the `URLSearchParams` instance rather than
 * being joined into a raw query string and re-parsed. The raw-string approach
 * corrupted any key or value containing `&`, `=`, `+` or `%XX`
 * (e.g. a `Cookie: "a=1&b=2"` header was split into two unrelated parameters).
 *
 * @param o Object whose own enumerable properties are encoded.
 * @returns The flattened parameters, escaped correctly on serialization.
 */
function encodeParams(o: Record<string, unknown>): URLSearchParams {
	const params = new URLSearchParams()

	// Recursively walks `value`, extending `path` with `[]` / `[key]` segments.
	function iter(value: unknown, path: string): void {
		if (Array.isArray(value)) {
			for (const item of value) {
				iter(item, path + '[]')
			}
			return
		}
		if (value !== null && typeof value === 'object') {
			for (const [key, child] of Object.entries(value)) {
				iter(child, path + '[' + key + ']')
			}
			return
		}
		// Leaf: append() stores the exact key/value, so reserved characters survive.
		params.append(path, String(value))
	}

	for (const [key, value] of Object.entries(o)) {
		if (value !== undefined) {
			iter(value, key)
		}
	}
	return params
}
91