0

HTML of multiple page areas by URL and CSS selectors

by
Published Nov 5, 2024

Returns HTML of multiple page areas by URL and CSS selectors. Useful if you don't want to do the HTML parsing on your side.

Script webscrapingai Verified

The script

Submitted by hugo697 Bun
Verified 581 days ago
1
//native
2
/** Credentials passed to `main`; `apiKey` is sent as the `api_key` query parameter. */
type Webscrapingai = { apiKey: string }
5
/**
 * HTML of multiple page areas by URL and CSS selectors
 * Returns HTML of multiple page areas by URL and CSS selectors. Useful if you don't want to do the HTML parsing on your side.
 *
 * Issues a GET request to `https://api.webscraping.ai/selected-multiple`.
 * `auth.apiKey` is sent as `api_key`; every other scalar argument is sent as
 * a query parameter of the same name unless it is `undefined` or an empty
 * string. `headers` is flattened by `encodeParams` into bracket-notation
 * parameters (e.g. `headers[Name]`).
 *
 * @returns The response body parsed as JSON.
 * @throws Error with the message `<status> <response text>` when the response
 *         status is not OK.
 */
export async function main(
	auth: Webscrapingai,
	selectors: string | undefined,
	url: string | undefined,
	headers: any,
	timeout: string | undefined,
	js: string | undefined,
	js_timeout: string | undefined,
	wait_for: string | undefined,
	proxy: 'datacenter' | 'residential' | undefined,
	country: 'us' | 'gb' | 'de' | 'it' | 'fr' | 'ca' | 'es' | 'ru' | 'jp' | 'kr' | 'in' | undefined,
	custom_proxy: string | undefined,
	device: 'desktop' | 'mobile' | 'tablet' | undefined,
	error_on_404: string | undefined,
	error_on_redirect: string | undefined,
	js_script: string | undefined
) {
	const endpoint = new URL(`https://api.webscraping.ai/selected-multiple`)
	const query = endpoint.searchParams

	query.append('api_key', auth.apiKey)

	// Insertion order of this object fixes the order of the query parameters.
	const scalarParams: Record<string, string | undefined> = {
		selectors,
		url,
		timeout,
		js,
		js_timeout,
		wait_for,
		proxy,
		country,
		custom_proxy,
		device,
		error_on_404,
		error_on_redirect,
		js_script
	}
	Object.entries(scalarParams).forEach(([name, value]) => {
		// Unset and empty arguments are left out of the request entirely.
		if (value === undefined || value === '') {
			return
		}
		query.append(name, value)
	})

	// Nested header values are flattened before being added to the query.
	encodeParams({ headers }).forEach((value, name) => {
		if (value === undefined || value === '') {
			return
		}
		query.append(name, value)
	})

	const response = await fetch(endpoint, {
		method: 'GET',
		body: undefined
	})

	if (response.ok) {
		return await response.json()
	}

	// Surface the HTTP status together with the raw response text.
	const errorBody = await response.text()
	throw new Error(`${response.status} ${errorBody}`)
}
64

65
/**
 * Flattens a (possibly nested) object into bracket-notation query parameters.
 *
 * Arrays contribute a `[]` suffix to the key, object properties contribute
 * `[prop]`, and every leaf value is stringified. Top-level properties whose
 * value is `undefined` are skipped.
 *
 * Pairs are appended directly to the `URLSearchParams` instead of being
 * concatenated into a raw `key=value&...` string and re-parsed, so keys and
 * values containing `&`, `=`, `+` or `%` are preserved verbatim rather than
 * being split or decoded.
 *
 * @param o Object whose own enumerable properties are encoded.
 * @returns A `URLSearchParams` holding one entry per leaf value, in traversal order.
 */
function encodeParams(o: any) {
	const params = new URLSearchParams()

	function iter(value: any, path: string) {
		if (Array.isArray(value)) {
			for (const item of value) {
				iter(item, path + '[]')
			}
			return
		}
		if (value !== null && typeof value === 'object') {
			for (const key of Object.keys(value)) {
				iter(value[key], path + '[' + key + ']')
			}
			return
		}
		// Leaf: let URLSearchParams handle escaping of both key and value.
		params.append(path, String(value))
	}

	for (const key of Object.keys(o)) {
		if (o[key] !== undefined) {
			iter(o[key], key)
		}
	}
	return params
}
89