1 | |
/**
 * Credentials resource for the WebScraping.AI API.
 *
 * `apiKey` is the secret that `main` sends as the `api_key` query parameter.
 */
type Webscrapingai = { apiKey: string }
5 | |
/**
 * Page HTML by URL
 *
 * Returns the full HTML content of a webpage specified by the URL. The
 * response is in plain text. Proxies and Chromium JavaScript rendering are
 * used for page retrieval and processing.
 *
 * Issues a GET request to `https://api.webscraping.ai/html`. The API key is
 * sent as the `api_key` query parameter. Every other scalar argument is
 * forwarded as a query parameter of the same name, and is skipped when it is
 * `undefined` or an empty string. `headers` is flattened by `encodeParams`
 * into bracket-notation parameters (for example `headers[Name]=value`).
 *
 * @param auth Resource holding the API key.
 * @param url Forwarded as the `url` query parameter (the page to fetch).
 * @param headers Value expanded into `headers[...]` query parameters.
 * @returns The response body as text.
 * @throws Error whose message is `"<status> <body>"` when the response is not OK.
 */
export async function main(
  auth: Webscrapingai,
  url: string | undefined,
  headers: any,
  timeout: string | undefined,
  js: string | undefined,
  js_timeout: string | undefined,
  wait_for: string | undefined,
  proxy: 'datacenter' | 'residential' | undefined,
  country: 'us' | 'gb' | 'de' | 'it' | 'fr' | 'ca' | 'es' | 'ru' | 'jp' | 'kr' | 'in' | undefined,
  custom_proxy: string | undefined,
  device: 'desktop' | 'mobile' | 'tablet' | undefined,
  error_on_404: string | undefined,
  error_on_redirect: string | undefined,
  js_script: string | undefined,
  return_script_result: string | undefined,
  format: 'json' | 'text' | undefined
) {
  const endpoint = new URL('https://api.webscraping.ai/html')
  const query = endpoint.searchParams

  // The key always goes first, followed by the optional scalar parameters.
  query.append('api_key', auth.apiKey)

  // Insertion order of this record is the order the parameters are appended in.
  const scalarParams: Record<string, string | undefined> = {
    url,
    timeout,
    js,
    js_timeout,
    wait_for,
    proxy,
    country,
    custom_proxy,
    device,
    error_on_404,
    error_on_redirect,
    js_script,
    return_script_result,
    format
  }
  Object.entries(scalarParams).forEach(([name, value]) => {
    if (value === undefined || value === '') {
      return
    }
    query.append(name, value)
  })

  // Nested header values become bracket-notation parameters; empty ones are dropped.
  encodeParams({ headers }).forEach((value, name) => {
    if (value === undefined || value === '') {
      return
    }
    query.append(name, value)
  })

  const response = await fetch(endpoint, { method: 'GET' })
  if (response.ok) {
    return response.text()
  }

  // Surface both the status code and the raw error body to the caller.
  const text = await response.text()
  throw new Error(`${response.status} ${text}`)
}
66 |
|
/**
 * Flattens a (possibly nested) object into bracket-notation query parameters.
 *
 * - Top-level properties whose value is `undefined` are skipped.
 * - Arrays contribute one entry per element under `path[]`.
 * - Nested objects contribute entries under `path[key]`.
 * - Any other value (including `null` and nested `undefined`) is stringified.
 *
 * Each pair is appended directly to the `URLSearchParams` instance rather than
 * being joined into a raw `key=value&...` string and re-parsed. The raw-string
 * approach corrupted keys and values containing `&`, `=`, `+` or `%`: `&`
 * split the value into extra parameters, `+` became a space, and `%xx`
 * sequences were decoded.
 *
 * @param o Object whose own enumerable properties are encoded.
 * @returns The flattened parameters, in traversal order.
 */
function encodeParams(o: any) {
  const params = new URLSearchParams()

  // Depth-first walk that extends `path` with `[]` / `[key]` at each level.
  function iter(value: any, path: string) {
    if (Array.isArray(value)) {
      value.forEach((item) => iter(item, path + '[]'))
      return
    }
    if (value !== null && typeof value === 'object') {
      Object.keys(value).forEach((key) => iter(value[key], path + '[' + key + ']'))
      return
    }
    // Leaf: store the exact text; URLSearchParams escapes it on serialization.
    params.append(path, String(value))
  }

  Object.keys(o).forEach((key) => {
    if (o[key] !== undefined) {
      iter(o[key], key)
    }
  })
  return params
}
91 |
|