Edits history of script submission #9337 for 'Page text by URL (webscrapingai)'

  • bun
    One script reply has been approved by the moderators
    Approved
    //native
    /** Credentials resource passed to `main` as its `auth` argument. */
    type Webscrapingai = {
    	/** API key; `main` sends it as the `api_key` query parameter. */
    	apiKey: string
    }
    /**
     * Page text by URL
     * Returns the visible text content of a webpage specified by the URL. Can be used to feed data to GPT or other LLM models. The response can be in plain text, JSON, or XML format based on the text_format parameter. Proxies and Chromium JavaScript rendering are used for page retrieval and processing. Returns JSON on error.
     *
     * Every argument other than `auth` and `headers` is sent as a query parameter
     * of the same name; `undefined` and empty-string values are omitted.
     * `headers` is flattened by `encodeParams` into bracketed `headers[...]`
     * query parameters.
     *
     * @returns The parsed JSON body when `text_format` is `'json'` or the response
     * declares a JSON content type; otherwise the raw response text (plain or XML).
     * @throws Error whose message is `<status> <body>` when the response is not OK.
     */
    export async function main(
    	auth: Webscrapingai,
    	text_format: 'plain' | 'xml' | 'json' | undefined,
    	return_links: string | undefined,
    	url: string | undefined,
    	headers: any,
    	timeout: string | undefined,
    	js: string | undefined,
    	js_timeout: string | undefined,
    	wait_for: string | undefined,
    	proxy: 'datacenter' | 'residential' | undefined,
    	country: 'us' | 'gb' | 'de' | 'it' | 'fr' | 'ca' | 'es' | 'ru' | 'jp' | 'kr' | 'in' | undefined,
    	custom_proxy: string | undefined,
    	device: 'desktop' | 'mobile' | 'tablet' | undefined,
    	error_on_404: string | undefined,
    	error_on_redirect: string | undefined,
    	js_script: string | undefined
    ) {
    	const url_ = new URL(`https://api.webscraping.ai/text`)

    	url_.searchParams.append('api_key', auth.apiKey)

    	// Scalar options, in the order they are appended to the query string.
    	const queryParams: [string, string | undefined][] = [
    		['text_format', text_format],
    		['return_links', return_links],
    		['url', url],
    		['timeout', timeout],
    		['js', js],
    		['js_timeout', js_timeout],
    		['wait_for', wait_for],
    		['proxy', proxy],
    		['country', country],
    		['custom_proxy', custom_proxy],
    		['device', device],
    		['error_on_404', error_on_404],
    		['error_on_redirect', error_on_redirect],
    		['js_script', js_script]
    	]
    	for (const [name, value] of queryParams) {
    		if (value !== undefined && value !== '') {
    			url_.searchParams.append(name, value)
    		}
    	}

    	// Nested header object -> `headers[Name]=value` pairs.
    	encodeParams({ headers }).forEach((value, name) => {
    		if (value !== undefined && value !== '') {
    			url_.searchParams.append(name, value)
    		}
    	})

    	const response = await fetch(url_, { method: 'GET' })
    	if (!response.ok) {
    		const text = await response.text()
    		throw new Error(`${response.status} ${text}`)
    	}

    	// Only decode as JSON when JSON is actually expected; plain-text and XML
    	// bodies would make `response.json()` throw on an otherwise successful call.
    	const contentType = response.headers.get('content-type') ?? ''
    	if (text_format === 'json' || contentType.toLowerCase().includes('json')) {
    		return await response.json()
    	}
    	return await response.text()
    }
    
    /**
     * Flattens a (possibly nested) object into bracket-notation query parameters.
     *
     * Object keys become `path[key]`, array elements become `path[]`, and every
     * leaf value is stringified. Top-level properties whose value is `undefined`
     * are skipped.
     *
     * Pairs are appended directly to a `URLSearchParams` instead of being joined
     * into a raw `k=v&...` string and re-parsed, so names and values containing
     * `&`, `=`, `+` or `%` are preserved verbatim rather than being split or
     * decoded.
     *
     * @param o Object whose own enumerable properties are encoded.
     * @returns A `URLSearchParams` holding one entry per leaf value.
     */
    function encodeParams(o: any) {
    	const params = new URLSearchParams()

    	// Depth-first walk that accumulates the bracketed path for each leaf.
    	function iter(value: any, path: string) {
    		if (Array.isArray(value)) {
    			for (const item of value) {
    				iter(item, path + '[]')
    			}
    			return
    		}
    		if (value !== null && typeof value === 'object') {
    			for (const key of Object.keys(value)) {
    				iter(value[key], path + '[' + key + ']')
    			}
    			return
    		}
    		params.append(path, String(value))
    	}

    	for (const key of Object.keys(o)) {
    		if (o[key] !== undefined) {
    			iter(o[key], key)
    		}
    	}
    	return params
    }
    

    Submitted by hugo697 581 days ago