I believe in Python supremacy, so here’s a simple example of one of the methods I use to scrape or download files in parallel: the asyncio package (paired here with aiohttp for the requests and aiosqlite for storage). It hasn’t let me down thus far.
Code
import asyncio
import aiohttp
import aiosqlite

async def download_pdf(session, url):
    async with session.get(url) as response:
        if response.status == 200:
            return await response.read()
        else:
            raise Exception(f"Error downloading {url}")

async def save_pdf_to_db(db, url, data):
    async with db.execute("INSERT INTO pdfs (url, content) VALUES (?, ?)", (url, data)):
        await db.commit()

async def main(urls):
    async with aiohttp.ClientSession() as session:
        # Create database and table
        db = await aiosqlite.connect("pdf_database.db")
        await db.execute("CREATE TABLE IF NOT EXISTS pdfs (url TEXT, content BLOB)")
        # Schedule every download as its own task so they run concurrently
        tasks = []
        for url in urls:
            task = asyncio.create_task(download_pdf(session, url))
            tasks.append(task)
        pdf_contents = await asyncio.gather(*tasks)
        # Persist each downloaded PDF alongside its source URL
        for url, content in zip(urls, pdf_contents):
            await save_pdf_to_db(db, url, content)
        await db.close()
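To actually run it, you still need an event-loop entry point. Here’s a minimal sketch; the URL list below is a placeholder I’ve made up for illustration, not part of the script itself:

if __name__ == "__main__":
    # Placeholder URLs for illustration only; swap in your own list
    urls = [
        "https://example.com/report1.pdf",
        "https://example.com/report2.pdf",
    ]
    asyncio.run(main(urls))

One caveat worth knowing: asyncio.gather raises as soon as any single download fails, so in that case nothing gets saved. Passing return_exceptions=True to gather lets the successful downloads through, at the cost of having to filter the exception objects out of the results before writing to the database.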