How can I download all of the FDA guidance documents that are listed on this website?

2023/03/27 2:48

To download all FDA guidance documents listed on the given website, you can modify the previous Python script to handle pagination and extract the links to the individual guidance documents. Here’s the updated code:

import os

import requests

from bs4 import BeautifulSoup

from urllib.parse import urljoin

def download_pdf(url, filename):
    """Download the resource at *url* and write it to *filename*.

    Args:
        url: Absolute URL of the file to fetch.
        filename: Path of the local file to create (overwritten if present).

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
        requests.RequestException: On connection problems or timeout.
        OSError: If *filename* cannot be opened for writing.
    """
    response = requests.get(url, timeout=30)
    # Fail loudly instead of saving an HTML error page under a .pdf name.
    response.raise_for_status()
    with open(filename, 'wb') as file:
        file.write(response.content)

def get_all_pages(url):
    """Fetch *url* and every following result page, returning the parsed pages.

    Starting at *url*, each page is downloaded and parsed with
    ``html.parser``; the walk continues through the anchor whose ``title``
    attribute is ``"Go to next page"`` until no such anchor is found.

    Args:
        url: Absolute URL of the first page.

    Returns:
        A list of ``BeautifulSoup`` objects, one per page, in visit order.

    Raises:
        requests.HTTPError: If a page request returns a 4xx/5xx status.
        requests.RequestException: On connection problems or timeout.
    """
    pages = []
    while url:
        print(f'Processing page: {url}')
        response = requests.get(url, timeout=30)
        # Do not parse (and silently get zero rows from) an error page.
        response.raise_for_status()
        soup = BeautifulSoup(response.text, 'html.parser')
        pages.append(soup)

        next_page = soup.find('a', {'title': 'Go to next page'})
        next_href = next_page.get('href') if next_page else None
        # The href may be relative (e.g. "?page=1"); resolve it against the
        # page it came from so the next requests.get() receives a full URL.
        url = urljoin(url, next_href) if next_href else None
    return pages

def main():
    """Download every guidance document linked from the FDA search pages.

    Collects all result pages with ``get_all_pages``. For each table row
    after the first, the text of the first cell becomes the document title
    and the first link in the second cell becomes the PDF URL. Each file is
    saved as ``guidance_documents/<title>.pdf`` via ``download_pdf``.

    Rows without two cells, without a link, or with an empty title are
    skipped. Errors raised by ``get_all_pages`` or ``download_pdf``
    propagate to the caller.
    """
    base_url = 'https://www.fda.gov'
    # NOTE(review): the path in the original text was cut off after
    # "search-fda-guidance-"; "documents" is the presumed ending -- confirm.
    starting_url = (
        f'{base_url}/regulatory-information/search-fda-guidance-documents'
    )
    all_pages = get_all_pages(starting_url)

    output_directory = 'guidance_documents'
    os.makedirs(output_directory, exist_ok=True)

    # Titles are free text; these characters are path separators or are
    # rejected in file names on common platforms, so they are replaced.
    unsafe_chars = '\\/:*?"<>|'

    for soup in all_pages:
        # The first <tr> is treated as the header row and skipped.
        for row in soup.find_all('tr')[1:]:
            columns = row.find_all('td')
            if len(columns) < 2:
                continue
            link = columns[1].find('a')
            if link is None or not link.get('href'):
                continue

            # Collapse newlines and runs of whitespace inside the cell text.
            title = ' '.join(columns[0].text.split())
            title = ''.join('_' if ch in unsafe_chars else ch for ch in title)
            if not title:
                continue

            pdf_url = urljoin(base_url, link['href'])
            filename = os.path.join(output_directory, title + '.pdf')
            print(f'Downloading {filename}')
            download_pdf(pdf_url, filename)

    print('All FDA guidance documents downloaded.')

# Run the downloader only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == '__main__':
    main()

Back to top