import requests
from bs4 import BeautifulSoup
# Fetch the first search-results page and collect the page labels shown in
# its pagination block.
indeed_result = requests.get("https://www.indeed.com/jobs?q=python&limit=50")
indeed_soup = BeautifulSoup(indeed_result.text, "html.parser")
pagination = indeed_soup.find("div", {"class": "pagination"})
links = pagination.find_all("a")

# Each pagination link wraps its label in a <span>.
# Per the original note, link.string yields the same result as
# link.find("span").string.
pages = [link.find("span").string for link in links]

# Drop the final entry (presumably the "Next" link -- confirm against the
# page markup).
pages = pages[0:-1]
print(pages)
# output (all page numbers fetched):
# ['2','3','4','5','6','7','8','9','10','11','12','13','14','15','16','17','18','19','20']
Requesting Each Page
# main.py
from indeed import extract_indeed_pages, extract_indeed_jobs

# Find the last page number first, then request every page up to it.
last_indeed_pages = extract_indeed_pages()
indeed_jobs = extract_indeed_jobs(last_indeed_pages)


# indeed.py
import requests
from bs4 import BeautifulSoup
LIMIT = 50
URL = f"https://www.indeed.com/jobs?q=python&limit={LIMIT}"


def extract_indeed_pages():
    """Return the number of the last page link in the pagination block.

    Requests ``URL``, finds the ``div`` with class ``pagination`` and
    converts the text of every link except the final one to ``int``.

    Returns:
        The last of the converted page numbers.

    Raises:
        AttributeError: If the page has no pagination ``div``
            (``soup.find`` returns ``None``).
        IndexError: If fewer than two links are found.
    """
    result = requests.get(URL)
    soup = BeautifulSoup(result.text, "html.parser")
    pagination = soup.find("div", {"class": "pagination"})
    links = pagination.find_all("a")
    # The final link is skipped before the int conversion (presumably it is
    # the non-numeric "Next" link -- confirm against the page markup).
    pages = [int(link.string) for link in links[:-1]]
    max_page = pages[-1]
    return max_page
def extract_indeed_jobs(last_page):
    """Request every result page and print each response's status code.

    Args:
        last_page: Number of pages to request. Page indices
            ``0 .. last_page - 1`` are turned into ``start`` offsets of
            ``page * LIMIT``.

    Returns:
        The ``jobs`` list, which is never filled in this version and is
        therefore always empty.
    """
    jobs = []
    for page in range(last_page):
        result = requests.get(f"{URL}&start={page*LIMIT}")
        print(result.status_code)
    return jobs
// output
200
200
200
...
각 페이지를 돌면서 값을 가져오는 것을 확인할 수 있다.
Extracting Titles
# indeed.py
import requests
from bs4 import BeautifulSoup

LIMIT = 50
URL = f"https://www.indeed.com/jobs?q=python&limit={LIMIT}"


def extract_indeed_pages():
    """Return the number of the last page link in the pagination block.

    Requests ``URL``, finds the ``div`` with class ``pagination`` and
    converts the text of every link except the final one to ``int``.

    Returns:
        The last of the converted page numbers.

    Raises:
        AttributeError: If the page has no pagination ``div``
            (``soup.find`` returns ``None``).
        IndexError: If fewer than two links are found.
    """
    result = requests.get(URL)
    soup = BeautifulSoup(result.text, "html.parser")
    pagination = soup.find("div", {"class": "pagination"})
    links = pagination.find_all("a")
    # The final link is skipped before the int conversion (presumably it is
    # the non-numeric "Next" link -- confirm against the page markup).
    pages = [int(link.string) for link in links[:-1]]
    max_page = pages[-1]
    return max_page
def extract_indeed_jobs(last_page):
    """Print the title of every job card on the first results page.

    Args:
        last_page: Currently unused; the per-page loop is commented out
            and only the page at ``start=0`` is requested.

    Returns:
        The ``jobs`` list, which is never filled in this version and is
        therefore always empty.

    Raises:
        AttributeError: If a job card has no ``div`` with class ``title``
            or that ``div`` has no ``a`` tag (``find`` returns ``None``).
        KeyError: If the ``a`` tag has no ``title`` attribute.
    """
    jobs = []
    # for page in range(last_page):
    result = requests.get(f"{URL}&start={0*LIMIT}")
    # Parse the HTML of the response.
    soup = BeautifulSoup(result.text, "html.parser")
    # Find every div whose class is "jobsearch-SerpJobCard".
    results = soup.find_all("div", {"class": "jobsearch-SerpJobCard"})
    for card in results:
        # Inside the div with class "title", read the <a> tag's "title"
        # attribute.
        title = card.find("div", {"class": "title"}).find("a")["title"]
        print(title)
    return jobs
// output
Python Developer
Android/ Mobile Application Developer
Software Engineer
Senior Advanced Analytics Analyst
Software Engineer - Direct Hire
참고
range - 연속된 숫자를 생성해주는 함수(마지막 숫자는 포함되지 않는다. 파라미터 수에 따라 생성하는 값이 다르다.) (참고)
댓글
댓글 쓰기