# Fetch the first results page (offset 0) and print the company of each job card.
result = requests.get(f"{URL}&start={0 * LIMIT}")
soup = BeautifulSoup(result.text, "html.parser")
results = soup.find_all("div", {"class": "jobsearch-SerpJobCard"})
for card in results:
    # The title is extracted at this step but not printed yet.
    title = card.find("div", {"class": "title"}).find("a")["title"]
    company = card.find("span", {"class": "company"})
    if company is None:
        # A card without a company element has nothing to print.
        continue
    company_anchor = company.find("a")
    # The company name appears either as <span>...</span> or as
    # <span><a>...</a></span>, so prefer the anchor text when it exists.
    if company_anchor is not None:
        company = str(company_anchor.string)
    else:
        company = str(company.string)
    # Strip on the extracted text (not on the Tag) to remove whitespace.
    company = company.strip()
    print(company)
# output
Piper Companies
Amazon Data Services, Inc.
inficare technologies
Capgemini
...
Extracting Locations and Finishing up
#indeed.pydefextract_job(html):
title = html.find("div",{"class":"title"}).find("a")["title"]
company = html.find("span",{"class":"company"})if company
# company가 없는 경우에도 있기 때문에 if문으로 감싸준다.
company_anchor = company.find("a")if company_anchor isnotNone:
company =str(company_anchor.string)else:
company =str(company.string)
company = company.strip()# data-rc-loc이라고 하는 attribute 속성 값 가져오기
location = html.find("div",{"class":"recJobLoc"})['data-rc-loc']
job_id = html["data-jk"]return{'title': title,'company': company,'location': location,'link': f"https://www.indeed.com/viewjob?jk={job_id}"}defextract_indeed_jobs(last_page):
jobs =[]for page inrange(last_page):
result = requests.get(f"{URL}&start={0*LIMIT}")
soup = BeautifulSoup(result.text,"html.parser")
results = soup.find_all("div",{"class":"jobsearch-SerpJobCard"})for result in results:
job = extract_job(result)
jobs.append(job)return jobs
# main.py
from indeed import extract_indeed_pages, extract_indeed_jobs

# Find how many result pages exist, then scrape the jobs from each of them.
last_indeed_pages = extract_indeed_pages()
indeed_jobs = extract_indeed_jobs(last_indeed_pages)
print(indeed_jobs)
댓글
댓글 쓰기