"""Print link texts scraped from three news front pages.

Fetches the Google News (hl=en-IN), Yahoo News India and Rediff headline
pages, collects the text of every ``<a>`` element whose text is longer
than ``MIN_TEXT_CHARS`` characters, and prints the first ten texts found
on each page.
"""
import requests
from urllib.request import urlopen
from bs4 import BeautifulSoup

# Anchors whose text is this short or shorter are skipped; the original
# script used the same threshold of 25.
MIN_TEXT_CHARS = 25

# Seconds to wait for each server before giving up; without a timeout
# requests.get() can block indefinitely on an unresponsive host.
REQUEST_TIMEOUT = 10


def _link_texts(soup, min_chars=MIN_TEXT_CHARS):
    """Return the text of every ``<a>`` in *soup* longer than *min_chars*.

    The length test is applied to the anchor's text. Calling ``len()`` on
    the Tag itself would count its direct children instead, which is almost
    never above the threshold and therefore filters out nearly every link.

    Args:
        soup: a parsed BeautifulSoup document (or any Tag) to search.
        min_chars: exclusive lower bound on the text length to keep.

    Returns:
        A list of strings in document order, exactly as returned by
        ``get_text()`` (no whitespace stripping).
    """
    texts = (anchor.get_text() for anchor in soup.select('a'))
    return [text for text in texts if len(text) > min_chars]


page = requests.get(
    "https://news.google.com/?hl=en-IN&gl=IN&ceid=IN:en",
    timeout=REQUEST_TIMEOUT,
)
page1 = requests.get("https://in.news.yahoo.com/", timeout=REQUEST_TIMEOUT)
# NOTE(review): `bus` (a Google News topic page, presumably "business") is
# fetched but never parsed or printed below -- confirm whether it is needed.
bus = requests.get(
    "https://news.google.com/topics/CAAqJggKIiBDQkFTRWdvSUwyMHZNRGx6TVdZU0FtVnVHZ0pKVGlnQVAB?hl=en-IN&gl=IN&ceid=IN%3Aen",
    timeout=REQUEST_TIMEOUT,
)
page2 = requests.get(
    "https://www.rediff.com/headline.html",
    timeout=REQUEST_TIMEOUT,
)

soup = BeautifulSoup(page.content, 'html.parser')
soup1 = BeautifulSoup(page1.content, 'html.parser')
soup2 = BeautifulSoup(page2.content, 'html.parser')

list1 = _link_texts(soup)    # Google News
list2 = _link_texts(soup1)   # Yahoo News India
list3 = _link_texts(soup2)   # Rediff

print(list1[:10])
print(list2[:10])
print(list3[:10])