"""Print headline-like link texts scraped from three news front pages.

For each source page the script downloads the HTML, collects the text of
every ``<a>`` element, keeps the texts longer than MIN_HEADLINE_LENGTH
characters and prints the first MAX_HEADLINES of them as a list.

Provenance: reconstructed from a GitLab-generated git patch.
    Commit:  5b14013246e56acb316ceaff395a487495a2bfa2
    From:    K V Ragul <cb.en.u4cse16237@cb.students.amrita.edu>
    Date:    Wed, 6 Mar 2019 13:06:25 +0530
    Subject: [PATCH] Periodical 2
    File:    cb.en.u4cse16237_p2_q2.py (new file, 38 lines in the patch)
"""

import sys
from itertools import islice
from urllib.request import urlopen  # not used below; kept from the original

import requests
from bs4 import BeautifulSoup

GOOGLE_NEWS_URL = "https://news.google.com/?hl=en-IN&gl=IN&ceid=IN:en"
YAHOO_NEWS_URL = "https://in.news.yahoo.com/"
REDIFF_HEADLINES_URL = "https://www.rediff.com/headline.html"

# The original script downloaded this Google News topic page (into a variable
# named ``bus``) but never parsed it. The URL is kept for reference only and
# is no longer fetched, which saves one pointless HTTP request.
GOOGLE_NEWS_TOPIC_URL = (
    "https://news.google.com/topics/"
    "CAAqJggKIiBDQkFTRWdvSUwyMHZNRGx6TVdZU0FtVnVHZ0pKVGlnQVAB"
    "?hl=en-IN&gl=IN&ceid=IN%3Aen"
)

# Pages are printed in this order, one list per page.
SOURCE_URLS = (GOOGLE_NEWS_URL, YAHOO_NEWS_URL, REDIFF_HEADLINES_URL)

MIN_HEADLINE_LENGTH = 25  # a link text must be strictly longer than this
MAX_HEADLINES = 10  # number of link texts printed per page
REQUEST_TIMEOUT_SECONDS = 10  # without a timeout requests.get can block forever


def select_headlines(link_texts, min_length=MIN_HEADLINE_LENGTH,
                     limit=MAX_HEADLINES):
    """Return the first *limit* texts that are longer than *min_length*.

    Args:
        link_texts: iterable of strings (the text of each link on a page).
        min_length: a text is kept only if its stripped length is strictly
            greater than this value.
        limit: maximum number of texts to return.

    Returns:
        A list of at most *limit* whitespace-stripped strings, in input order.
    """
    # Strip first so that padding whitespace cannot push a short label over
    # the length threshold.
    stripped = (text.strip() for text in link_texts)
    long_enough = (text for text in stripped if len(text) > min_length)
    # islice stops consuming the input as soon as *limit* texts were found.
    return list(islice(long_enough, limit))


def fetch_headlines(url):
    """Download *url* and return its headline-like link texts.

    Args:
        url: address of the page to download.

    Returns:
        The result of select_headlines() applied to the text of every
        ``<a>`` element on the page.

    Raises:
        requests.RequestException: on connection failure, timeout, or an
            HTTP error status (raised by ``raise_for_status``).
    """
    response = requests.get(url, timeout=REQUEST_TIMEOUT_SECONDS)
    response.raise_for_status()
    soup = BeautifulSoup(response.content, "html.parser")
    # The length test must be applied to the link *text*. The original code
    # called len() on the Tag object itself, which counts the tag's child
    # nodes, so practically no link ever passed the "> 25" filter.
    return select_headlines(anchor.get_text() for anchor in soup.select("a"))


def main():
    """Print one list of headline-like link texts per source page."""
    for url in SOURCE_URLS:
        try:
            headlines = fetch_headlines(url)
        except requests.RequestException as error:
            # One unreachable site should not prevent the others from being
            # reported; the failure goes to stderr so stdout stays clean.
            print("Could not fetch", url, "-", error, file=sys.stderr)
            continue
        print(headlines)


if __name__ == "__main__":
    main()