Crawling using Beautiful Soup 4

'''
import requests
import re
from bs4 import BeautifulSoup

target_url = 'https://www.poftut.com/python-main-function-use/'

def download_page(page_url):
	print(page_url)
	try:
		return requests.get(page_url).text
	except Exception as e:
		print ("Error: invalid ip and port", e)

def parse_page(page):
	tags = []
	soup = BeautifulSoup(page, 'html.parser')
	for tag in soup.find_all(re.compile("t")):
		tags.append(tag.name)
	return tags

def main():
	print('Basic with BeautifulSoup is starting...')
	page = download_page(target_url)
	tags = parse_page(page)
	print(tags)
main()

Basic web-crawler process flow demo using Python

'''
Demonstrate the basic process of crawling a static web snippet.
Pree Thiengburanathum
Python 3.7
'''
import re
import requests
from urllib.parse import urlparse


target_url = 'https://www.poftut.com/python-main-function-use/'

def get_links(page_url):
	host = urlparse(page_url)
	page = download_page(page_url)
	links = extract_links(page)
	return links

def extract_links(page):
	if not page:
		return []
	link_regex = re.compile('(?<=href=").*?(?=")')
	return link_regex.findall(page)

def download_page(url):
	print(url)
	try:
		return requests.get(url).text
	except Exception as e:
		print ("Error: invalid ip and port", e)
		
def main():
	print('Basic crawler is starting...')
	links = get_links(target_url)
	print(len(links))
	for link in links:
		print(link)
	print('Program terminated successfully')

main()