from urllib.parse import urlsplit

import requests
from bs4 import BeautifulSoup
from django import forms
from django.core.exceptions import ValidationError
def clean_string(stringToClean):
    ''' Run the value through a non-required Django CharField.

    Returns whatever the field's clean() produces, or None when clean()
    raises ValidationError.
    '''
    field = forms.CharField(required=False)
    try:
        cleaned = field.clean(stringToClean)
    except ValidationError:
        return None
    else:
        return cleaned
class Fanfic:
    ''' Helpers to validate a fanfic url and fetch its title and author '''

    # Host names of the sites that have a parser class in this module.
    ALLOWED_SITES = ("ficwad.com", "avengersfanfiction.com",
                     "archiveofourown.org")

    # Seconds to wait for the remote server before giving up.
    REQUEST_TIMEOUT = 10

    @staticmethod
    def _split_url(url):
        ''' Return (scheme, hostname) of url, or (None, None) if the url
        cannot be parsed. Both values are lower-cased by urlsplit. '''
        try:
            parts = urlsplit(str(url))
            return parts.scheme, parts.hostname
        except ValueError:
            # urlsplit rejects some malformed urls (e.g. broken brackets)
            return None, None

    @staticmethod
    def get_site(url):
        ''' Get the site name of url

        The host part of the url wins when it is one of the allowed
        sites, so a supported url that merely mentions another supported
        site (for instance in its query string) is attributed correctly.
        Otherwise the first allowed site that appears anywhere in the
        text is returned, which keeps scheme-less input working.
        Returns None when no allowed site is found.
        '''
        _, host = Fanfic._split_url(url)
        if host in Fanfic.ALLOWED_SITES:
            return host
        text = str(url)
        for site in Fanfic.ALLOWED_SITES:
            if site in text:
                return site
        return None

    @staticmethod
    def check_url_format(url):
        ''' Check if url format is ok

        Returns True only for an http/https url whose host is exactly
        one of the allowed sites. Comparing the parsed host (instead of
        a string prefix) rejects look-alikes such as
        "http://ficwad.com.evil.example/" or
        "http://ficwad.com@evil.example/".
        '''
        site = Fanfic.get_site(url)
        if site is None:
            return False
        scheme, host = Fanfic._split_url(url)
        return scheme in ("http", "https") and host == site

    @staticmethod
    def check_if_online(url):
        ''' Check if a HEAD request to url answers with status 200

        Returns False for any other status code and for any error
        raised while making the request.
        '''
        try:
            # Without a timeout a stalled server would block forever.
            response = requests.head(url, timeout=Fanfic.REQUEST_TIMEOUT)
        except Exception:
            # Deliberately broad: any failure means "not reachable".
            return False
        return response.status_code == 200

    @staticmethod
    def get_title_and_author(url):
        ''' Parse html and get title and author of the fanfic

        Delegates to the parser class of the site the url belongs to and
        returns its (title, author) tuple.
        Raises ValueError if the url does not belong to an allowed site.
        '''
        site = Fanfic.get_site(url)
        # Looked up at call time: the parser classes are defined after
        # this class in the module.
        parsers = {
            "ficwad.com": FicWad,
            "avengersfanfiction.com": AvengersFanfiction,
            "archiveofourown.org": ArchiveOfOurOwn,
        }
        parser = parsers.get(site)
        if parser is None:
            raise ValueError("Unsupported fanfic url: {!r}".format(url))
        return parser.get_title_and_author(url)
class FicWad:
    ''' Parser for story pages served by ficwad.com '''

    @staticmethod
    def get_title_and_author(url):
        ''' Download url and return (title, author)

        The title is the text of the first <h4> inside <div id="story">;
        the author is the text of the first <span class="author"> in
        that div, with everything up to the first "by " removed.
        Raises AttributeError when the story div or its <h4> is missing
        and IndexError when there is no author span.
        '''
        # Without a timeout a stalled server would block forever.
        page = requests.get(url, timeout=10).text
        soup = BeautifulSoup(page, 'html.parser')
        story = soup.find('div', id="story")
        title = story.find('h4').text
        author_with_by = story.find_all('span', 'author')[0].text
        # Split on the first occurrence only; if the "by " prefix is
        # absent keep the whole text instead of failing.
        _, prefix_found, author = author_with_by.partition('by ')
        if not prefix_found:
            author = author_with_by
        return title, author
class AvengersFanfiction:
    ''' Parser for story pages served by avengersfanfiction.com '''

    @staticmethod
    def get_title_and_author(url):
        ''' Download url and return (title, author)

        The title is the stripped text of the first <h1> of the page;
        the author is the stripped text of the first <h3> inside
        <div id="sidebar">.
        Raises AttributeError when one of those elements is missing.
        '''
        # Without a timeout a stalled server would block forever.
        page = requests.get(url, timeout=10).text
        soup = BeautifulSoup(page, 'html.parser')
        title = soup.find('h1').get_text(strip=True)
        sidebar = soup.find('div', id='sidebar')
        author = sidebar.find('h3').get_text(strip=True)
        return title, author
class ArchiveOfOurOwn:
    ''' Parser for work pages served by archiveofourown.org '''

    @staticmethod
    def get_title_and_author(url):
        ''' Download url and return (title, author)

        The title is the stripped text of the first <h2 class="title">;
        the author is the stripped text of the first
        <h3 class="byline">.
        Raises IndexError when one of those elements is missing.
        '''
        # Without a timeout a stalled server would block forever.
        page = requests.get(url, timeout=10).text
        soup = BeautifulSoup(page, 'html.parser')
        title = soup.find_all('h2', 'title')[0].get_text(strip=True)
        author = soup.find_all('h3', 'byline')[0].get_text(strip=True)
        return title, author
def url_without_errors(url):
    ''' Validate a fanfic url step by step.

    Returns the cleaned url when every check passes; otherwise returns
    the "Error: ..." message of the first check that failed.
    '''
    cleaned = clean_string(url)
    if cleaned is None:
        # the value did not survive cleaning
        return "Error: Sorry, are you sure the url is correct?"

    # Checks run in order; each one is paired with its failure message.
    checks = (
        # wrong site or error in url
        (Fanfic.check_url_format,
         "Error: Sorry, are you sure the url is valid?"),
        # url not working anymore
        (Fanfic.check_if_online,
         "Error: Sorry, the url doesn't seem to be working anymore."),
    )
    for check, message in checks:
        if check(cleaned) is False:
            return message
    return cleaned
|