Hide keyboard shortcuts

Hot-keys on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

1

2

3

4

5

6

7

8

9

10

11

12

13

14

15

16

17

18

19

20

21

22

23

24

25

26

27

28

29

30

31

32

33

34

35

36

37

38

39

40

41

42

43

44

45

46

47

48

49

50

51

52

53

54

55

56

57

58

59

60

61

62

63

64

65

66

67

68

69

70

71

72

73

74

75

76

77

78

79

80

81

82

83

84

85

86

87

88

89

90

91

92

93

94

95

96

97

98

99

100

101

102

103

104

105

106

107

108

109

110

111

112

113

114

115

116

117

118

from urllib.parse import urlsplit

import requests
from bs4 import BeautifulSoup
from django import forms
from django.core.exceptions import ValidationError

 

 

def clean_string(stringToClean):
    '''
    Run a value through a non-required Django CharField.

    Returns whatever the field's clean() produces, or None when the
    field rejects the value with a ValidationError.
    '''
    field = forms.CharField(required=False)
    try:
        cleaned = field.clean(stringToClean)
    except ValidationError:
        return None
    else:
        return cleaned

 

 

class Fanfic:
    ''' Helpers to validate fanfic urls and dispatch to the site parsers '''

    # Supported sites; the order is the priority used by get_site().
    ALLOWED_SITES = ("ficwad.com", "avengersfanfiction.com",
                     "archiveofourown.org")

    @staticmethod
    def _hostname(url):
        ''' Return the lower-cased host of url, or "" when it has none '''
        try:
            return urlsplit(str(url)).hostname or ""
        except ValueError:
            # urlsplit refuses some malformed netlocs (e.g. a broken
            # IPv6 literal); treat those as "no host".
            return ""

    @staticmethod
    def get_site(url):
        '''
        Get the site name of url.

        The host of the url decides first (exact match or a subdomain of
        an allowed site), so an allowed site name that merely appears in
        the path or query string of another site's url cannot win.
        When the host does not settle it (for instance a url without a
        scheme) the lookup falls back to a plain substring search.

        Returns one of ALLOWED_SITES, or None if no site matches.
        '''
        text = str(url)
        host = Fanfic._hostname(text)
        for site in Fanfic.ALLOWED_SITES:
            if host == site or host.endswith("." + site):
                return site
        # Fallback: substring search for input whose host gave no answer.
        for site in Fanfic.ALLOWED_SITES:
            if site in text:
                return site
        return None

    @staticmethod
    def check_url_format(url):
        '''
        Check if url format is ok.

        The url must start with "http://<site>" or "https://<site>" AND
        its parsed host must be exactly <site>. The second condition
        rejects look-alike hosts such as "ficwad.com.evil.example" and
        userinfo tricks such as "ficwad.com@evil.example", which a bare
        prefix test lets through.

        Returns True or False.
        '''
        site = Fanfic.get_site(url)
        if site is None:
            return False
        text = str(url)
        prefixes = ("http://{}".format(site), "https://{}".format(site))
        return text.startswith(prefixes) and Fanfic._hostname(text) == site

    @staticmethod
    def check_if_online(url, timeout=10):
        '''
        Check if a HEAD request to url answers with status 200.

        timeout is the number of seconds handed to requests; without one
        requests waits indefinitely for an unresponsive server.

        NOTE: requests.head does not follow redirects by default, so a
        url that answers with a redirect is reported as not online.

        Returns True on a 200 answer, False on any other status or on a
        requests error (connection failure, timeout, invalid url, ...).
        '''
        try:
            response = requests.head(url, timeout=timeout)
        except requests.RequestException:
            return False
        return response.status_code == 200

    @staticmethod
    def get_title_and_author(url):
        '''
        Parse html and get title and author of the fanfic.

        Picks the parser class that belongs to the site of url and
        returns its (title, author) tuple.

        Raises ValueError when url does not belong to a supported site.
        '''
        site = Fanfic.get_site(url)
        if site is None:
            raise ValueError("Unsupported fanfic url: {!r}".format(url))
        # Built at call time: the parser classes are defined further
        # down in the module than this class.
        parsers = {
            "ficwad.com": FicWad,
            "avengersfanfiction.com": AvengersFanfiction,
            "archiveofourown.org": ArchiveOfOurOwn,
        }
        return parsers[site].get_title_and_author(url)

 

 

class FicWad:
    ''' Parser for story pages of ficwad.com '''

    @staticmethod
    def get_title_and_author(url, timeout=10):
        '''
        Download url and read the title and author of the story.

        The title is the text of the first <h4> inside the element with
        id "story"; the author is whatever follows the first "by " in
        the first <span class="author"> of that element.

        timeout is the number of seconds handed to requests; without one
        requests waits indefinitely for an unresponsive server.

        Returns a (title, author) tuple.

        Raises a requests exception when the download fails,
        AttributeError when the story element or its <h4> is missing,
        and IndexError when there is no author span or its text has no
        "by " in it.
        '''
        html = requests.get(url, timeout=timeout).text
        story = BeautifulSoup(html, 'html.parser').find('div', id="story")
        title = story.find('h4').text
        byline = story.find_all('span', 'author')[0].text
        # Split on the first "by " only, so an author name that itself
        # contains "by " stays intact.
        author = byline.split('by ', 1)[1]
        return title, author

 

 

class AvengersFanfiction:
    ''' Parser for story pages of avengersfanfiction.com '''

    @staticmethod
    def get_title_and_author(url, timeout=10):
        '''
        Download url and read the title and author of the story.

        The title is the stripped text of the first <h1> of the page;
        the author is the stripped text of the first <h3> inside the
        element with id "sidebar".

        timeout is the number of seconds handed to requests; without one
        requests waits indefinitely for an unresponsive server.

        Returns a (title, author) tuple.

        Raises a requests exception when the download fails and
        AttributeError when one of the expected elements is missing.
        '''
        html = requests.get(url, timeout=timeout).text
        document = BeautifulSoup(html, 'html.parser')
        title = document.find('h1').get_text(strip=True)
        sidebar = document.find('div', id='sidebar')
        author = sidebar.find('h3').get_text(strip=True)
        return title, author

 

 

class ArchiveOfOurOwn:
    ''' Parser for work pages of archiveofourown.org '''

    @staticmethod
    def get_title_and_author(url, timeout=10):
        '''
        Download url and read the title and author of the work.

        The title is the stripped text of the first <h2 class="title">;
        the author is the stripped text of the first
        <h3 class="byline">.

        timeout is the number of seconds handed to requests; without one
        requests waits indefinitely for an unresponsive server.

        Returns a (title, author) tuple.

        Raises a requests exception when the download fails and
        IndexError when the page has no matching heading.
        '''
        html = requests.get(url, timeout=timeout).text
        document = BeautifulSoup(html, 'html.parser')
        title = document.find_all('h2', 'title')[0].get_text(strip=True)
        author = document.find_all('h3', 'byline')[0].get_text(strip=True)
        return title, author

 

 

def url_without_errors(url):
    '''
    Validate a fanfic url step by step.

    The url is cleaned first, then its format is checked, then the
    page is probed. Returns the cleaned url when every step passes,
    otherwise the "Error: ..." message of the first step that fails.
    '''
    cleaned = clean_string(url)
    if cleaned is None:
        # bad written url
        return "Error: Sorry, are you sure the url is correct?"

    # Each check runs on the cleaned url, in this order; the first one
    # that answers False decides the message.
    checks = (
        # wrong site or error in url
        (Fanfic.check_url_format,
         "Error: Sorry, are you sure the url is valid?"),
        # url not working anymore
        (Fanfic.check_if_online,
         "Error: Sorry, the url doesn't seem to be working anymore."),
    )
    for check, message in checks:
        if check(cleaned) is False:
            return message

    return cleaned