Commit 09fd1199 authored by Robin Stecher

Rewritten project structure

parent a3ca3f1e
Merge request: !1 Dev
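# --- Old combined module (presumably removed by this commit; its functions
#     reappear below, split across src/modules/* and src/url_list_processor.py) ---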
from src.detect_ssl import *
import json
from src.url_grabber import get_urls_from_website


def check_url_list_for_ssl(url_input_list_path: str):
    with open(f'{url_input_list_path}', 'rb') as f:
        data = f.read()
    parsed = json.loads(data)
    universities = parsed['universities']
    for university in universities:
        website_url = university['website']
        if not check_ssl_with_api(website_url):
            print(f'Website {website_url} seems to have no certificate!')


def get_url_list(url_input_list_path: str) -> [str]:
    with open(f'{url_input_list_path}', 'rb') as f:
        data = f.read()
    parsed = json.loads(data)
    universities: [str] = []
    for university in parsed['universities']:
        universities.append(university['website'])
    return universities


def get_all_urls_from_websites(url_list: [str]):
    all_urls: [str] = []
    for url in url_list:
        urls_from_website: [str] = get_urls_from_website(url)
        all_urls = all_urls + urls_from_website
    return list(filter(filter_urls, all_urls))


def filter_urls(current_url: str) -> bool:
    return current_url.startswith('https://') or current_url.startswith('http://')


def filter_http_urls(current_url: str) -> bool:
    return current_url.startswith('http://')


def get_only_http_domains(url_list: [str]) -> [str]:
    return list(filter(filter_http_urls, url_list))


def count_urls_in_list(url_list: [str]) -> int:
    return len(url_list)
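# --- Entry-point script (presumably main.py), driving the new module layout ---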
from src.url_list_processor import get_url_list
from src.modules.count_url_list import count_urls_in_list
from src.modules.ssl_check import check_url_list_for_ssl
from src.modules.get_urls_from_website import get_all_urls_from_websites, get_only_http_domains


def parse_text_to_number(text_input: str) -> int:
    """
    Parses a text input into an integer value, if it is a number
    :param text_input: The number as str
    :return: Returns the number as integer
    """
    if not str.isnumeric(text_input):
        raise Exception('Given string is not a number!')
    return int(text_input)


def mode_count_in_list(university_list: [str]) -> None:
    """
    Executes the module that counts the urls in the university list
    :param university_list: The list of university urls
    """
    print(count_urls_in_list(university_list))


def mode_check_for_ssl(university_list_path: str) -> None:
    """
    Executes the module that checks all university urls in the list for ssl encryption
    :param university_list_path: The path to the university list
    """
    print(processing_message)
    check_url_list_for_ssl(university_list_path)


def mode_get_urls_from_website(university_list: [str]) -> None:
    """
    Executes the module that fetches all urls listed on the given university websites
    :param university_list: The list of websites to fetch urls from
    """
    print(processing_message)
    all_urls = get_all_urls_from_websites(university_list)
    print(f'All urls: {all_urls}')
    print(f'Only http urls: {get_only_http_domains(all_urls)}')


def print_main_menu() -> None:
    """
    Prints the main menu of the python script
    """
    path_to_url_list = 'assets/university_main_pages.json'
    university_list = get_url_list(path_to_url_list)
    print(f'===== Welcome to the ssl check script! =====')
    print(f'== (Quit: ctrl + c) ==')
    print('Which module do you want to run?')
    print('0: Count the urls in url list')
    print('1: Check all urls of the url list for ssl encryption certificate')
    print('2: Get all urls from the university list websites')
    chosen_mode = input('Select the mode: ')
    mode = parse_text_to_number(chosen_mode)

    # Choose correct mode
    if mode == 0:  # count items in url list
        mode_count_in_list(university_list)
    elif mode == 1:  # Check website for ssl
        mode_check_for_ssl(path_to_url_list)
    elif mode == 2:  # Get all urls from websites
        mode_get_urls_from_website(university_list)
    else:
        print('This mode does not exist!')


if __name__ == '__main__':
    # print('Hello World!')
    # check_url_list_for_ssl('assets/university_main_pages.json')
    # print(check_ssl_with_api('http://httpforever.com'))
    # get_urls_from_website('https://stecher42.de')
    university_list = get_url_list('assets/university_main_pages.json')
    # university_list = ['https://stecher42.de', 'https://go.stecher42.de']
    # result_list = get_all_urls_from_websites(university_list)
    # print(result_list)
    # print(get_only_http_domains(result_list))
    print(f'Count of universities: {count_urls_in_list(university_list)}')
    processing_message = 'Processing... This may take a while'
    while True:
        print_main_menu()
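# --- src/modules/count_url_list.py (path inferred from the import in the entry-point script) ---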
def count_urls_in_list(url_list: [str]) -> int:
    return len(url_list)
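# --- src/modules/get_urls_from_website.py (path inferred from the import in the entry-point script) ---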
from src.url_grabber import get_urls_from_website


def get_all_urls_from_websites(url_list: [str]):
    all_urls: [str] = []
    for url in url_list:
        urls_from_website: [str] = get_urls_from_website(url)
        all_urls = all_urls + urls_from_website
    return list(filter(filter_urls, all_urls))


def filter_urls(current_url: str) -> bool:
    return current_url.startswith('https://') or current_url.startswith('http://')


def filter_http_urls(current_url: str) -> bool:
    return current_url.startswith('http://')


def get_only_http_domains(url_list: [str]) -> [str]:
    return list(filter(filter_http_urls, url_list))
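# get_urls_from_website is imported from src/url_grabber, which this commit does
# not touch. Below is only a sketch of what such a grabber could look like,
# assuming it downloads the page and returns every href it finds; the names
# _HrefCollector and grab_urls_sketch are hypothetical and not taken from the
# repository, and the real module may use requests/BeautifulSoup instead.
from html.parser import HTMLParser
from urllib.request import urlopen


class _HrefCollector(HTMLParser):
    """Collects the href attribute of every <a> tag on a page."""

    def __init__(self):
        super().__init__()
        self.hrefs: list[str] = []

    def handle_starttag(self, tag, attrs):
        if tag == 'a':
            for name, value in attrs:
                if name == 'href' and value:
                    self.hrefs.append(value)


def grab_urls_sketch(url: str) -> list[str]:
    """Downloads the page at url and returns every link found on it."""
    with urlopen(url, timeout=10) as response:
        html = response.read().decode('utf-8', errors='replace')
    collector = _HrefCollector()
    collector.feed(html)
    return collector.hrefs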
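# --- src/modules/ssl_check.py (path inferred from the import in the entry-point script) ---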
import json
from src.detect_ssl import check_ssl_with_api


def check_url_list_for_ssl(url_input_list_path: str):
    with open(f'{url_input_list_path}', 'rb') as f:
        data = f.read()
    parsed = json.loads(data)
    universities = parsed['universities']
    for university in universities:
        website_url = university['website']
        if not check_ssl_with_api(website_url):
            print(f'Website {website_url} seems to have no certificate!')
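# check_ssl_with_api is imported from src/detect_ssl, which this commit does not
# touch. Below is a minimal sketch of what such a check could look like, assuming
# it only needs to verify that an HTTPS handshake with certificate validation
# succeeds; the name check_ssl_sketch is hypothetical, and the real function may
# instead query an external API, as its name suggests.
import socket
import ssl
from urllib.parse import urlparse


def check_ssl_sketch(website_url: str, timeout: float = 5.0) -> bool:
    """Returns True if the host behind website_url presents a valid certificate."""
    # Reduce 'https://host/path' (or a bare host name) to just the host name.
    host = urlparse(website_url).netloc.split(':')[0] or website_url
    context = ssl.create_default_context()
    try:
        with socket.create_connection((host, 443), timeout=timeout) as sock:
            # wrap_socket raises if the certificate is missing, expired or untrusted.
            with context.wrap_socket(sock, server_hostname=host):
                return True
    except (ssl.SSLError, OSError):
        return False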
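# --- src/url_list_processor.py (path inferred from the import in the entry-point script) ---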
import json


def get_url_list(url_input_list_path: str) -> [str]:
    with open(f'{url_input_list_path}', 'rb') as f:
        data = f.read()
    parsed = json.loads(data)
    universities: [str] = []
    for university in parsed['universities']:
        universities.append(university['website'])
    return universities
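# Shape of assets/university_main_pages.json as implied by get_url_list and
# check_url_list_for_ssl: a top-level "universities" array whose entries carry at
# least a "website" key. The entries below are made-up placeholders; the real
# file may hold additional keys per university.
#
# {
#     "universities": [
#         {"website": "https://www.example-university.example"},
#         {"website": "http://old.example-university.example"}
#     ]
# }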