Commit 09fd1199 authored by Robin Stecher

Rewritten project structure

parent a3ca3f1e
Merge request: !1 Dev
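# --- Old combined module (presumably removed by this commit; its functions
#     reappear below, split across src/modules/* and src/url_list_processor.py) ---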
from src.detect_ssl import *
import json
from src.url_grabber import get_urls_from_website


def check_url_list_for_ssl(url_input_list_path: str):
    with open(f'{url_input_list_path}', 'rb') as f:
        data = f.read()
    parsed = json.loads(data)
    universities = parsed['universities']
    for university in universities:
        website_url = university['website']
        if not check_ssl_with_api(website_url):
            print(f'Website {website_url} seems to have no certificate!')


def get_url_list(url_input_list_path: str) -> [str]:
    with open(f'{url_input_list_path}', 'rb') as f:
        data = f.read()
    parsed = json.loads(data)
    universities: [str] = []
    for university in parsed['universities']:
        universities.append(university['website'])
    return universities


def get_all_urls_from_websites(url_list: [str]):
    all_urls: [str] = []
    for url in url_list:
        urls_from_website: [str] = get_urls_from_website(url)
        all_urls = all_urls + urls_from_website
    return list(filter(filter_urls, all_urls))


def filter_urls(current_url: str) -> bool:
    return current_url.startswith('https://') or current_url.startswith('http://')


def filter_http_urls(current_url: str) -> bool:
    return current_url.startswith('http://')


def get_only_http_domains(url_list: [str]) -> [str]:
    return list(filter(filter_http_urls, url_list))


def count_urls_in_list(url_list: [str]) -> int:
    return len(url_list)
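# --- Entry-point script (presumably main.py), driving the new module layout ---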
from src.url_list_processor import get_url_list
from src.modules.count_url_list import count_urls_in_list
from src.modules.ssl_check import check_url_list_for_ssl
from src.modules.get_urls_from_website import get_all_urls_from_websites, get_only_http_domains


def parse_text_to_number(text_input: str) -> int:
    """
    Parses a text input into an integer value, if it is a number
    :param text_input: The number as str
    :return: Returns the number as integer
    """
    if not str.isnumeric(text_input):
        raise Exception('Given string is not a number!')
    return int(text_input)


def mode_count_in_list(university_list: [str]) -> None:
    """
    Executes the module that counts the urls in the university list
    :param university_list: The list of university urls
    """
    print(count_urls_in_list(university_list))


def mode_check_for_ssl(university_list_path: str) -> None:
    """
    Executes the module that checks all university urls in the list for ssl encryption
    :param university_list_path: The path to the university list
    """
    print(processing_message)
    check_url_list_for_ssl(university_list_path)


def mode_get_urls_from_website(university_list: [str]) -> None:
    """
    Executes the module that fetches all urls listed on the given university websites
    :param university_list: The list of websites to fetch urls from
    """
    print(processing_message)
    all_urls = get_all_urls_from_websites(university_list)
    print(f'All urls: {all_urls}')
    print(f'Only http urls: {get_only_http_domains(all_urls)}')


def print_main_menu() -> None:
    """
    Prints the main menu of the python script
    """
    path_to_url_list = 'assets/university_main_pages.json'
    university_list = get_url_list(path_to_url_list)
    print(f'===== Welcome to the ssl check script! =====')
    print(f'== (Quit: ctrl + c) ==')
    print('Which module do you want to run?')
    print('0: Count the urls in url list')
    print('1: Check all urls of the url list for ssl encryption certificate')
    print('2: Get all urls from the university list websites')
    chosen_mode = input('Select the mode: ')
    mode = parse_text_to_number(chosen_mode)

    # Choose correct mode
    if mode == 0:  # count items in url list
        mode_count_in_list(university_list)
    elif mode == 1:  # Check website for ssl
        mode_check_for_ssl(path_to_url_list)
    elif mode == 2:  # Get all urls from websites
        mode_get_urls_from_website(university_list)
    else:
        print('This mode does not exist!')


if __name__ == '__main__':
    # print('Hello World!')
    # check_url_list_for_ssl('assets/university_main_pages.json')
    # print(check_ssl_with_api('http://httpforever.com'))
    # get_urls_from_website('https://stecher42.de')
    university_list = get_url_list('assets/university_main_pages.json')
    # university_list = ['https://stecher42.de', 'https://go.stecher42.de']
    # result_list = get_all_urls_from_websites(university_list)
    # print(result_list)
    # print(get_only_http_domains(result_list))
    print(f'Count of universities: {count_urls_in_list(university_list)}')
    processing_message = 'Processing... This may take a while'
    while True:
        print_main_menu()
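# --- src/modules/count_url_list.py (path inferred from the import in the entry-point script) ---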
def count_urls_in_list(url_list: [str]) -> int:
    return len(url_list)
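# --- src/modules/get_urls_from_website.py (path inferred from the import in the entry-point script) ---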
from src.url_grabber import get_urls_from_website


def get_all_urls_from_websites(url_list: [str]):
    all_urls: [str] = []
    for url in url_list:
        urls_from_website: [str] = get_urls_from_website(url)
        all_urls = all_urls + urls_from_website
    return list(filter(filter_urls, all_urls))


def filter_urls(current_url: str) -> bool:
    return current_url.startswith('https://') or current_url.startswith('http://')


def filter_http_urls(current_url: str) -> bool:
    return current_url.startswith('http://')


def get_only_http_domains(url_list: [str]) -> [str]:
    return list(filter(filter_http_urls, url_list))
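# get_urls_from_website is imported from src/url_grabber, which this commit does
# not touch. Below is only a sketch of what such a grabber could look like,
# assuming it downloads the page and returns every href it finds; the names
# _HrefCollector and grab_urls_sketch are hypothetical and not taken from the
# repository, and the real module may use requests/BeautifulSoup instead.
from html.parser import HTMLParser
from urllib.request import urlopen


class _HrefCollector(HTMLParser):
    """Collects the href attribute of every <a> tag on a page."""

    def __init__(self):
        super().__init__()
        self.hrefs: list[str] = []

    def handle_starttag(self, tag, attrs):
        if tag == 'a':
            for name, value in attrs:
                if name == 'href' and value:
                    self.hrefs.append(value)


def grab_urls_sketch(url: str) -> list[str]:
    """Downloads the page at url and returns every link found on it."""
    with urlopen(url, timeout=10) as response:
        html = response.read().decode('utf-8', errors='replace')
    collector = _HrefCollector()
    collector.feed(html)
    return collector.hrefs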
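# --- src/modules/ssl_check.py (path inferred from the import in the entry-point script) ---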
import json
from src.detect_ssl import check_ssl_with_api


def check_url_list_for_ssl(url_input_list_path: str):
    with open(f'{url_input_list_path}', 'rb') as f:
        data = f.read()
    parsed = json.loads(data)
    universities = parsed['universities']
    for university in universities:
        website_url = university['website']
        if not check_ssl_with_api(website_url):
            print(f'Website {website_url} seems to have no certificate!')
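# check_ssl_with_api is imported from src/detect_ssl, which this commit does not
# touch. Below is a minimal sketch of what such a check could look like, assuming
# it only needs to verify that an HTTPS handshake with certificate validation
# succeeds; the name check_ssl_sketch is hypothetical, and the real function may
# instead query an external API, as its name suggests.
import socket
import ssl
from urllib.parse import urlparse


def check_ssl_sketch(website_url: str, timeout: float = 5.0) -> bool:
    """Returns True if the host behind website_url presents a valid certificate."""
    # Reduce 'https://host/path' (or a bare host name) to just the host name.
    host = urlparse(website_url).netloc.split(':')[0] or website_url
    context = ssl.create_default_context()
    try:
        with socket.create_connection((host, 443), timeout=timeout) as sock:
            # wrap_socket raises if the certificate is missing, expired or untrusted.
            with context.wrap_socket(sock, server_hostname=host):
                return True
    except (ssl.SSLError, OSError):
        return False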
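# --- src/url_list_processor.py (path inferred from the import in the entry-point script) ---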
import json


def get_url_list(url_input_list_path: str) -> [str]:
    with open(f'{url_input_list_path}', 'rb') as f:
        data = f.read()
    parsed = json.loads(data)
    universities: [str] = []
    for university in parsed['universities']:
        universities.append(university['website'])
    return universities
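# Shape of assets/university_main_pages.json as implied by get_url_list and
# check_url_list_for_ssl: a top-level "universities" array whose entries carry at
# least a "website" key. The entries below are made-up placeholders; the real
# file may hold additional keys per university.
#
# {
#     "universities": [
#         {"website": "https://www.example-university.example"},
#         {"website": "http://old.example-university.example"}
#     ]
# }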