From 1248196dde9ffebdfc9920f5eba9afc67da7c3d3 Mon Sep 17 00:00:00 2001
From: Alicia Sykes
Date: Sun, 16 Apr 2023 10:50:14 +0100
Subject: [PATCH] Writes Python scripts to download, parse, combine templates

---
Note: the two Python hunks below were revised by hand after this patch
was generated, so the blob ids on their index lines are no longer exact.

 lib/combine.py       | 46 ++++++++++++++++++++++++++++++++++++++++++++++
 lib/download.py      | 56 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 lib/requirements.txt |  1 +
 3 files changed, 103 insertions(+)
 create mode 100644 lib/combine.py
 create mode 100644 lib/download.py
 create mode 100644 lib/requirements.txt

diff --git a/lib/combine.py b/lib/combine.py
new file mode 100644
index 0000000..e21686e
--- /dev/null
+++ b/lib/combine.py
@@ -0,0 +1,46 @@
+"""Combine all downloaded template lists into a single templates.json."""
+import json
+import os
+
+# Resolve paths from this file's location, so any working directory works
+lib_dir = os.path.dirname(os.path.abspath(__file__))
+templates_dir = os.path.join(lib_dir, '..', 'external-templates')
+output_file = os.path.join(lib_dir, '..', 'templates.json')
+
+
+def load_templates(directory):
+    """Return the templates from every .json file in directory."""
+    templates = []
+    # os.listdir order is arbitrary; sort so the output is reproducible
+    for name in sorted(os.listdir(directory)):
+        # Skip stray non-JSON files (e.g. .gitkeep) rather than crash
+        if not name.endswith('.json'):
+            continue
+        with open(os.path.join(directory, name), encoding='utf-8') as f:
+            templates.extend(json.load(f)['templates'])
+    return templates
+
+
+def remove_duplicates(templates):
+    """Return templates, keeping only the first entry for each title."""
+    seen_titles = set()
+    unique = []
+    for template in templates:
+        if template['title'] not in seen_titles:
+            seen_titles.add(template['title'])
+            unique.append(template)
+    return unique
+
+
+def main():
+    """Merge, de-duplicate and write the combined template list."""
+    file_data = {
+        'version': '2',
+        'templates': remove_duplicates(load_templates(templates_dir)),
+    }
+    with open(output_file, 'w') as f:
+        json.dump(file_data, f, indent=2, sort_keys=False)
+
+
+if __name__ == '__main__':
+    main()
diff --git a/lib/download.py b/lib/download.py
new file mode 100644
index 0000000..d10454e
--- /dev/null
+++ b/lib/download.py
@@ -0,0 +1,56 @@
+"""Download every template list named in sources.csv."""
+import csv
+import os
+
+import requests
+
+# Resolve paths from this file's location, so any working directory works
+lib_dir = os.path.dirname(os.path.abspath(__file__))
+destination_dir = os.path.join(lib_dir, '..', 'external-templates')
+sources_list = os.path.join(lib_dir, '..', 'sources.csv')
+
+# Seconds to wait on a server before giving up (requests has no default)
+request_timeout = 30
+
+
+def download(url: str, filename: str):
+    """Save the response body of url to filename inside destination_dir.
+
+    Network and HTTP failures are printed rather than raised, so one bad
+    source does not stop the remaining downloads.
+    """
+    file_path = os.path.join(destination_dir, filename)
+    try:
+        # The with-block releases the streamed connection when done
+        with requests.get(url, stream=True, timeout=request_timeout) as r:
+            if not r.ok:  # HTTP status code 4XX/5XX
+                print('Download failed: status code {}\n{}'.format(
+                    r.status_code, r.text))
+                return
+            print('saving to', os.path.abspath(file_path))
+            with open(file_path, 'wb') as f:
+                for chunk in r.iter_content(chunk_size=1024 * 8):
+                    f.write(chunk)
+                # Sync to disk once at the end, not after every chunk
+                f.flush()
+                os.fsync(f.fileno())
+    except requests.RequestException as err:
+        print('Download failed: {}\n{}'.format(url, err))
+
+
+def get_source_list():
+    """Return the [name, url] rows of the sources CSV file."""
+    with open(sources_list, mode='r', newline='') as file:
+        # Rows with fewer than two fields (e.g. blank lines) are skipped
+        return [row for row in csv.reader(file) if len(row) >= 2]
+
+
+def main():
+    """Download each source into destination_dir as <name>.json."""
+    os.makedirs(destination_dir, exist_ok=True)
+    for row in get_source_list():
+        download(row[1], row[0] + '.json')
+
+
+if __name__ == '__main__':
+    main()
diff --git a/lib/requirements.txt b/lib/requirements.txt
new file mode 100644
index 0000000..f229360
--- /dev/null
+++ b/lib/requirements.txt
@@ -0,0 +1 @@
+requests