Writes Python scripts to download, parse, and combine templates

This commit is contained in:
Alicia Sykes
2023-04-16 10:50:14 +01:00
parent f05a39b85f
commit 1248196dde
3 changed files with 69 additions and 0 deletions

30
lib/combine.py Normal file
View File

@@ -0,0 +1,30 @@
import os
import json
# Folder holding the downloaded per-source template files, and the combined
# output file. Both paths are relative to the current working directory.
TEMPLATES_DIR = '../external-templates'
OUTPUT_FILE = '../templates.json'


def load_templates(source_dir):
    """Return the concatenated 'templates' lists of every .json file in source_dir.

    Raises KeyError if a file has no top-level 'templates' key, and
    json.JSONDecodeError if a .json file is not valid JSON.
    """
    templates = []
    # os.listdir returns names in arbitrary order; sorting makes the result
    # (and therefore which duplicate survives de-duplication) reproducible.
    for name in sorted(os.listdir(source_dir)):
        # Ignore anything that is not a JSON file (sub-folders, .DS_Store, ...)
        if not name.endswith('.json'):
            continue
        with open(os.path.join(source_dir, name), encoding='utf-8') as f:
            templates.extend(json.load(f)['templates'])
    return templates


def dedupe_templates(templates):
    """Return templates with later entries that repeat an earlier 'title' removed.

    The first occurrence of each title is kept and the original order is
    preserved. Raises KeyError if a template has no 'title' key.
    """
    seen_titles = set()
    unique = []
    for template in templates:
        title = template['title']
        if title in seen_titles:
            continue
        seen_titles.add(title)
        unique.append(template)
    return unique


def combine(source_dir=TEMPLATES_DIR, output_file=OUTPUT_FILE):
    """Merge all template files in source_dir and write them to output_file.

    Returns the dictionary that was written, of the form
    {'version': '2', 'templates': [...]}.
    """
    file_data = {
        'version': '2',
        'templates': dedupe_templates(load_templates(source_dir)),
    }
    # Open the templates.json file, and write results to it
    with open(output_file, 'w', encoding='utf-8') as f:
        json.dump(file_data, f, indent=2, sort_keys=False)
    return file_data


if __name__ == '__main__':
    combine()

38
lib/download.py Normal file
View File

@@ -0,0 +1,38 @@
import os
import csv
import requests
# Folder the downloaded template files are saved into
# (relative to the current working directory)
destination_dir = '../external-templates'
# Path of the CSV file listing the template sources
# (relative to the current working directory)
sources_list = '../sources.csv'
# Downloads the file from a given URL, to the local destination
def download(url: str, filename: str, timeout: float = 30):
    """Download url and save it as filename inside destination_dir.

    Args:
        url: Address of the file to fetch.
        filename: Name of the file to create inside destination_dir.
        timeout: Seconds to wait for the server before giving up, so that a
            stalled connection cannot hang the script indefinitely.

    Failures (HTTP error status or a network error) are printed rather than
    raised, so one bad source does not stop the remaining downloads.
    """
    file_path = os.path.join(destination_dir, filename)
    # Stream into a temporary file first so an interrupted transfer never
    # leaves a truncated file (or clobbers a previous good copy) at file_path.
    partial_path = file_path + '.part'
    try:
        # The context manager releases the streamed connection when done
        with requests.get(url, stream=True, timeout=timeout) as r:
            if not r.ok:  # HTTP status code 4XX/5XX
                print('Download failed: status code {}\n{}'.format(r.status_code, r.text))
                return
            print('saving to', os.path.abspath(file_path))
            with open(partial_path, 'wb') as f:
                for chunk in r.iter_content(chunk_size=1024 * 8):
                    if chunk:
                        f.write(chunk)
        os.replace(partial_path, file_path)
    except requests.RequestException as err:
        print('Download failed: {}\n{}'.format(url, err))
    finally:
        # Only present if the transfer did not complete
        if os.path.exists(partial_path):
            os.remove(partial_path)
# Gets list of sources to download from CSV file
def get_source_list(path=None):
    """Return the non-empty rows of the sources CSV file as lists of strings.

    Args:
        path: CSV file to read. Defaults to the module-level sources_list.

    Blank lines in the file are skipped, so every returned row has at least
    one non-empty cell.
    """
    if path is None:
        path = sources_list
    # newline='' lets the csv module handle line endings itself
    with open(path, mode='r', newline='', encoding='utf-8') as file:
        return [
            row for row in csv.reader(file)
            if any(cell.strip() for cell in row)
        ]
# Create destination folder if not yet present
# (exist_ok avoids the check-then-create race of a separate exists() test)
os.makedirs(destination_dir, exist_ok=True)
# For each source, download the templates JSON file
for source in get_source_list():
    # Each row needs at least two columns; the first is used as the local
    # file name (without extension) and the second as the URL to download.
    if len(source) < 2:
        print('Skipping malformed source row:', source)
        continue
    source_name, source_url = source[0].strip(), source[1].strip()
    download(source_url, source_name + '.json')

1
lib/requirements.txt Normal file
View File

@@ -0,0 +1 @@
requests