Writes Python scripts to download, parse, and combine templates
This commit is contained in:
30
lib/combine.py
Normal file
30
lib/combine.py
Normal file
@@ -0,0 +1,30 @@
|
|||||||
|
import os
|
||||||
|
import json
|
||||||
|
|
||||||
|
# Directory holding the downloaded template files, and the combined output
# file. Both are relative paths, resolved against the working directory.
TEMPLATES_DIR = '../external-templates'
OUTPUT_FILE = '../templates.json'


def load_templates(directory):
    """Return the concatenated 'templates' lists of every JSON file in *directory*.

    Files are read in sorted name order so that the result (and therefore
    which duplicate survives de-duplication) does not depend on the arbitrary
    ordering of os.listdir(). Entries that are not regular ``.json`` files
    are ignored instead of being fed to the JSON parser.

    Raises:
        KeyError: if a file has no top-level 'templates' key.
        json.JSONDecodeError: if a file does not contain valid JSON.
    """
    templates = []
    for name in sorted(os.listdir(directory)):
        path = os.path.join(directory, name)
        if not name.lower().endswith('.json') or not os.path.isfile(path):
            continue
        # JSON is UTF-8 by specification; do not rely on the locale default
        with open(path, encoding='utf-8') as f:
            # extend() appends in place instead of re-copying the whole list
            templates.extend(json.load(f)['templates'])
    return templates


def dedupe_by_title(templates):
    """Return *templates* without entries whose 'title' was already seen.

    The first template carrying a given title is kept; input order is
    preserved.

    Raises:
        KeyError: if a template has no 'title' key.
    """
    seen_titles = set()
    unique = []
    for template in templates:
        title = template['title']
        if title in seen_titles:
            continue
        seen_titles.add(title)
        unique.append(template)
    return unique


def combine_templates():
    """Merge all downloaded template files and write them to OUTPUT_FILE."""
    file_data = {
        'version': '2',
        'templates': dedupe_by_title(load_templates(TEMPLATES_DIR)),
    }

    # Open the templates.json file, and write results to it
    with open(OUTPUT_FILE, 'w', encoding='utf-8') as f:
        json.dump(file_data, f, indent=2, sort_keys=False)


if __name__ == '__main__':
    combine_templates()
|
||||||
38
lib/download.py
Normal file
38
lib/download.py
Normal file
@@ -0,0 +1,38 @@
|
|||||||
|
import os
|
||||||
|
import csv
|
||||||
|
import requests
|
||||||
|
|
||||||
|
# Directory the downloaded template files are written to
destination_dir = '../external-templates'
|
||||||
|
# Path of the CSV file that lists the sources to download
sources_list = '../sources.csv'
|
||||||
|
|
||||||
|
# Downloads the file from a given URL, to the local destination
|
||||||
|
def download(url: str, filename: str, timeout: float = 30):
    """Download *url* and save the response body as *filename* in destination_dir.

    The body is streamed to disk in 8 KiB chunks. On an HTTP error status or
    a network-level failure a message is printed and nothing is raised, so a
    single unreachable source does not abort the remaining downloads.

    Args:
        url: Address to fetch.
        filename: Name of the file to create inside destination_dir.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout, requests may block indefinitely.
    """
    file_path = os.path.join(destination_dir, filename)
    try:
        # With stream=True the connection is only released once the body is
        # consumed or the response is closed; the context manager guarantees
        # the latter on every path.
        with requests.get(url, stream=True, timeout=timeout) as r:
            if not r.ok:  # HTTP status code 4XX/5XX
                print('Download failed: status code {}\n{}'.format(r.status_code, r.text))
                return
            print('saving to', os.path.abspath(file_path))
            with open(file_path, 'wb') as f:
                for chunk in r.iter_content(chunk_size=1024 * 8):
                    if chunk:
                        f.write(chunk)
                # Push everything to disk once, after the last chunk
                f.flush()
                os.fsync(f.fileno())
    except requests.RequestException as err:
        # DNS failure, refused connection, timeout, broken stream, ...
        print('Download failed: {}: {}'.format(url, err))
|
||||||
|
|
||||||
|
# Gets list of URLs to download from CSV file
|
||||||
|
def get_source_list(path=None):
    """Return the non-empty rows of the sources CSV file.

    Args:
        path: CSV file to read. Defaults to the module-level ``sources_list``
            path when omitted.

    Returns:
        A list with one entry per non-blank CSV line, each entry being the
        list of that line's fields as strings. The caller in this file uses
        field 0 as the output file stem and field 1 as the URL.
    """
    if path is None:
        path = sources_list
    # newline='' is what the csv module expects so that it can handle line
    # endings (including ones embedded in quoted fields) itself.
    with open(path, mode='r', newline='', encoding='utf-8') as file:
        # Blank lines are parsed as empty rows; drop them so callers can
        # index into every returned row.
        return [row for row in csv.reader(file) if row]
|
||||||
|
|
||||||
|
def download_all():
    """Download the templates JSON file of every source in the sources CSV."""
    # Create destination folder if not yet present. exist_ok avoids the
    # check-then-create race of a separate os.path.exists() test.
    os.makedirs(destination_dir, exist_ok=True)

    # For each source, download the templates JSON file
    for row in get_source_list():
        if len(row) < 2:
            # A row needs a name (field 0) and a URL (field 1)
            print('Skipping malformed source row:', row)
            continue
        name, url = row[0], row[1]
        download(url, name + '.json')


if __name__ == '__main__':
    download_all()
|
||||||
1
lib/requirements.txt
Normal file
1
lib/requirements.txt
Normal file
@@ -0,0 +1 @@
|
|||||||
|
requests
|
||||||
Reference in New Issue
Block a user