Updates the duplicate remover to remove more duplicates

This commit is contained in:
Alicia Sykes
2023-04-18 18:41:39 +01:00
parent 1e8af535ea
commit d3fb62828d

View File

@@ -1,4 +1,5 @@
import os import os
import string
import json import json
# Get list of files in sources # Get list of files in sources
@@ -21,9 +22,15 @@ for file in files:
# Append the template object to the templates list # Append the template object to the templates list
templates = templates + data templates = templates + data
# Remove duplicates
seen_titles = set() seen_titles = set()
filtered_data = [x for x in templates if x['title'] not in seen_titles and not seen_titles.add(x['title'])] filtered_data = []
for x in templates:
normalized_title = x['title'].translate(str.maketrans('', '', string.punctuation)).replace(' ', '').lower()
if normalized_title not in seen_titles:
seen_titles.add(normalized_title)
filtered_data.append(x)
fileData = { fileData = {
'version': '2', 'version': '2',