mirror of
https://github.com/2004content/rarbg.git
synced 2026-01-26 14:48:02 -08:00
Compare commits
10 Commits
f02a2844b6
...
9abafb9c65
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
9abafb9c65 | ||
|
|
3b38f4bd9c | ||
|
|
17b26dc14d | ||
|
|
cbadd7e746 | ||
|
|
4f5f2fa8de | ||
|
|
c6c70743e7 | ||
|
|
19797654f3 | ||
|
|
ff24e25d00 | ||
|
|
475f3fcbc4 | ||
|
|
ec032bea3b |
@@ -2,10 +2,12 @@ rarbg
|
|||||||
Backup of magnets from RARBG
|
Backup of magnets from RARBG
|
||||||
|
|
||||||
Currently:
|
Currently:
|
||||||
|
clean.py is my Python script for cleaning up magnets post-extraction. I think it might have some finicky behavior in the way it fixes two magnets in one line, but it works.
|
||||||
moviesrarbg.txt holds my original post, cleaned up a lot. (117,392)
|
moviesrarbg.txt holds my original post, cleaned up a lot. (117,392)
|
||||||
showsother.txt holds my original post, cleaned up a little. (137,671)
|
showsother.txt holds my original post, cleaned up a little. (137,671)
|
||||||
showsrarbg.txt holds my original post, cleaned up a lot. (11,699)
|
showsrarbg.txt holds my original post, cleaned up a lot. (11,699)
|
||||||
everything.7z holds what i've compiled so far from all the sources given me (1,857,452)
|
everything.7z holds what I've compiled so far from some of the sources given to me (3,459,526)
|
||||||
|
|
||||||
I'm nowhere near done adding to everything.7z. Then I'll filter it and sort it and split it into its relevant categories. Alongside the movies and shows, it also holds porn, music, and games, which will each get new .txt files.
|
I'm confident that some of the stuff in everything.7z did not come from RARBG, and that will be my first step to remedy once I get everything in there.
|
||||||
|
I'm about a fourth of the way done adding to everything.7z. Then I'll filter it and sort it and split it into its relevant categories. Alongside the movies and shows, it also holds porn, music, and games, which will each get new .txt files.
|
||||||
Thanks guys.
|
Thanks guys.
|
||||||
|
|||||||
1
archive/blank
Normal file
1
archive/blank
Normal file
@@ -0,0 +1 @@
|
|||||||
|
|
||||||
Binary file not shown.
BIN
archive/everything.7z.002
Normal file
BIN
archive/everything.7z.002
Normal file
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
BIN
archive/everything.7z.006
Normal file
BIN
archive/everything.7z.006
Normal file
Binary file not shown.
BIN
archive/everything.7z.007
Normal file
BIN
archive/everything.7z.007
Normal file
Binary file not shown.
BIN
archive/everything.7z.008
Normal file
BIN
archive/everything.7z.008
Normal file
Binary file not shown.
BIN
archive/everything.7z.009
Normal file
BIN
archive/everything.7z.009
Normal file
Binary file not shown.
BIN
archive/everything.7z.010
Normal file
BIN
archive/everything.7z.010
Normal file
Binary file not shown.
70
clean.py
Normal file
70
clean.py
Normal file
@@ -0,0 +1,70 @@
|
|||||||
|
def fix(line, data):
    """Parse one magnet link and append its parts to ``data``.

    ``line`` is expected to start with ``magnet:?xt=urn:btih:`` (the 20
    characters before the hash are skipped without being checked).  The
    parsed link is appended to ``data`` as
    ``['magnet:?xt=urn:btih:', hash, '&dn=', title]`` where:

    * ``hash`` is the text between the prefix and the first ``'&'``
      (or the end of the line), lowercased;
    * ``title`` is the text after ``'&dn='`` up to the first ``'&tr='``
      tracker, with any other ``'&'`` and all non-ASCII characters removed.
      It is empty when the link carries no ``'&dn='``.

    Lines whose hash is empty or not purely hexadecimal are skipped.

    Returns ``data`` itself in every case, so callers may safely rebind
    the result.
    """
    prefix = 'magnet:?xt=urn:btih:'

    # Everything after the prefix up to the first '&' is the hash.
    info_hash, sep, params = line[len(prefix):].partition('&')
    info_hash = info_hash.lower()

    # Reject non-hex hashes, but hand back the list (not None) so the
    # caller's accumulator survives a bad line.
    if not info_hash or any(char not in '0123456789abcdef' for char in info_hash):
        return data

    # Work on a fresh string instead of deleting characters from ``line``
    # at precomputed indices: those indices go stale after the first removal.
    tail = sep + params
    start = tail.find('&dn=')
    if start == -1:
        title = ''
    else:
        title = tail[start + len('&dn='):]
        tracker = title.find('&tr=')
        if tracker != -1:  # drop the trackers
            title = title[:tracker]
        title = title.replace('&', '')  # any '&' left belongs to the title

    title = ''.join(char for char in title if ord(char) < 128)  # strip non-ascii characters
    data.append([prefix, info_hash, '&dn=', title])
    return data
|
||||||
|
|
||||||
|
# Script body: read everything.txt, split lines that hold two pasted magnets,
# normalise every title, drop duplicate hashes and append the sorted result
# to output.txt.

data = []  # one [prefix, hash, '&dn=', title] list per magnet
prefix = 'magnet:?xt=urn:btih:'
mangled = 'magnetxturnbtih'  # the prefix as it looks after a paste stripped its symbols

with open('everything.txt', encoding='utf-8') as file:
    for line in file:
        line = line.strip()
        if not line.startswith(prefix):  # check for validity
            continue

        # fix() appends to ``data`` in place, so its return value is
        # deliberately not assigned back: rebinding would replace the whole
        # list with None whenever fix() returns nothing for a rejected line.

        # Paste error: a second, intact magnet on the same line.
        second = line.find(prefix, len(prefix))
        if second != -1:
            fix(line[second:], data)  # go ahead and add the second to data
            line = line[:second]

        # Paste error: a second magnet that lost its symbols (and the 'd' of
        # '&dn='), leaving '<mangled prefix><hash>n<title>'.
        second = line.find(mangled, len(prefix))
        if second != -1:
            rest = line[second + len(mangled):]
            # The hash runs up to the first 'n'; if there is none, the rest
            # is all hash and the title is empty.
            info_hash, _, title = rest.partition('n')
            fix(prefix + info_hash + '&dn=' + title, data)  # put it back together
            line = line[:second]

        fix(line, data)

# Get rid of symbols except '.' and '-', and turn spaces into '.', in a
# single pass per title.
strip_table = str.maketrans(' ', '.', '`~!@#$%^&*()_+=[{]}\\|;:\'",<>?/')
for magnet in data:
    magnet[3] = magnet[3].translate(strip_table)

# Eliminate duplicate hashes.  Iterating in title order means empty titles
# come first, so a later duplicate with a real title overwrites them.
dic = {}
for magnet in sorted(data, key=lambda x: x[3]):
    dic[magnet[0] + magnet[1]] = magnet[2] + magnet[3]

# Sort by '&dn=' + title and glue each magnet back together.
results = [key + dic[key] for key in sorted(dic, key=dic.get)]

# NOTE: 'a' appends, so running the script twice duplicates the output.
with open('output.txt', 'a', encoding='utf-8') as output:
    output.writelines(entry + '\n' for entry in results)
|
||||||
Binary file not shown.
@@ -1,2 +1,2 @@
|
|||||||
btc bc1q0zg588lsn69anj30rewnlmgup2rzq776ll7m6y
|
btc bc1q0zg588lsn69anj30rewnlmgup2rzq776ll7m6y
|
||||||
i spent weeks working on this, never expecting anyone else to ever know about it. now that it's blown up, i'm gonna try to update my content and create a more complete backup. i would love any financial support. i've got one laptop and one hdd that i'm doing this with.
|
literally anything at all would be so appreciated
|
||||||
|
|||||||
Reference in New Issue
Block a user