#!/usr/bin/env python
# License: You must be at least this tall [|||||||||||||] to ride this attraction.
import os
import re
import time
import random
import urllib, urllib2

# Seconds to sleep between successful downloads (RapidShare free-user cooldown).
POST_DOWNLOAD_DELAY = (60*15) + 20
# Seconds to wait before re-posting the form after a "limit reached" page.
RETRY_DELAY         = 60*5
# Extra slack added on top of the countdown the wait page asks for.
SLEEP_DELAY         = 3
# Browser identity we impersonate for every request (also passed to wget).
USER_AGENT          = 'Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.9.1) Gecko/20090624 Firefox/3.5'

# Direct file URL embedded in the landing page (http://rsNN.rapidshare.com/files/...).
r_find_url = re.compile(r'http://rs[0-9]+.rapidshare.com/files/[0-9]+/[^"]+')
# Raw free-user error messages; compiled with flags a few lines below.
r_error    = [r'Error.+You have reached the download limit for free-users.',
              r'Your IP address [0-9\.]+ is already downloading a file.+Please wait until the download is completed\.']
# Countdown timer embedded in the wait page's JavaScript ("var c=NN;").
r_timeout  = re.compile(r'var c=([1-9][0-9]*);')
# Mirror URLs hidden in the mirror radio buttons' onclick handlers.
r_mirrors  = re.compile(r'''name="mirror" onclick="[^"]*'(http://[^/]+/files/[^\\]+)\\';''')

# Compile the raw error patterns; DOTALL|MULTILINE because the messages span HTML lines.
r_error = [re.compile(r, re.DOTALL|re.MULTILINE) for r in r_error]


def info(message):
    """Print *message* to stdout prefixed with a [HH:MM:SS] timestamp.

    Uses print with parentheses around the single expression — byte-identical
    output under Python 2's print statement, and forward-compatible with
    Python 3's print function.
    """
    print('[%s] %s' % (time.strftime('%H:%M:%S'), message))


def opener(url, data=None):
    """Open *url* pretending to be a desktop Firefox browser.

    When *data* (a dict) is given it is form-encoded and the request
    becomes a POST; otherwise a plain GET is issued.  Returns the
    file-like response object from urllib2.urlopen().
    """
    browser_headers = {
        'User-Agent':      USER_AGENT,
        'Accept':          'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
        'Accept-Charset':  'ISO-8859-1,utf-8;q=0.7,*;q=0.7',
        'Accept-Language': 'en-us,en;q=0.5',
        'Accept-Encoding': 'nothing',
        }

    if data:
        body = urllib.urlencode(data)
    else:
        body = None

    request = urllib2.Request(url, data=body, headers=browser_headers)
    return urllib2.urlopen(request)


def fetch_url(*n):
    """Fetch a URL (plus optional POST data — see opener()) and return the
    entire response body as a string.

    Reads the response in fixed-size chunks, accumulating them in a list
    and joining once at the end instead of repeated string concatenation
    (which is quadratic).  Also avoids shadowing the Python 2 builtin
    'buffer' that the original local name collided with.
    """
    ul = opener(*n)
    chunks = []

    while True:
        chunk = ul.read(5012)

        # read() returns the empty string at EOF.
        if not chunk:
            break

        chunks.append(chunk)

    return ''.join(chunks)


def do_wait_form(url):
    """Submit the free-download form at *url* and honor the site's countdown.

    Returns:
        A list of mirror URLs on success, or False after sleeping
        RETRY_DELAY when a free-user limit error page came back
        (the caller loops until a mirror list is returned).

    Raises:
        Exception: if the countdown timer or the mirror list cannot be
        found in the response (unexpected page layout).
    """
    tmp = fetch_url(url, {'dl.start': 'Free'})

    for error in r_error:
        # TODO: specify which error
        if error.search(tmp):
            info('*** Download limit error. Waiting...')
            time.sleep(RETRY_DELAY)
            return False

    m = r_mirrors.findall(tmp)
    t = r_timeout.search(tmp)

    # Call-form raise replaces the Python-2-only "raise E, msg" statement;
    # behavior under Python 2 is identical, and the file stays parseable
    # by modern tooling.
    if not t:
        raise Exception('*** Timer not found. Bailing!')

    if not m:
        raise Exception('*** Mirrors not found. Bailing!')

    info('Going to sleep for %s + %d seconds...' % (t.group(1), SLEEP_DELAY))
    time.sleep(int(t.group(1)) + SLEEP_DELAY)
    return m


if __name__ == '__main__':
    import sys
    urls = sys.argv[1:]

    # Process the queue front-to-back; each iteration handles one URL.
    while urls:
        url = urls.pop(0)
        info('Trying ' + url)

        tmp = fetch_url(url)
        m = r_find_url.search(tmp)

        if not m:
            info('*** Could not find download form. Not a valid RS URL?')
            continue

        info('Posting to %s...' % m.group(0))

        try:
            # do_wait_form() returns False while we are rate-limited;
            # keep re-posting the form until a mirror list comes back.
            mirrors = False

            while not mirrors:
                mirrors = do_wait_form(m.group(0))

            # XXX preferred mirrors?
            info('Available mirrors:')

            for mirror_url in mirrors:
                info('- ' + mirror_url)

            mirror = random.choice(mirrors)

            info('Yawn. Downloading: %s' % (mirror, ))
            # Hand the transfer to wget so it can resume partial files (-c).
            os.spawnvp(os.P_WAIT, 'wget', ['wget', '-c','--user-agent=' + USER_AGENT, mirror])
        except KeyboardInterrupt:
            # Ctrl-C aborts the whole queue.  print('') emits the same lone
            # newline as the original bare Python-2 "print" statement.
            print('')
            break

        if urls:
            info('Sleeping until next download...')

            # XXX ETA
            time.sleep(POST_DOWNLOAD_DELAY)