euscan: only add sane versions

We don't want *-dont-work, *-windows, etc. versions...

Signed-off-by: Corentin Chary <corentincj@iksaif.net>
This commit is contained in:
Corentin Chary 2011-04-18 21:07:55 +02:00
parent e5278e0e0f
commit a7a15c0ac7

17
euscan
View File

@ -6,6 +6,14 @@ Distributed under the terms of the GNU General Public License v2
from __future__ import print_function from __future__ import print_function
"""
TODO:
- custom url handlers (portscout)
- sourceforge: use rss feeds
- respect robots.txt (portscout)
- check other distros (youri)
- clean blacklist system
"""
# Meta: # Meta:
__author__ = "Corentin Chary (iksaif)" __author__ = "Corentin Chary (iksaif)"
@ -203,7 +211,7 @@ def regex_from_template(template):
template = re.sub(r'(\$\{\d+\}\.?)+', r'([\w\.\-]+?)', template) template = re.sub(r'(\$\{\d+\}\.?)+', r'([\w\.\-]+?)', template)
#template = re.sub(r'(\$\{\d+\}\.+)+', '(.+?)\.', template) #template = re.sub(r'(\$\{\d+\}\.+)+', '(.+?)\.', template)
#template = re.sub(r'(\$\{\d+\})+', '(.+?)', template) #template = re.sub(r'(\$\{\d+\})+', '(.+?)', template)
template = template.replace('${PV}', r'([\w\.\-]+?)') template = template.replace('${PV}', r'((\d+)((\.\d+)*)([a-zA-Z]?)(((-|_)(pre|p|beta|b|alpha|a|rc|r)\d*)*))')
template = template + r'/?$' template = template + r'/?$'
return template return template
@ -268,7 +276,7 @@ def scan_directory_recursive(url, steps, vmin, vmax, output):
if href.startswith(url): if href.startswith(url):
href = href.replace(url, "", 1) href = href.replace(url, "", 1)
match = re.match(pattern, href) match = re.match(pattern, href, re.I)
if match: if match:
results.append((match.group(1), match.group(0))) results.append((match.group(1), match.group(0)))
@ -276,7 +284,7 @@ def scan_directory_recursive(url, steps, vmin, vmax, output):
buf = StringIO.StringIO(data) buf = StringIO.StringIO(data)
for line in buf.readlines(): for line in buf.readlines():
line = line.replace("\n", "").replace("\r", "") line = line.replace("\n", "").replace("\r", "")
match = re.search(pattern, line) match = re.search(pattern, line, re.I)
if match: if match:
results.append((match.group(1), match.group(0))) results.append((match.group(1), match.group(0)))
# add url # add url
@ -633,6 +641,8 @@ def scanUpstream(options, package, output):
for fileurl in fetchme[filename]: for fileurl in fetchme[filename]:
skipscan = False skipscan = False
output.einfo("SRC_URI is '%s'" % fileurl)
if '://' not in fileurl: if '://' not in fileurl:
output.einfo("Invalid url '%s'" % fileurl) output.einfo("Invalid url '%s'" % fileurl)
continue continue
@ -644,7 +654,6 @@ def scanUpstream(options, package, output):
url = parseMirror(fileurl, output) url = parseMirror(fileurl, output)
# Try list dir, but not for gentoo mirrors, it's too slow # Try list dir, but not for gentoo mirrors, it's too slow
if not skipscan: if not skipscan:
versions.extend(scan_directory(cpv, url, options, output)) versions.extend(scan_directory(cpv, url, options, output))