From 112313ab7f9b5e7230cbf4c867c5b8a89604ac44 Mon Sep 17 00:00:00 2001 From: volpino Date: Wed, 25 Jul 2012 10:44:15 +0200 Subject: [PATCH] euscan: generic handler fixes * urljoin was broken with dirs ("http://site.com/lol" joined with "wat" produces http://site.com/wat) * fixed _v regex, don't match what is not needed Signed-off-by: volpino --- pym/euscan/handlers/generic.py | 13 ++++++++++--- pym/euscan/helpers.py | 4 ++-- 2 files changed, 12 insertions(+), 5 deletions(-) diff --git a/pym/euscan/handlers/generic.py b/pym/euscan/handlers/generic.py index 831dfce..12c4a99 100644 --- a/pym/euscan/handlers/generic.py +++ b/pym/euscan/handlers/generic.py @@ -35,7 +35,10 @@ def scan_html(data, url, pattern): match = re.match(pattern, href, re.I) if match: - results.append((".".join(match.groups()), match.group(0))) + results.append( + (".".join([x for x in match.groups() if x is not None]), + match.group(0)) + ) return results @@ -47,8 +50,10 @@ def scan_ftp(data, url, pattern): line = line.replace("\n", "").replace("\r", "") match = re.search(pattern, line, re.I) if match: - results.append((".".join(match.groups()), match.group(0))) - + results.append( + (".".join([x for x in match.groups() if x is not None]), + match.group(0)) + ) return results @@ -88,6 +93,8 @@ def scan_directory_recursive(cp, ver, rev, url, steps, orig_url): pv = helpers.gentoo_mangle_version(up_pv) if helpers.version_filtered(cp, ver, pv): continue + if not url.endswith("/"): + url = url + "/" path = urljoin(url, path) if not steps and path not in orig_url: diff --git a/pym/euscan/helpers.py b/pym/euscan/helpers.py index 6582393..ec721b7 100644 --- a/pym/euscan/helpers.py +++ b/pym/euscan/helpers.py @@ -30,8 +30,8 @@ VERSION_CMP_PACKAGE_QUIRKS = { 'sys-process/htop': htop_vercmp } -_v_end = '((-|_)(pre|p|beta|b|alpha|a|rc|r)\d*)' -_v = r'((\d+)((\.\d+)*)([a-zA-Z]*?)(' + _v_end + '*))' +_v_end = r'(?:(?:-|_)(?:pre|p|beta|b|alpha|a|rc|r)\d*)' +_v = r'((?:\d+)(?:(?:\.\d+)*)(?:[a-zA-Z]*?)(?:' + _v_end + '*))' # Stolen from g-pypi