From 8d912379886e00815aeb7ea1aed6af8f4bb62fbc Mon Sep 17 00:00:00 2001 From: Corentin Chary Date: Fri, 3 Aug 2012 21:50:54 +0200 Subject: [PATCH] euscan/handlers: rework handlers, better remote-id + watch support Signed-off-by: Corentin Chary --- bin/euscan | 7 +- bin/euscan_patch_metadata | 3 +- pym/euscan/handlers/__init__.py | 186 +++++++++++++++++----- pym/euscan/handlers/{url => }/cpan.py | 63 ++------ pym/euscan/handlers/{url => }/generic.py | 15 +- pym/euscan/handlers/{url => }/github.py | 18 +-- pym/euscan/handlers/{url => }/kde.py | 17 +- pym/euscan/handlers/package/__init__.py | 19 --- pym/euscan/handlers/package/remote_id.py | 44 ----- pym/euscan/handlers/package/watch.py | 139 ---------------- pym/euscan/handlers/pear.py | 11 ++ pym/euscan/handlers/pecl.py | 11 ++ pym/euscan/handlers/{url => }/php.py | 35 ++-- pym/euscan/handlers/{url => }/pypi.py | 24 ++- pym/euscan/handlers/{url => }/rubygems.py | 22 ++- pym/euscan/handlers/url.py | 98 ++++++++++++ pym/euscan/handlers/url/__init__.py | 19 --- pym/euscan/helpers.py | 115 ------------- pym/euscan/mangling.py | 163 +++++++++++++++++++ pym/euscan/scan.py | 18 --- 20 files changed, 518 insertions(+), 509 deletions(-) rename pym/euscan/handlers/{url => }/cpan.py (68%) rename pym/euscan/handlers/{url => }/generic.py (96%) rename pym/euscan/handlers/{url => }/github.py (70%) rename pym/euscan/handlers/{url => }/kde.py (61%) delete mode 100644 pym/euscan/handlers/package/__init__.py delete mode 100644 pym/euscan/handlers/package/remote_id.py delete mode 100644 pym/euscan/handlers/package/watch.py create mode 100644 pym/euscan/handlers/pear.py create mode 100644 pym/euscan/handlers/pecl.py rename pym/euscan/handlers/{url => }/php.py (54%) rename pym/euscan/handlers/{url => }/pypi.py (68%) rename pym/euscan/handlers/{url => }/rubygems.py (74%) create mode 100644 pym/euscan/handlers/url.py delete mode 100644 pym/euscan/handlers/url/__init__.py create mode 100644 pym/euscan/mangling.py diff --git a/bin/euscan b/bin/euscan index 70ced79..1149839 100755 --- a/bin/euscan +++ b/bin/euscan @@ -298,11 +298,16 @@ def main(): exit_helper(1) except Exception as err: + import traceback + print ('-'*60) + traceback.print_exc(file=sys.stderr) + print ('-'*60) + output.eerror('%s: %s' % (query, str(err))) exit_helper(1) if not ret and not CONFIG['quiet']: - output.ewarn( + output.einfo( "Didn't find any new version, check package's homepage " + "for more informations" ) diff --git a/bin/euscan_patch_metadata b/bin/euscan_patch_metadata index 2f1a834..612d9fe 100755 --- a/bin/euscan_patch_metadata +++ b/bin/euscan_patch_metadata @@ -176,8 +176,9 @@ def patch_metadata(package, watch_data, diff=False): valid = ("uversionmangle", "versionmangle", "downloadurlmangle") cleaned_opts = [] for opt in opts.split(","): - opt_name, opt_value = opt.split("=") + opt_name, opt_value = opt.split("=", 1) if opt_name in valid: + if opt_name == "uversionmangle": opt_name = "versionmangle" cleaned_opts.append('%s="%s"' % (opt_name, opt_value)) opts = " ".join(cleaned_opts) diff --git a/pym/euscan/handlers/__init__.py b/pym/euscan/handlers/__init__.py index 899ef24..018d095 100644 --- a/pym/euscan/handlers/__init__.py +++ b/pym/euscan/handlers/__init__.py @@ -1,48 +1,124 @@ -import sys +import os, sys +import pkgutil + from euscan import CONFIG, output -from euscan.handlers.package import handlers as pkg_handlers -from euscan.handlers.url import handlers as url_handlers +import euscan.mangling +from gentoolkit.metadata import MetaData -def find_best_pkg_handler(pkg): 
+handlers = {'package' : [], 'url' : [], 'all' : {}} + +# autoimport all modules in this directory and append them to handlers list +for loader, module_name, is_pkg in pkgutil.walk_packages(__path__): + module = loader.find_module(module_name).load_module(module_name) + if not hasattr(module, 'HANDLER_NAME'): + continue + if hasattr(module, 'scan_url'): + handlers['url'].append(module) + if hasattr(module, 'scan_pkg'): + handlers['package'].append(module) + handlers['all'][module.HANDLER_NAME] = module + +# sort handlers by priority +def sort_handlers(handlers): + return sorted( + handlers, + key=lambda handler: handler.PRIORITY, + reverse=True + ) + +handlers['package'] = sort_handlers(handlers['package']) +handlers['url'] = sort_handlers(handlers['url']) + +def find_best_handler(kind, pkg, *args): """ Find the best handler for the given package """ - for handler in pkg_handlers: - if handler.can_handle(pkg): + for handler in handlers[kind]: + if handler.can_handle(pkg, *args): return handler return None +def find_handlers(kind, names): + ret = [] -def find_best_url_handler(pkg, url): - """ - Find the best handler for the given url - """ - for handler in url_handlers: - if handler.can_handle(pkg, url): - return handler - return None + for name in names: + # Does this handler exist, and handle this kind of thing ? (pkg / url) + if name in handlers['all'] and handlers['all'][name] in handlers[kind]: + ret.append(handlers['all'][name]) + return ret -def scan(pkg, urls, on_progress=None): - """ - Scans upstream for the given package. - First tries if a package wide handler is available, then fallbacks - in url handling. - """ - pkg_handler = find_best_pkg_handler(pkg) - if pkg_handler: - if on_progress: - on_progress(increment=35) +def get_metadata(pkg): + metadata = {} - if not CONFIG['quiet'] and not CONFIG['format']: - sys.stdout.write("\n") + pkg_metadata = None - versions = pkg_handler.scan(pkg) + meta_override = os.path.join('metadata', pkg.category, pkg.name, 'metadata.xml') - if on_progress: - on_progress(increment=35) - return versions + try: + if os.path.exists(meta_override): + pkg_metadata = MetaData(meta_override) + output.einfo('Using custom metadata: %s' % meta_override) + if not pkg_metadata: + pkg_metadata = pkg.metadata + except Exception, e: + output.ewarn('Error when fetching metadata: %s' % str(e)) + + if not pkg_metadata: + return {} + + # Support multiple remote-id and multiple watch + for upstream in pkg_metadata._xml_tree.findall("upstream"): + for node in upstream.findall("watch"): + options = dict(node.attrib) + options['data'] = node.text + + if "type" in options: + handler = options['type'] + else: + handler = "url" + options['type'] = "url" + + for key in ["versionmangle", "downloadurlmangle"]: + value = options.get(key, None) + if value: + options[key] = value.split(";") + + if handler not in metadata: + metadata[handler] = [] + metadata[handler].append(options) + + for upstream in pkg_metadata._xml_tree.findall("upstream"): + for node in upstream.findall("remote-id"): + handler = node.attrib.get("type") + if not handler: + continue + if handler in metadata: + for i in range(len(metadata[handler])): + if not metadata[handler][i]['data']: + metadata[handler][i]['data'] = node.text + else: + metadata[handler] = [{'type' : handler, 'data' : node.text }] + + return metadata + +def scan_pkg(pkg_handler, pkg, options, on_progress=None): + versions = [] + + if on_progress: + on_progress(increment=35) + + for o in options: + versions += pkg_handler.scan_pkg(pkg, o) + + 
if on_progress: + on_progress(increment=35) + + return versions + +def scan_url(pkg, urls, options, on_progress=None): + versions = [] if on_progress: progress_available = 70 @@ -52,16 +128,12 @@ def scan(pkg, urls, on_progress=None): else: progress_increment = 0 - versions = [] - for filename in urls: for url in urls[filename]: if on_progress and progress_available > 0: on_progress(increment=progress_increment) progress_available -= progress_increment - if not CONFIG['quiet'] and not CONFIG['format']: - sys.stdout.write("\n") output.einfo("SRC_URI is '%s'" % url) if '://' not in url: @@ -69,8 +141,9 @@ def scan(pkg, urls, on_progress=None): continue try: - url_handler = find_best_url_handler(pkg, url) - versions.extend(url_handler.scan(pkg, url)) + url_handler = find_best_handler('url', pkg, url) + for o in options: + versions += url_handler.scan_url(pkg, url, o) except Exception as e: output.ewarn( "Handler failed: [%s] %s" % @@ -84,3 +157,44 @@ def scan(pkg, urls, on_progress=None): on_progress(increment=progress_available) return versions + +def scan(pkg, urls, on_progress=None): + """ + Scans upstream for the given package. + First tries if a package wide handler is available, then fallbacks + in url handling. + """ + + if not CONFIG['quiet'] and not CONFIG['format']: + sys.stdout.write('\n') + + metadata = get_metadata(pkg) + versions = [] + + pkg_handlers = find_handlers('package', metadata.keys()) + if not pkg_handlers: + pkg_handler = find_best_handler('package', pkg) + if pkg_handler: pkg_handlers = [pkg_handler] + + for pkg_handler in pkg_handlers: + options = metadata.get(pkg_handler.HANDLER_NAME, [{}]) + versions += scan_pkg(pkg_handler, pkg, options, on_progress) + + if not pkg_handlers: + versions += scan_url(pkg, urls, [{}], on_progress) + + return versions + +def mangle(kind, name, string): + if name not in handlers['all']: + return None + handler = handlers['all'][name] + if not hasattr(handler, 'mangle_%s' % kind): + return None + return getattr(handler, 'mangle_%s' % kind)(string) + +def mangle_url(name, string): + return mangle('url', name, string) + +def mangle_version(name, string): + return mangle('version', name, string) diff --git a/pym/euscan/handlers/url/cpan.py b/pym/euscan/handlers/cpan.py similarity index 68% rename from pym/euscan/handlers/url/cpan.py rename to pym/euscan/handlers/cpan.py index 6b9ad52..a184e10 100644 --- a/pym/euscan/handlers/url/cpan.py +++ b/pym/euscan/handlers/cpan.py @@ -3,7 +3,7 @@ import portage import urllib2 import json -from euscan import helpers, output +from euscan import helpers, output, mangling HANDLER_NAME = "cpan" CONFIDENCE = 100 @@ -11,10 +11,8 @@ PRIORITY = 90 _cpan_package_name_re = re.compile("mirror://cpan/authors/.*/([^/.]*).*") - -def can_handle(pkg, url): - return url.startswith('mirror://cpan/') - +def can_handle(pkg, url=None): + return url and url.startswith('mirror://cpan/') def guess_package(cp, url): match = _cpan_package_name_re.search(url) @@ -33,7 +31,7 @@ def guess_package(cp, url): return pkg -def gentoo_mangle_version(up_pv): +def mangle_version(up_pv): # clean up_pv = up_pv.replace("._", "_") # e.g.: 0.999._002 -> 0.999_002 up_pv = up_pv.replace("_0.", "_") # e.g.: 0.30_0.1 -> 0.30_1 @@ -68,53 +66,25 @@ def gentoo_mangle_version(up_pv): if rc_part: pv = "%s_rc" % pv - return helpers.gentoo_mangle_version(pv) - - -def cpan_trim_version(pv): - pv = re.sub('^[a-zA-Z]+', '', pv) - pv = re.sub('[a-zA-Z]$', '', pv) return pv - -def cpan_mangle_version(pv): - pos = pv.find('.') - if pos < 0: - return pv - 
up_pv = pv.replace('.', '') - up_pv = up_pv[0:pos] + '.' + up_pv[pos:] - up_pv = cpan_trim_version(up_pv) - return up_pv - - -def cpan_vercmp(cp, a, b): - try: - return float(a) - float(b) - except: - if a < b: - return -1 - else: - return 1 - - -def scan(pkg, url): +def scan_url(pkg, url, options): cp, ver, rev = portage.pkgsplit(pkg.cpv) remote_pkg = guess_package(cp, url) output.einfo("Using CPAN API: %s", remote_pkg) - result = scan_remote(pkg, [remote_pkg]) + return scan_pkg(pkg, {'data' : remote_pkg}) - ret = [] - for url, pv in result: - ret.append((url, pv, HANDLER_NAME, CONFIDENCE)) - return ret +def scan_pkg(pkg, options): + remote_pkg = options['data'] + # Defaults to CPAN mangling rules + if 'versionmangle' not in options: + options['versionmangle'] = ['cpan', 'gentoo'] -def scan_remote(pkg, remote_data): - remote_pkg = remote_data[0] url = 'http://search.cpan.org/api/dist/%s' % remote_pkg - cp, ver, rev = portage.pkgsplit(pkg.cpv) + cp, ver, rev = pkg.cp, pkg.version, pkg.revision try: fp = helpers.urlopen(url) @@ -139,11 +109,9 @@ def scan_remote(pkg, remote_data): # continue up_pv = version['version'] - up_pv = cpan_trim_version(up_pv) - pv = gentoo_mangle_version(up_pv) - up_ver = cpan_mangle_version(ver) + pv = mangling.mangle_version(up_pv, options) - if helpers.version_filtered(cp, up_ver, up_pv, cpan_vercmp): + if helpers.version_filtered(cp, ver, pv): continue url = 'mirror://cpan/authors/id/%s/%s/%s/%s' % ( @@ -153,6 +121,7 @@ def scan_remote(pkg, remote_data): version['archive'] ) - ret.append((url, pv)) + url = mangling.mangle_url(url, options) + ret.append((url, pv, HANDLER_NAME, CONFIDENCE)) return ret diff --git a/pym/euscan/handlers/url/generic.py b/pym/euscan/handlers/generic.py similarity index 96% rename from pym/euscan/handlers/url/generic.py rename to pym/euscan/handlers/generic.py index 3ba7ac0..76f598f 100644 --- a/pym/euscan/handlers/url/generic.py +++ b/pym/euscan/handlers/generic.py @@ -12,7 +12,7 @@ except ImportError: import portage from euscan import CONFIG, SCANDIR_BLACKLIST_URLS, \ - BRUTEFORCE_BLACKLIST_PACKAGES, BRUTEFORCE_BLACKLIST_URLS, output, helpers + BRUTEFORCE_BLACKLIST_PACKAGES, BRUTEFORCE_BLACKLIST_URLS, output, helpers, mangling HANDLER_NAME = "generic" CONFIDENCE = 45 @@ -69,6 +69,7 @@ def scan_html(data, url, pattern): (".".join([x for x in match.groups() if x is not None]), match.group(0)) ) + return results @@ -87,7 +88,7 @@ def scan_ftp(data, url, pattern): return results -def scan_directory_recursive(cp, ver, rev, url, steps, orig_url): +def scan_directory_recursive(cp, ver, rev, url, steps, orig_url, options): if not steps: return [] @@ -120,7 +121,8 @@ def scan_directory_recursive(cp, ver, rev, url, steps, orig_url): versions = [] for up_pv, path in results: - pv = helpers.gentoo_mangle_version(up_pv) + pv = mangling.mangle_version(up_pv, options) + if helpers.version_filtered(cp, ver, pv): continue if not url.endswith("/"): @@ -129,16 +131,17 @@ def scan_directory_recursive(cp, ver, rev, url, steps, orig_url): if not steps and path not in orig_url: confidence = confidence_score(path, orig_url) + path = mangling.mangle_url(path, options) versions.append((path, pv, HANDLER_NAME, confidence)) if steps: - ret = scan_directory_recursive(cp, ver, rev, path, steps, orig_url) + ret = scan_directory_recursive(cp, ver, rev, path, steps, orig_url, options) versions.extend(ret) return versions -def scan(pkg, url): +def scan_url(pkg, url, options): if CONFIG["scan-dir"]: for bu in SCANDIR_BLACKLIST_URLS: if re.match(bu, url): @@ -171,7 
+174,7 @@ def scan(pkg, url): output.einfo("Scanning: %s" % template) steps = helpers.generate_scan_paths(template) - ret = scan_directory_recursive(cp, ver, rev, "", steps, url) + ret = scan_directory_recursive(cp, ver, rev, "", steps, url, options) if not ret: ret = brute_force(pkg, url) diff --git a/pym/euscan/handlers/url/github.py b/pym/euscan/handlers/github.py similarity index 70% rename from pym/euscan/handlers/url/github.py rename to pym/euscan/handlers/github.py index dc5dd16..dfe2cee 100644 --- a/pym/euscan/handlers/url/github.py +++ b/pym/euscan/handlers/github.py @@ -4,16 +4,15 @@ import re import portage -from euscan import helpers, output +from euscan import helpers, output, mangling HANDLER_NAME = "github" CONFIDENCE = 100 PRIORITY = 90 -def can_handle(pkg, url): - return url.startswith('mirror://github/') - +def can_handle(pkg, url=None): + return url and url.startswith('mirror://github/') def guess_package(cp, url): match = re.search('^mirror://github/(.*?)/(.*?)/(.*)$', url) @@ -21,8 +20,7 @@ def guess_package(cp, url): assert(match) return (match.group(1), match.group(2), match.group(3)) - -def scan(pkg, url): +def scan_url(pkg, url, options): 'http://developer.github.com/v3/repos/downloads/' user, project, filename = guess_package(pkg.cpv, url) @@ -38,7 +36,8 @@ def scan(pkg, url): fnre = re.compile('^%s$' % \ re.escape(filename).replace(re.escape(ver), '(.*?)')) - output.einfo("Using github API for: " + '/'.join(filename)) + output.einfo("Using github API for: project=%s user=%s filename=%s" % \ + (project, user, filename)) dlreq = urllib2.urlopen('https://api.github.com/repos/%s/%s/downloads' % \ (user, project)) @@ -49,9 +48,10 @@ def scan(pkg, url): m = fnre.match(dl['name']) if m: - pv = helpers.gentoo_mangle_version(m.group(1)) + pv = mangling.mangle_version(m.group(1), options) if helpers.version_filtered(cp, ver, pv): continue - ret.append((dl['html_url'], pv, HANDLER_NAME, CONFIDENCE)) + url = mangling.mangle_url(dl['html_url'], options) + ret.append((url, pv, HANDLER_NAME, CONFIDENCE)) return ret diff --git a/pym/euscan/handlers/url/kde.py b/pym/euscan/handlers/kde.py similarity index 61% rename from pym/euscan/handlers/url/kde.py rename to pym/euscan/handlers/kde.py index 5535158..b789b88 100644 --- a/pym/euscan/handlers/url/kde.py +++ b/pym/euscan/handlers/kde.py @@ -1,4 +1,4 @@ -from euscan.handlers.url import generic +from euscan.handlers import generic PRIORITY = 90 @@ -6,10 +6,7 @@ HANDLER_NAME = "kde" def can_handle(pkg, url): - if url.startswith('mirror://kde/'): - return True - return False - + return url and url.startswith('mirror://kde/') def clean_results(results): ret = [] @@ -22,18 +19,18 @@ def clean_results(results): return ret -def scan(pkg, url): +def scan_url(pkg, url, options): results = generic.scan(pkg.cpv, url) - if url.startswith('mirror://kde/unstable/'): - url = url.replace('mirror://kde/unstable/', 'mirror://kde/stable/') + if url.startswith('mirror://kde/unstable/'): + url = url.replace('mirror://kde/unstable/', 'mirror://kde/stable/') results += generic.scan(pkg.cpv, url) if not results: # if nothing was found go brute forcing results = generic.brute_force(pkg.cpv, url) - if url.startswith('mirror://kde/unstable/'): - url = url.replace('mirror://kde/unstable/', 'mirror://kde/stable/') + if url.startswith('mirror://kde/unstable/'): + url = url.replace('mirror://kde/unstable/', 'mirror://kde/stable/') results += generic.brute_force(pkg.cpv, url) return clean_results(results) diff --git 
a/pym/euscan/handlers/package/__init__.py b/pym/euscan/handlers/package/__init__.py deleted file mode 100644 index 8530b10..0000000 --- a/pym/euscan/handlers/package/__init__.py +++ /dev/null @@ -1,19 +0,0 @@ -""" -Package wide handlers for scanning upstream -""" - -import pkgutil - -handlers = [] - -# autoimport all modules in this directory and append them to handlers list -for loader, module_name, is_pkg in pkgutil.walk_packages(__path__): - module = loader.find_module(module_name).load_module(module_name) - handlers.append(module) - -# sort handlers by priority -handlers = sorted( - handlers, - key=lambda handler: handler.PRIORITY, - reverse=True -) diff --git a/pym/euscan/handlers/package/remote_id.py b/pym/euscan/handlers/package/remote_id.py deleted file mode 100644 index 0615526..0000000 --- a/pym/euscan/handlers/package/remote_id.py +++ /dev/null @@ -1,44 +0,0 @@ -from euscan.handlers.url import handlers -from euscan import output - -PRIORITY = 100 - -HANDLER_NAME = "remote_id" -CONFIDENCE = 100.0 - - -url_handlers = {handler.HANDLER_NAME: handler for handler in handlers} - - -def can_handle(pkg): - # Return True if there's at least one remote-id that can be - # handled by euscan - try: - remoteids = pkg.metadata.upstream()[0].upstream_remoteids() - except IndexError: - pass - else: - if len(remoteids) > 0: - for remote_value, remote_type in remoteids: - if remote_type in url_handlers: - return True - return False - - -def scan(pkg): - output.einfo("Using remote-id data") - - ret = [] - - remoteids = pkg.metadata.upstream()[0].upstream_remoteids() - for remote_value, remote_type in remoteids: - if remote_type in url_handlers: - remote_data = remote_value.split("/") - scan_remote = getattr( - url_handlers[remote_type], "scan_remote", None - ) - if scan_remote: - for url, pv in scan_remote(pkg, remote_data): - name = "%s, %s" % (HANDLER_NAME, remote_type) - ret.append((url, pv, name, CONFIDENCE)) - return ret diff --git a/pym/euscan/handlers/package/watch.py b/pym/euscan/handlers/package/watch.py deleted file mode 100644 index 14f25d2..0000000 --- a/pym/euscan/handlers/package/watch.py +++ /dev/null @@ -1,139 +0,0 @@ -import re -import urllib2 - -import portage - -from euscan.handlers.url import generic -from euscan import output, helpers - -PRIORITY = 100 - -HANDLER_NAME = "watch" -CONFIDENCE = 100.0 - - -is_pattern = r"\([^\/]+\)" - - -def can_handle(pkg): - try: - return pkg.metadata._xml_tree.find("upstream").find("watch") \ - is not None - except AttributeError: - return False - - -def parse_mangles(mangles, string): - for mangle in mangles: - # convert regex from perl format to python format - # there are some regex in this format: s/pattern/replacement/ - m = re.match(r"s/(.*[^\\])/(.*)/", mangle) - if not m: - # or in this format s|pattern|replacement| - m = re.match(r"s\|(.*[^\\])\|(.*)\|", mangle) - pattern, repl = m.groups() - repl = re.sub(r"\$(\d+)", r"\\\1", repl) - string = re.sub(pattern, repl, string) - return string - - -def clean_results(results, versionmangle, urlmangle): - ret = [] - - for path, version, _, _ in results: - version = parse_mangles(versionmangle, version) - path = parse_mangles(urlmangle, path) - ret.append((path, version, HANDLER_NAME, CONFIDENCE)) - - return ret - - -def parse_watch(pkg): - for watch_tag in pkg.metadata._xml_tree.find("upstream").findall("watch"): - try: - base, file_pattern = watch_tag.text.split(" ")[:2] - except ValueError: - base, file_pattern = watch_tag.text, None - - # the file pattern can be in the base url - 
pattern_regex = r"/([^/]*\([^/]*\)[^/]*)$" - match = re.search(pattern_regex, base) - if match: - file_pattern = match.group(1) - base = base.replace(file_pattern, "") - - # handle sf.net specially - base = base.replace( - "http://sf.net/", "http://qa.debian.org/watch/sf.php/" - ) - - vmangle = watch_tag.attrib.get("uversionmangle", None) or \ - watch_tag.attrib.get("versionmangle", None) - versionmangle = vmangle.split(";") if vmangle else [] - - umangle = watch_tag.attrib.get("downloadurlmangle", None) - urlmangle = umangle.split(";") if umangle else [] - - yield (base, file_pattern, versionmangle, urlmangle) - - -def handle_directory_patterns(base, file_pattern): - """ - Directory pattern matching - e.g.: base: ftp://ftp.nessus.org/pub/nessus/nessus-([\d\.]+)/src/ - file_pattern: nessus-core-([\d\.]+)\.tar\.gz - """ - splitted = base.split("/") - i = 0 - basedir = [] - for elem in splitted: - if re.search(is_pattern, elem): - break - basedir.append(elem) - i += 1 - basedir = "/".join(basedir) - directory_pattern = splitted[i] - final = "/".join(splitted[i + 1:]) - - try: - fp = helpers.urlopen(basedir) - except urllib2.URLError: - return [] - except IOError: - return [] - - if not fp: - return [] - - data = fp.read() - - if basedir.startswith("ftp://"): - scan_data = generic.scan_ftp(data, basedir, directory_pattern) - else: - scan_data = generic.scan_html(data, basedir, directory_pattern) - - return [("/".join((basedir, path, final)), file_pattern) - for _, path in scan_data] - - -def scan(pkg): - output.einfo("Using watch data") - - cp, ver, rev = portage.pkgsplit(pkg.cpv) - - results = [] - for base, file_pattern, versionmangle, urlmangle in parse_watch(pkg): - if not re.search(is_pattern, base): - steps = [(base, file_pattern)] - res = generic.scan_directory_recursive( - cp, ver, rev, "", steps, base - ) - else: - res = [] - for step in handle_directory_patterns(base, file_pattern): - res += generic.scan_directory_recursive( - cp, ver, rev, "", [step], base - ) - - results += clean_results(res, versionmangle, urlmangle) - return results diff --git a/pym/euscan/handlers/pear.py b/pym/euscan/handlers/pear.py new file mode 100644 index 0000000..2074e33 --- /dev/null +++ b/pym/euscan/handlers/pear.py @@ -0,0 +1,11 @@ +from euscan.handlers import php + +HANDLER_NAME = "pear" +CONFIDENCE = 100 +PRIORITY = 90 + +def can_handle(pkg, url=None): + return url and url.startswith('http://%s.php.net/get/' % HANDLER_NAME) + +scan_url = php.scan_url +scan_pkg = php.scan_pkg diff --git a/pym/euscan/handlers/pecl.py b/pym/euscan/handlers/pecl.py new file mode 100644 index 0000000..cf372d2 --- /dev/null +++ b/pym/euscan/handlers/pecl.py @@ -0,0 +1,11 @@ +from euscan.handlers import php + +HANDLER_NAME = "pecl" +CONFIDENCE = 100 +PRIORITY = 90 + +def can_handle(pkg, url=None): + return url and url.startswith('http://%s.php.net/get/' % HANDLER_NAME) + +scan_url = php.scan_url +scan_pkg = php.scan_pkg diff --git a/pym/euscan/handlers/url/php.py b/pym/euscan/handlers/php.py similarity index 54% rename from pym/euscan/handlers/url/php.py rename to pym/euscan/handlers/php.py index d0fef71..1a0117a 100644 --- a/pym/euscan/handlers/url/php.py +++ b/pym/euscan/handlers/php.py @@ -3,23 +3,17 @@ import portage import urllib2 import xml.dom.minidom -from euscan import helpers, output +from euscan import helpers, output, mangling HANDLER_NAME = "php" CONFIDENCE = 100 PRIORITY = 90 - -def can_handle(pkg, url): - if url.startswith('http://pear.php.net/get/'): - return True - if 
url.startswith('http://pecl.php.net/get/'): - return True +def can_handle(pkg, url=None): return False - def guess_package_and_channel(cp, url): - match = re.search('http://(.*)/get/(.*)-(.*).tgz', url) + match = re.search('http://(.*)\.php\.net/get/(.*)-(.*).tgz', url) if match: host = match.group(1) @@ -30,12 +24,17 @@ def guess_package_and_channel(cp, url): return pkg, host -def scan(pkg, url): - cp, ver, rev = portage.pkgsplit(pkg.cpv) - package, channel = guess_package_and_channel(cp, url) +def scan_url(pkg, url, options): + package, channel = guess_package_and_channel(pkg.cp, url) + return scan_pkg(pkg, {'type' : channel, 'data' : package }) - orig_url = url - url = 'http://%s/rest/r/%s/allreleases.xml' % (channel, package.lower()) +def scan_pkg(pkg, options): + cp, ver, rev = pkg.cp, pkg.version, pkg.revision + + package = options['data'] + channel = options['type'] + + url = 'http://%s.php.net/rest/r/%s/allreleases.xml' % (channel, package.lower()) output.einfo("Using: " + url) @@ -58,14 +57,12 @@ def scan(pkg, url): for node in nodes: up_pv = node.childNodes[0].data - pv = helpers.gentoo_mangle_version(up_pv) + pv = mangling.mangle_version(up_pv, options) if helpers.version_filtered(cp, ver, pv): continue - url = 'http://%s/get/%s-%s.tgz' % (channel, package, up_pv) - - if url == orig_url: - continue + url = 'http://%s.php.net/get/%s-%s.tgz' % (channel, package, up_pv) + url = mangling.mangle_url(url, options) ret.append((url, pv, HANDLER_NAME, CONFIDENCE)) diff --git a/pym/euscan/handlers/url/pypi.py b/pym/euscan/handlers/pypi.py similarity index 68% rename from pym/euscan/handlers/url/pypi.py rename to pym/euscan/handlers/pypi.py index 02428ee..c49046c 100644 --- a/pym/euscan/handlers/url/pypi.py +++ b/pym/euscan/handlers/pypi.py @@ -3,15 +3,15 @@ import re import portage -from euscan import helpers, output +from euscan import mangling, helpers, output HANDLER_NAME = "pypi" CONFIDENCE = 100 PRIORITY = 90 -def can_handle(pkg, url): - return url.startswith('mirror://pypi/') +def can_handle(pkg, url=None): + return url and url.startswith('mirror://pypi/') def guess_package(cp, url): @@ -24,19 +24,15 @@ def guess_package(cp, url): return pkg -def scan(pkg, url): +def scan_url(pkg, url, options): 'http://wiki.python.org/moin/PyPiXmlRpc' package = guess_package(pkg.cpv, url) - - ret = [] - for urls, pv in scan_remote(pkg, [package]): - ret.append((urls, pv, HANDLER_NAME, CONFIDENCE)) - return ret + return scan_pkg(pkg, {'data' : package}) -def scan_remote(pkg, remote_data): - package = remote_data[0] +def scan_pkg(pkg, options): + package = options['data'] output.einfo("Using PyPi XMLRPC: " + package) @@ -52,10 +48,10 @@ def scan_remote(pkg, remote_data): ret = [] for up_pv in versions: - pv = helpers.gentoo_mangle_version(up_pv) + pv = mangling.mangle_version(up_pv, options) if helpers.version_filtered(cp, ver, pv): continue urls = client.release_urls(package, up_pv) - urls = " ".join([infos['url'] for infos in urls]) - ret.append((urls, pv)) + urls = " ".join([mangling.mangle_url(infos['url'], options) for infos in urls]) + ret.append((urls, pv, HANDLER_NAME, CONFIDENCE)) return ret diff --git a/pym/euscan/handlers/url/rubygems.py b/pym/euscan/handlers/rubygems.py similarity index 74% rename from pym/euscan/handlers/url/rubygems.py rename to pym/euscan/handlers/rubygems.py index 3b4facd..7fd4c02 100644 --- a/pym/euscan/handlers/url/rubygems.py +++ b/pym/euscan/handlers/rubygems.py @@ -3,15 +3,15 @@ import portage import json import urllib2 -from euscan 
import helpers, output, mangling HANDLER_NAME = "rubygems" CONFIDENCE = 100 PRIORITY = 90 -def can_handle(pkg, url): - return url.startswith('mirror://rubygems/') +def can_handle(pkg, url=None): + return url and url.startswith('mirror://rubygems/') def guess_gem(cpv, url): @@ -29,7 +29,7 @@ def guess_gem(cpv, url): return pkg -def scan(pkg, url): +def scan_url(pkg, url, options): 'http://guides.rubygems.org/rubygems-org-api/#gemversion' gem = guess_gem(pkg.cpv, url) @@ -41,14 +41,11 @@ def scan(pkg, url): output.einfo("Using RubyGem API: %s" % gem) - ret = [] - for url, pv in scan_remote(pkg, [gem]): - ret.append(url, pv, HANDLER_NAME, CONFIDENCE) - return ret + return scan_pkg(pkg, {'data' : gem}) -def scan_remote(pkg, remote_data): - gem = remote_data[0] +def scan_pkg(pkg, options): + gem = options['data'] url = 'http://rubygems.org/api/v1/versions/%s.json' % gem try: @@ -69,9 +66,10 @@ def scan_remote(pkg, remote_data): ret = [] for version in versions: up_pv = version['number'] - pv = helpers.gentoo_mangle_version(up_pv) + pv = mangling.mangle_version(up_pv, options) if helpers.version_filtered(cp, ver, pv): continue url = 'http://rubygems.org/gems/%s-%s.gem' % (gem, up_pv) - ret.append((url, pv)) + url = mangling.mangle_url(url, options) + ret.append((url, pv, HANDLER_NAME, CONFIDENCE)) return ret diff --git a/pym/euscan/handlers/url.py b/pym/euscan/handlers/url.py new file mode 100644 index 0000000..b5b22da --- /dev/null +++ b/pym/euscan/handlers/url.py @@ -0,0 +1,98 @@ +import re +import urllib2 + +import portage + +import generic +from euscan import output, helpers + +PRIORITY = 100 + +HANDLER_NAME = "url" +CONFIDENCE = 100.0 + + +is_pattern = r"\([^\/]+\)" + +def can_handle(*args): + return False + +def handle_directory_patterns(base, file_pattern): + """ + Directory pattern matching + e.g.: base: ftp://ftp.nessus.org/pub/nessus/nessus-([\d\.]+)/src/ + file_pattern: nessus-core-([\d\.]+)\.tar\.gz + """ + splitted = base.split("/") + i = 0 + basedir = [] + for elem in splitted: + if re.search(is_pattern, elem): + break + basedir.append(elem) + i += 1 + basedir = "/".join(basedir) + directory_pattern = splitted[i] + final = "/".join(splitted[i + 1:]) + + try: + fp = helpers.urlopen(basedir) + except urllib2.URLError: + return [] + except IOError: + return [] + + if not fp: + return [] + + data = fp.read() + + if basedir.startswith("ftp://"): + scan_data = generic.scan_ftp(data, basedir, directory_pattern) + else: + scan_data = generic.scan_html(data, basedir, directory_pattern) + + return [("/".join((basedir, path, final)), file_pattern) + for _, path in scan_data] + +def read_options(options): + try: + base, file_pattern = options['data'].split(" ")[:2] + except ValueError: + base, file_pattern = options['data'], None + + # the file pattern can be in the base url + pattern_regex = r"/([^/]*\([^/]*\)[^/]*)$" + match = re.search(pattern_regex, base) + if match: + file_pattern = match.group(1) + base = base.replace(file_pattern, "") + + # handle sf.net specially + base = base.replace( + "http://sf.net/", "http://qa.debian.org/watch/sf.php/" + ) + + return base, file_pattern + +def scan_pkg(pkg, options): + output.einfo("Using watch data") + + cp, ver, rev = pkg.cp, pkg.version, pkg.revision + + base, file_pattern = read_options(options) + + results = [] + if not re.search(is_pattern, base): + steps = [(base, file_pattern)] + results = generic.scan_directory_recursive( + cp, ver, rev, "", steps, base, options + ) + else: + for step in handle_directory_patterns(base, file_pattern): + 
results += generic.scan_directory_recursive( + cp, ver, rev, "", [step], base, options + ) + + return results + diff --git a/pym/euscan/handlers/url/__init__.py b/pym/euscan/handlers/url/__init__.py deleted file mode 100644 index 7328644..0000000 --- a/pym/euscan/handlers/url/__init__.py +++ /dev/null @@ -1,19 +0,0 @@ -""" -Url wide handlers for scanning upstream -""" - -import pkgutil - -handlers = [] - -# autoimport all modules in this directory and append them to handlers list -for loader, module_name, is_pkg in pkgutil.walk_packages(__path__): - module = loader.find_module(module_name).load_module(module_name) - handlers.append(module) - -# sort handlers by priority -handlers = sorted( - handlers, - key=lambda handler: handler.PRIORITY, - reverse=True -) diff --git a/pym/euscan/helpers.py b/pym/euscan/helpers.py index ec721b7..ce5f2fb 100644 --- a/pym/euscan/helpers.py +++ b/pym/euscan/helpers.py @@ -34,120 +34,6 @@ _v_end = r'(?:(?:-|_)(?:pre|p|beta|b|alpha|a|rc|r)\d*)' _v = r'((?:\d+)(?:(?:\.\d+)*)(?:[a-zA-Z]*?)(?:' + _v_end + '*))' -# Stolen from g-pypi -def gentoo_mangle_version(up_pv): - """Convert PV to MY_PV if needed - - :param up_pv: Upstream package version - :type up_pv: string - :returns: pv - :rtype: string - - Can't determine PV from upstream's version. - Do our best with some well-known versioning schemes: - - * 1.0a1 (1.0_alpha1) - * 1.0-a1 (1.0_alpha1) - * 1.0b1 (1.0_beta1) - * 1.0-b1 (1.0_beta1) - * 1.0-r1234 (1.0_pre1234) - * 1.0dev-r1234 (1.0_pre1234) - * 1.0.dev-r1234 (1.0_pre1234) - * 1.0dev-20091118 (1.0_pre20091118) - - Regex match.groups(): - * pkgfoo-1.0.dev-r1234 - * group 1 pv major (1.0) - * group 2 replace this with portage suffix (.dev-r) - * group 3 suffix version (1234) - - The order of the regexes is significant. For instance if you have - .dev-r123, dev-r123 and -r123 you should order your regex's in - that order. - - The chronological portage release versions are: - - * _alpha - * _beta - * _pre - * _rc - * release - * _p - - **Example:** - - >>> gentoo_mangle_version('1.0b2') - '1.0_beta2' - - .. note:: - The number of regex's could have been reduced, but we use four - number of match.groups every time to simplify the code - - """ - bad_suffixes = re.compile( - r'((?:[._-]*)(?:dev|devel|final|stable|snapshot)$)', re.I) - revision_suffixes = re.compile( - r'(.*?)([\._-]*(?:r|patch|p)[\._-]*)([0-9]*)$', re.I) - suf_matches = { - '_pre': [ - r'(.*?)([\._-]*dev[\._-]*r?)([0-9]+)$', - r'(.*?)([\._-]*(?:pre|preview)[\._-]*)([0-9]*)$', - ], - '_alpha': [ - r'(.*?)([\._-]*(?:alpha|test)[\._-]*)([0-9]*)$', - r'(.*?)([\._-]*a[\._-]*)([0-9]*)$', - r'(.*[^a-z])(a)([0-9]*)$', - ], - '_beta': [ - r'(.*?)([\._-]*beta[\._-]*)([0-9]*)$', - r'(.*?)([\._-]*b)([0-9]*)$', - r'(.*[^a-z])(b)([0-9]*)$', - ], - '_rc': [ - r'(.*?)([\._-]*rc[\._-]*)([0-9]*)$', - r'(.*?)([\._-]*c[\._-]*)([0-9]*)$', - r'(.*[^a-z])(c[\._-]*)([0-9]+)$', - ], - } - rs_match = None - pv = up_pv - additional_version = "" - - rev_match = revision_suffixes.search(up_pv) - if rev_match: - pv = up_pv = rev_match.group(1) - replace_me = rev_match.group(2) - rev = rev_match.group(3) - additional_version = '_p' + rev - - for this_suf in suf_matches.keys(): - if rs_match: - break - for regex in suf_matches[this_suf]: - rsuffix_regex = re.compile(regex, re.I) - rs_match = rsuffix_regex.match(up_pv) - if rs_match: - portage_suffix = this_suf - break - - if rs_match: - # e.g. 
1.0.dev-r1234 - major_ver = rs_match.group(1) # 1.0 - replace_me = rs_match.group(2) # .dev-r - rev = rs_match.group(3) # 1234 - pv = major_ver + portage_suffix + rev - else: - # Single suffixes with no numeric component are simply removed. - match = bad_suffixes.search(up_pv) - if match: - suffix = match.groups()[0] - pv = up_pv[: - (len(suffix))] - - pv = pv + additional_version - - return pv - - def cast_int_components(version): for i, obj in enumerate(version): try: @@ -520,7 +406,6 @@ def basedir_from_template(template): return template[0:idx] - def generate_scan_paths(url): prefix, chunks = url.split('://') chunks = chunks.split('/') diff --git a/pym/euscan/mangling.py b/pym/euscan/mangling.py new file mode 100644 index 0000000..60534c4 --- /dev/null +++ b/pym/euscan/mangling.py @@ -0,0 +1,163 @@ +import re + +import euscan.handlers + +def apply_mangling_rule(mangle, string): + # convert regex from perl format to python format + # there are some regex in this format: s/pattern/replacement/ + m = re.match(r"s/(.*[^\\])/(.*)/", mangle) + if not m: + # or in this format s|pattern|replacement| + m = re.match(r"s\|(.*[^\\])\|(.*)\|", mangle) + if not m: # Not a known regex format + return string + pattern, repl = m.groups() + repl = re.sub(r"\$(\d+)", r"\\\1", repl) + + return re.sub(pattern, repl, string) + +def apply_mangling_rules(kind, rules, string): + """ + Apply multiple mangling rules (both sed-like and handlers) + in order + """ + + if kind not in rules: + return string + + for rule in rules[kind]: + ret = None + + # First try handlers rules + if rule == 'gentoo' and kind == 'version': + ret = gentoo_mangle_version(string) + elif kind == 'downloadurlmangle': + ret = euscan.handlers.mangle_url(rule, string) + elif kind == 'versionmangle': + ret = euscan.handlers.mangle_version(rule, string) + + if ret is not None: # Use return value as new string if not None + string = ret + else: # Apply sed like rules + string = apply_mangling_rule(rule, string) + + return string + +def mangle_version(up_pv, options): + return apply_mangling_rules('versionmangle', options, up_pv) + +def mangle_url(url, options): + return apply_mangling_rules('downloadurlmangle', options, url) + +# Stolen from g-pypi +def gentoo_mangle_version(up_pv): + """Convert PV to MY_PV if needed + + :param up_pv: Upstream package version + :type up_pv: string + :returns: pv + :rtype: string + + Can't determine PV from upstream's version. + Do our best with some well-known versioning schemes: + + * 1.0a1 (1.0_alpha1) + * 1.0-a1 (1.0_alpha1) + * 1.0b1 (1.0_beta1) + * 1.0-b1 (1.0_beta1) + * 1.0-r1234 (1.0_pre1234) + * 1.0dev-r1234 (1.0_pre1234) + * 1.0.dev-r1234 (1.0_pre1234) + * 1.0dev-20091118 (1.0_pre20091118) + + Regex match.groups(): + * pkgfoo-1.0.dev-r1234 + * group 1 pv major (1.0) + * group 2 replace this with portage suffix (.dev-r) + * group 3 suffix version (1234) + + The order of the regexes is significant. For instance if you have + .dev-r123, dev-r123 and -r123 you should order your regex's in + that order. + + The chronological portage release versions are: + + * _alpha + * _beta + * _pre + * _rc + * release + * _p + + **Example:** + + >>> gentoo_mangle_version('1.0b2') + '1.0_beta2' + + .. 
note:: + The number of regex's could have been reduced, but we use four + number of match.groups every time to simplify the code + + """ + bad_suffixes = re.compile( + r'((?:[._-]*)(?:dev|devel|final|stable|snapshot)$)', re.I) + revision_suffixes = re.compile( + r'(.*?)([\._-]*(?:r|patch|p)[\._-]*)([0-9]*)$', re.I) + suf_matches = { + '_pre': [ + r'(.*?)([\._-]*dev[\._-]*r?)([0-9]+)$', + r'(.*?)([\._-]*(?:pre|preview)[\._-]*)([0-9]*)$', + ], + '_alpha': [ + r'(.*?)([\._-]*(?:alpha|test)[\._-]*)([0-9]*)$', + r'(.*?)([\._-]*a[\._-]*)([0-9]*)$', + r'(.*[^a-z])(a)([0-9]*)$', + ], + '_beta': [ + r'(.*?)([\._-]*beta[\._-]*)([0-9]*)$', + r'(.*?)([\._-]*b)([0-9]*)$', + r'(.*[^a-z])(b)([0-9]*)$', + ], + '_rc': [ + r'(.*?)([\._-]*rc[\._-]*)([0-9]*)$', + r'(.*?)([\._-]*c[\._-]*)([0-9]*)$', + r'(.*[^a-z])(c[\._-]*)([0-9]+)$', + ], + } + rs_match = None + pv = up_pv + additional_version = "" + + rev_match = revision_suffixes.search(up_pv) + if rev_match: + pv = up_pv = rev_match.group(1) + replace_me = rev_match.group(2) + rev = rev_match.group(3) + additional_version = '_p' + rev + + for this_suf in suf_matches.keys(): + if rs_match: + break + for regex in suf_matches[this_suf]: + rsuffix_regex = re.compile(regex, re.I) + rs_match = rsuffix_regex.match(up_pv) + if rs_match: + portage_suffix = this_suf + break + + if rs_match: + # e.g. 1.0.dev-r1234 + major_ver = rs_match.group(1) # 1.0 + replace_me = rs_match.group(2) # .dev-r + rev = rs_match.group(3) # 1234 + pv = major_ver + portage_suffix + rev + else: + # Single suffixes with no numeric component are simply removed. + match = bad_suffixes.search(up_pv) + if match: + suffix = match.groups()[0] + pv = up_pv[: - (len(suffix))] + + pv = pv + additional_version + + return pv diff --git a/pym/euscan/scan.py b/pym/euscan/scan.py index f5e13cf..a55c6d9 100644 --- a/pym/euscan/scan.py +++ b/pym/euscan/scan.py @@ -44,24 +44,6 @@ def filter_versions(cp, versions): ] -# gentoolkit stores PORTDB, so even if we modify it to add an overlay -# it will still use the old dbapi -def reload_gentoolkit(): - from gentoolkit import dbapi - import gentoolkit.package - import gentoolkit.query - - PORTDB = portage.db[portage.root]["porttree"].dbapi - dbapi.PORTDB = PORTDB - - if hasattr(dbapi, 'PORTDB'): - dbapi.PORTDB = PORTDB - if hasattr(gentoolkit.package, 'PORTDB'): - gentoolkit.package.PORTDB = PORTDB - if hasattr(gentoolkit.query, 'PORTDB'): - gentoolkit.query.PORTDB = PORTDB - - def scan_upstream(query, on_progress=None): """ Scans the upstream searching new versions for the given query