From 35603fd704fa86589fe3df4480182b0cec852559 Mon Sep 17 00:00:00 2001
From: volpino
Date: Thu, 26 Jul 2012 17:46:53 +0200
Subject: [PATCH] euscan: Added remote-id handler

Signed-off-by: volpino
---
 pym/euscan/handlers/package/remote_id.py      |  43 ++++++
 pym/euscan/handlers/url/cpan.py               |  23 ++-
 pym/euscan/handlers/url/github.py             |   5 +-
 pym/euscan/handlers/url/pypi.py               |  13 +-
 .../handlers/url/{rubygem.py => rubygems.py}  |  22 +--
 pym/euscan/handlers/watch.py                  | 143 ------------------
 6 files changed, 85 insertions(+), 164 deletions(-)
 create mode 100644 pym/euscan/handlers/package/remote_id.py
 rename pym/euscan/handlers/url/{rubygem.py => rubygems.py} (82%)
 delete mode 100644 pym/euscan/handlers/watch.py

diff --git a/pym/euscan/handlers/package/remote_id.py b/pym/euscan/handlers/package/remote_id.py
new file mode 100644
index 0000000..b95f856
--- /dev/null
+++ b/pym/euscan/handlers/package/remote_id.py
@@ -0,0 +1,43 @@
+from euscan.handlers.url import handlers
+from euscan import output
+
+PRIORITY = 100
+
+HANDLER_NAME = "remote_id"
+CONFIDENCE = 100.0
+
+
+url_handlers = {handler.HANDLER_NAME: handler for handler in handlers}
+
+
+def can_handle(pkg):
+    # Return True if there's at least one remote-id that can be
+    # handled by euscan
+    try:
+        remoteids = pkg.metadata.upstream()[0].upstream_remoteids()
+    except IndexError:
+        pass
+    else:
+        if len(remoteids) > 0:
+            for remote_value, remote_type in remoteids:
+                if remote_type in url_handlers:
+                    return True
+    return False
+
+
+def scan(pkg):
+    output.einfo("Using remote-id data")
+
+    ret = []
+
+    remoteids = pkg.metadata.upstream()[0].upstream_remoteids()
+    for remote_value, remote_type in remoteids:
+        if remote_type in url_handlers:
+            remote_data = remote_value.split("/")
+            scan_remote = getattr(
+                url_handlers[remote_type], "scan_remote", None
+            )
+            if scan_remote:
+                for url, pv in scan_remote(pkg, remote_data):
+                    ret.append((url, pv, HANDLER_NAME, CONFIDENCE))
+    return ret
diff --git a/pym/euscan/handlers/url/cpan.py b/pym/euscan/handlers/url/cpan.py
index 0f587e1..0721324 100644
--- a/pym/euscan/handlers/url/cpan.py
+++ b/pym/euscan/handlers/url/cpan.py
@@ -81,12 +81,22 @@ def cpan_vercmp(cp, a, b):
 
 def scan(pkg, url):
     cp, ver, rev = portage.pkgsplit(pkg.cpv)
-    pkg = guess_package(cp, url)
+    remote_pkg = guess_package(cp, url)
 
-    orig_url = url
-    url = 'http://search.cpan.org/api/dist/%s' % pkg
+    output.einfo("Using CPAN API: %s" % remote_pkg)
 
-    output.einfo("Using: " + url)
+    result = scan_remote(pkg, [remote_pkg])
+
+    ret = []
+    for url, pv in result:
+        ret.append((url, pv, HANDLER_NAME, CONFIDENCE))
+    return ret
+
+
+def scan_remote(pkg, remote_data):
+    remote_pkg = remote_data[0]
+    url = 'http://search.cpan.org/api/dist/%s' % remote_pkg
+    cp, ver, rev = portage.pkgsplit(pkg.cpv)
 
     try:
         fp = helpers.urlopen(url)
@@ -125,9 +135,6 @@ def scan(pkg, url):
             version['archive']
         )
 
-        if url == orig_url:
-            continue
-
-        ret.append((url, pv, HANDLER_NAME, CONFIDENCE))
+        ret.append((url, pv))
 
     return ret
diff --git a/pym/euscan/handlers/url/github.py b/pym/euscan/handlers/url/github.py
index 080a559..e4ebe10 100644
--- a/pym/euscan/handlers/url/github.py
+++ b/pym/euscan/handlers/url/github.py
@@ -44,6 +44,7 @@ def scan(pkg, url):
                             (user, project))
     dls = json.load(dlreq)
 
+    ret = []
     for dl in dls:
         m = fnre.match(dl['name'])
 
@@ -51,4 +52,6 @@ def scan(pkg, url):
             pv = helpers.gentoo_mangle_version(m.group(1))
             if helpers.version_filtered(cp, ver, pv):
                 continue
-            yield (dl['html_url'], pv, HANDLER_NAME, CONFIDENCE)
+
+            ret.append((dl['html_url'], pv, HANDLER_NAME, CONFIDENCE))
+    return ret
diff --git a/pym/euscan/handlers/url/pypi.py b/pym/euscan/handlers/url/pypi.py
index 8ed8021..82251e6 100644
--- a/pym/euscan/handlers/url/pypi.py
+++ b/pym/euscan/handlers/url/pypi.py
@@ -29,6 +29,15 @@ def scan(pkg, url):
 
     package = guess_package(pkg.cpv, url)
 
+    ret = []
+    for urls, pv in scan_remote(pkg, [package]):
+        ret.append((urls, pv, HANDLER_NAME, CONFIDENCE))
+    return ret
+
+
+def scan_remote(pkg, remote_data):
+    package = remote_data[0]
+
     output.einfo("Using PyPi XMLRPC: " + package)
 
     client = xmlrpclib.ServerProxy('http://pypi.python.org/pypi')
@@ -42,13 +51,11 @@ def scan(pkg, url):
     cp, ver, rev = portage.pkgsplit(pkg.cpv)
 
     ret = []
-
     for up_pv in versions:
         pv = helpers.gentoo_mangle_version(up_pv)
         if helpers.version_filtered(cp, ver, pv):
             continue
         urls = client.release_urls(package, up_pv)
         urls = " ".join([infos['url'] for infos in urls])
-        ret.append((urls, pv, HANDLER_NAME, CONFIDENCE))
-
+        ret.append((urls, pv))
     return ret
diff --git a/pym/euscan/handlers/url/rubygem.py b/pym/euscan/handlers/url/rubygems.py
similarity index 82%
rename from pym/euscan/handlers/url/rubygem.py
rename to pym/euscan/handlers/url/rubygems.py
index 950e81b..a3021f0 100644
--- a/pym/euscan/handlers/url/rubygem.py
+++ b/pym/euscan/handlers/url/rubygems.py
@@ -5,7 +5,7 @@ import urllib2
 
 from euscan import helpers, output
 
-HANDLER_NAME = "rubygem"
+HANDLER_NAME = "rubygems"
 CONFIDENCE = 100.0
 PRIORITY = 90
 
@@ -33,14 +33,23 @@ def scan(pkg, url):
     'http://guides.rubygems.org/rubygems-org-api/#gemversion'
 
     gem = guess_gem(pkg.cpv, url)
+
     if not gem:
         output.eerror("Can't guess gem name using %s and %s" % \
             (pkg.cpv, url))
         return []
 
-    url = 'http://rubygems.org/api/v1/versions/%s.json' % gem
+    output.einfo("Using RubyGem API: %s" % gem)
 
-    output.einfo("Using: " + url)
+    ret = []
+    for url, pv in scan_remote(pkg, [gem]):
+        ret.append((url, pv, HANDLER_NAME, CONFIDENCE))
+    return ret
+
+
+def scan_remote(pkg, remote_data):
+    gem = remote_data[0]
+    url = 'http://rubygems.org/api/v1/versions/%s.json' % gem
 
     try:
         fp = helpers.urlopen(url)
@@ -55,19 +64,14 @@ def scan(pkg, url):
     data = fp.read()
     versions = json.loads(data)
 
-    if not versions:
-        return []
-
     cp, ver, rev = portage.pkgsplit(pkg.cpv)
 
     ret = []
-
     for version in versions:
         up_pv = version['number']
         pv = helpers.gentoo_mangle_version(up_pv)
         if helpers.version_filtered(cp, ver, pv):
             continue
         url = 'http://rubygems.org/gems/%s-%s.gem' % (gem, up_pv)
-        ret.append((url, pv, HANDLER_NAME, CONFIDENCE))
-
+        ret.append((url, pv))
     return ret
diff --git a/pym/euscan/handlers/watch.py b/pym/euscan/handlers/watch.py
deleted file mode 100644
index a129281..0000000
--- a/pym/euscan/handlers/watch.py
+++ /dev/null
@@ -1,143 +0,0 @@
-import re
-import urllib2
-
-import portage
-
-from euscan.handlers import generic
-from euscan import output, helpers
-
-PRIORITY = 100
-
-HANDLER_NAME = "watch"
-CONFIDENCE = 100.0
-
-
-is_pattern = r"\([^\/]+\)"
-
-
-def can_handle(pkg, url):
-    try:
-        return pkg.metadata._xml_tree.find("upstream").find("watch") \
-            is not None
-    except AttributeError:
-        return False
-
-
-def parse_mangles(mangles, string):
-    for mangle in mangles:
-        # convert regex from perl format to python format
-        # there are some regex in this format: s/pattern/replacement/
-        m = re.match(r"s/(.*[^\\])/(.*)/", mangle)
-        if not m:
-            # or in this format s|pattern|replacement|
-            m = re.match(r"s\|(.*[^\\])\|(.*)\|", mangle)
-        pattern, repl = m.groups()
-        repl = re.sub(r"\$(\d+)", r"\\\1", repl)
-        string = re.sub(pattern, repl, string)
-    return string
-
-
-def clean_results(results, versionmangle, urlmangle):
-    ret = []
-
-    for path, version, _, _ in results:
-        version = parse_mangles(versionmangle, version)
-        path = parse_mangles(urlmangle, path)
-        ret.append((path, version, HANDLER_NAME, CONFIDENCE))
-
-    return ret
-
-
-def parse_watch(pkg):
-    for watch_tag in pkg.metadata._xml_tree.find("upstream").findall("watch"):
-        try:
-            base, file_pattern = watch_tag.text.split(" ")[:2]
-        except ValueError:
-            base, file_pattern = watch_tag.text, None
-
-        # the file pattern can be in the base url
-        pattern_regex = r"/([^/]*\([^/]*\)[^/]*)$"
-        match = re.search(pattern_regex, base)
-        if match:
-            file_pattern = match.group(1)
-            base = base.replace(file_pattern, "")
-
-        # handle sf.net specially
-        base = base.replace(
-            "http://sf.net/", "http://qa.debian.org/watch/sf.php/"
-        )
-
-        vmangle = watch_tag.attrib.get("uversionmangle", None) or \
-            watch_tag.attrib.get("versionmangle", None)
-        versionmangle = vmangle.split(";") if vmangle else []
-
-        umangle = watch_tag.attrib.get("downloadurlmangle", None)
-        urlmangle = umangle.split(";") if umangle else []
-
-        yield (base, file_pattern, versionmangle, urlmangle)
-
-
-def handle_directory_patterns(base, file_pattern):
-    """
-    Directory pattern matching
-    e.g.: base: ftp://ftp.nessus.org/pub/nessus/nessus-([\d\.]+)/src/
-          file_pattern: nessus-core-([\d\.]+)\.tar\.gz
-    """
-    splitted = base.split("/")
-    i = 0
-    basedir = []
-    for elem in splitted:
-        if re.search(is_pattern, elem):
-            break
-        basedir.append(elem)
-        i += 1
-    basedir = "/".join(basedir)
-    directory_pattern = splitted[i]
-    final = "/".join(splitted[i + 1:])
-
-    try:
-        fp = helpers.urlopen(basedir)
-    except urllib2.URLError:
-        return []
-    except IOError:
-        return []
-
-    if not fp:
-        return []
-
-    data = fp.read()
-
-    if basedir.startswith("ftp://"):
-        scan_data = generic.scan_ftp(data, basedir, directory_pattern)
-    else:
-        scan_data = generic.scan_html(data, basedir, directory_pattern)
-
-    return [("/".join((basedir, path, final)), file_pattern)
-            for _, path in scan_data]
-
-
-def scan(pkg, url):
-    output.einfo("Using watch data")
-
-    cp, ver, rev = portage.pkgsplit(pkg.cpv)
-
-    results = []
-    for base, file_pattern, versionmangle, urlmangle in parse_watch(pkg):
-        if not re.search(is_pattern, base):
-            steps = [(base, file_pattern)]
-            res = generic.scan_directory_recursive(
-                cp, ver, rev, "", steps, url
-            )
-        else:
-            res = []
-            for step in handle_directory_patterns(base, file_pattern):
-                res += generic.scan_directory_recursive(
-                    cp, ver, rev, "", [step], url
-                )
-
-        results += clean_results(res, versionmangle, urlmangle)
-    return results
-
-
-def brute_force(pkg, url):
-    return []
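
Note on the new dispatch in package/remote_id.py: it resolves each
metadata.xml <remote-id type="..."> entry against the url handler whose
HANDLER_NAME matches the type, splits the remote-id value on "/", and calls
that handler's scan_remote(pkg, remote_data) hook, appending HANDLER_NAME and
CONFIDENCE to whatever (url, version) pairs come back. A minimal sketch of
the hook contract, using a hypothetical "example" url handler (the module
name, URL and version below are illustrative assumptions):

    # pym/euscan/handlers/url/example.py -- hypothetical handler sketch
    HANDLER_NAME = "example"
    CONFIDENCE = 100.0
    PRIORITY = 90

    def scan_remote(pkg, remote_data):
        # remote_data is the <remote-id> value split on "/",
        # e.g. "foo/bar" -> ["foo", "bar"]
        project = remote_data[0]
        # query upstream here; return plain (url, version) pairs,
        # remote_id.scan() adds HANDLER_NAME and CONFIDENCE itself
        return [("http://example.com/%s-1.0.tar.gz" % project, "1.0")]

With such a hook in place, a package whose metadata.xml carries
<remote-id type="example">foo/bar</remote-id> is picked up by
remote_id.can_handle() and scanned through scan_remote().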