From a18083bd988213251fd1e5a2ee1e94c8512e56cb Mon Sep 17 00:00:00 2001 From: volpino Date: Wed, 23 May 2012 16:30:43 +0200 Subject: [PATCH] euscan: json format output Now "-f json" seems good, it supports the handler type used to retrieve each version and outputs metadata. Signed-off-by: volpino --- bin/euscan | 29 +++++++------ pym/euscan/__init__.py | 61 ++++++++++++++++++-------- pym/euscan/handlers/cpan.py | 9 ++-- pym/euscan/handlers/generic.py | 39 +++++++++-------- pym/euscan/handlers/kde.py | 4 +- pym/euscan/handlers/php.py | 9 ++-- pym/euscan/handlers/pypi.py | 9 ++-- pym/euscan/handlers/rubygem.py | 11 ++--- pym/euscan/scan.py | 79 +++++++++++++++++++--------------- 9 files changed, 150 insertions(+), 100 deletions(-) diff --git a/bin/euscan b/bin/euscan index c4da8a4..011d9ec 100755 --- a/bin/euscan +++ b/bin/euscan @@ -212,7 +212,7 @@ def main(): # parse command line options and actions try: - packages = parse_args() + queries = parse_args() except ParseArgsException as e: if e.value == 'help': print_usage(help='all') @@ -233,11 +233,13 @@ def main(): if CONFIG['verbose'] > 2: httplib.HTTPConnection.debuglevel = 1 - for package in packages: + for query in queries: ret = [] + output.set_query(query) + try: - ret = scan_upstream(package) + ret = scan_upstream(query) except AmbiguousPackageName as e: pkgs = e.args[0] output.eerror("\n".join(pkgs)) @@ -252,24 +254,27 @@ def main(): exit_helper(1) except GentoolkitException as err: - output.eerror('%s: %s' % (package, str(err))) + output.eerror('%s: %s' % (query, str(err))) exit_helper(1) except Exception as err: - output.eerror('%s: %s' % (package, str(err))) + output.eerror('%s: %s' % (query, str(err))) exit_helper(1) if not CONFIG['quiet'] and not CONFIG['format']: print() - for cp, url, version in ret: - output.result(cp, version, url) + if ret is not None: + if len(ret) > 0: + for cp, url, version, handler in ret: + output.result(cp, version, url, handler) + elif not CONFIG['quiet']: + output.ewarn( + "Didn't find any new version, check package's homepage " + + "for more informations" + ) - if not len(ret) and not CONFIG['quiet']: - output.ewarn( - "Didn't find any new version, check package's homepage for " + - "more informations" - ) + output.set_query(None) if __name__ == "__main__": diff --git a/pym/euscan/__init__.py b/pym/euscan/__init__.py index ef7e4ed..b61528b 100644 --- a/pym/euscan/__init__.py +++ b/pym/euscan/__init__.py @@ -59,10 +59,11 @@ BRUTEFORCE_BLACKLIST_PACKAGES = [ BRUTEFORCE_BLACKLIST_URLS = [ 'http://(.*)dockapps.org/download.php/id/(.*)', # infinite loop 'http://hydra.nixos.org/build/(.*)', # infinite loop - 'http://www.rennings.net/gentoo/distfiles/(.*)', # Doesn't respect 404, infinite loop - 'http://art.gnome.org/download/(.*)', # Doesn't respect 404, infinite loop - 'http://barelysufficient.org/~olemarkus/(.*)', # Doesn't respect 404, infinite loop - 'http://olemarkus.org/~olemarkus/(.*)', # Doesn't respect 404, infinite loop + # Doesn't respect 404, infinite loop + 'http://www.rennings.net/gentoo/distfiles/(.*)', + 'http://art.gnome.org/download/(.*)', + 'http://barelysufficient.org/~olemarkus/(.*)', + 'http://olemarkus.org/~olemarkus/(.*)', ] ROBOTS_TXT_BLACKLIST_DOMAINS = [ @@ -96,26 +97,45 @@ class EuscanOutput(object): """ def __init__(self, config): self.config = config - self.data = defaultdict(StringIO) - self.packages = defaultdict(list) + self.queries = defaultdict(dict) + self.current_query = None + + def set_query(self, query): + self.current_query = query + if query is not None: + if not query in self.queries: + self.queries[query] = { + "messages": defaultdict(StringIO), + "result": [], + "metadata": {}, + } def get_formatted_output(self): data = {} - for key in self.data: - if key not in ("ebegin", "eend"): - val = [x for x in self.data[key].getvalue().split("\n") if x] - data[key] = val - data["result"] = self.packages + for query in self.queries: + data[query] = { + "result": self.queries[query]["result"], + "metadata": self.queries[query]["metadata"], + "messages": {} + } + for key in self.queries[query]["messages"]: + if key not in ("ebegin", "eend"): + _msg = self.queries[query]["messages"][key].getvalue() + val = [x for x in _msg.split("\n") if x] + data[query]["messages"][key] = val if self.config["format"].lower() == "json": return json.dumps(data, indent=self.config["indent"]) else: raise TypeError("Invalid output format") - def result(self, cp, version, url): + def result(self, cp, version, url, handler): if self.config['format']: - self.packages[cp].append({"version": version, "url": url}) + _curr = self.queries[self.current_query] + _curr["result"].append( + {"version": version, "urls": [url], "handler": handler} + ) else: if not self.config['quiet']: print "Upstream Version:", pp.number("%s" % version), @@ -123,16 +143,21 @@ class EuscanOutput(object): else: print pp.cpv("%s-%s" % (cp, version)) + ":", pp.path(url) - def __getattr__(self, key): - output_file = self.data[key] if self.config["format"] else None + def metadata(self, key, value, show=True): + if self.config["format"]: + self.queries[self.current_query]["metadata"][key] = value + elif show: + print "%s: %s" % (key.capitalize(), value) - if output_file: - _output = EOutputFile(out_file=self.data[key], + def __getattr__(self, key): + if self.config["format"]: + out_file = self.queries[self.current_query]["messages"][key] + + _output = EOutputFile(out_file=out_file, quiet=self.config['quiet']) ret = getattr(_output, key) else: ret = getattr(EOutputFile(quiet=self.config['quiet']), key) - return ret diff --git a/pym/euscan/handlers/cpan.py b/pym/euscan/handlers/cpan.py index 1b5a111..0546cb7 100644 --- a/pym/euscan/handlers/cpan.py +++ b/pym/euscan/handlers/cpan.py @@ -3,8 +3,9 @@ import portage import urllib2 import json -from euscan import helpers -import euscan +from euscan import helpers, output + +HANDLER_NAME = "cpan" _cpan_package_name_re = re.compile("mirror://cpan/authors/.*/([^/.]*).*") @@ -83,7 +84,7 @@ def scan(cpv, url): orig_url = url url = 'http://search.cpan.org/api/dist/%s' % pkg - euscan.output.einfo("Using: " + url) + output.einfo("Using: " + url) try: fp = helpers.urlopen(url) @@ -125,7 +126,7 @@ def scan(cpv, url): if url == orig_url: continue - ret.append((url, pv)) + ret.append((url, pv, HANDLER_NAME)) return ret diff --git a/pym/euscan/handlers/generic.py b/pym/euscan/handlers/generic.py index ea94dad..9a4c74e 100644 --- a/pym/euscan/handlers/generic.py +++ b/pym/euscan/handlers/generic.py @@ -7,9 +7,10 @@ from BeautifulSoup import BeautifulSoup import portage from euscan import CONFIG, SCANDIR_BLACKLIST_URLS, \ - BRUTEFORCE_BLACKLIST_PACKAGES, BRUTEFORCE_BLACKLIST_URLS -from euscan import helpers -import euscan + BRUTEFORCE_BLACKLIST_PACKAGES, BRUTEFORCE_BLACKLIST_URLS, output, helpers + +HANDLER_NAME = "generic" +BRUTEFORCE_HANDLER_NAME = "brute_force" def scan_html(data, url, pattern): @@ -53,7 +54,7 @@ def scan_directory_recursive(cp, ver, rev, url, steps, orig_url): steps = steps[1:] - euscan.output.einfo("Scanning: %s" % url) + output.einfo("Scanning: %s" % url) try: fp = helpers.urlopen(url) @@ -87,7 +88,7 @@ def scan_directory_recursive(cp, ver, rev, url, steps, orig_url): path = url + path if not steps and path not in orig_url: - versions.append((path, pv)) + versions.append((path, pv, HANDLER_NAME)) if steps: ret = scan_directory_recursive(cp, ver, rev, path, steps, orig_url) @@ -99,7 +100,7 @@ def scan_directory_recursive(cp, ver, rev, url, steps, orig_url): def scan(cpv, url): for bu in SCANDIR_BLACKLIST_URLS: if re.match(bu, url): - euscan.output.einfo("%s is blacklisted by rule %s" % (url, bu)) + output.einfo("%s is blacklisted by rule %s" % (url, bu)) return [] resolved_url = helpers.parse_mirror(url) @@ -112,23 +113,25 @@ def scan(cpv, url): if ver not in resolved_url: newver = helpers.version_change_end_sep(ver) if newver and newver in resolved_url: - euscan.output.einfo( + output.einfo( "Version: using %s instead of %s" % (newver, ver) ) ver = newver template = helpers.template_from_url(resolved_url, ver) if '${' not in template: - euscan.output.einfo( + output.einfo( "Url doesn't seems to depend on version: %s not found in %s" % (ver, resolved_url) ) return [] else: - euscan.output.einfo("Scanning: %s" % template) + output.einfo("Scanning: %s" % template) steps = helpers.generate_scan_paths(template) - return scan_directory_recursive(cp, ver, rev, "", steps, url) + ret = scan_directory_recursive(cp, ver, rev, "", steps, url) + + return ret def brute_force(cpv, url): @@ -140,15 +143,15 @@ def brute_force(cpv, url): for bp in BRUTEFORCE_BLACKLIST_PACKAGES: if re.match(bp, cp): - euscan.output.einfo("%s is blacklisted by rule %s" % (cp, bp)) + output.einfo("%s is blacklisted by rule %s" % (cp, bp)) return [] for bp in BRUTEFORCE_BLACKLIST_URLS: if re.match(bp, url): - euscan.output.einfo("%s is blacklisted by rule %s" % (cp, bp)) + output.einfo("%s is blacklisted by rule %s" % (cp, bp)) return [] - euscan.output.einfo("Generating version from " + ver) + output.einfo("Generating version from " + ver) components = helpers.split_version(ver) versions = helpers.gen_versions(components, CONFIG["brute-force"]) @@ -159,18 +162,18 @@ def brute_force(cpv, url): versions.remove(v) if not versions: - euscan.output.einfo("Can't generate new versions from " + ver) + output.einfo("Can't generate new versions from " + ver) return [] template = helpers.template_from_url(url, ver) if '${PV}' not in template: - euscan.output.einfo( + output.einfo( "Url doesn't seems to depend on full version: %s not found in %s" % (ver, url)) return [] else: - euscan.output.einfo("Brute forcing: %s" % template) + output.einfo("Brute forcing: %s" % template) result = [] @@ -195,10 +198,10 @@ def brute_force(cpv, url): if not infos: continue - result.append([url, version]) + result.append([url, version, BRUTEFORCE_HANDLER_NAME]) if len(result) > CONFIG['brute-force-false-watermark']: - euscan.output.einfo( + output.einfo( "Broken server detected ! Skipping brute force." ) return [] diff --git a/pym/euscan/handlers/kde.py b/pym/euscan/handlers/kde.py index 00947c0..3736466 100644 --- a/pym/euscan/handlers/kde.py +++ b/pym/euscan/handlers/kde.py @@ -1,5 +1,7 @@ from euscan.handlers import generic +HANDLER_NAME = "kde" + def can_handle(cpv, url): if url.startswith('mirror://kde/'): @@ -13,7 +15,7 @@ def clean_results(results): for path, version in results: if version == '5SUMS': continue - ret.append((path, version)) + ret.append((path, version, HANDLER_NAME)) return ret diff --git a/pym/euscan/handlers/php.py b/pym/euscan/handlers/php.py index a4c7267..97a6322 100644 --- a/pym/euscan/handlers/php.py +++ b/pym/euscan/handlers/php.py @@ -3,8 +3,9 @@ import portage import urllib2 import xml.dom.minidom -from euscan import helpers -import euscan +from euscan import helpers, output + +HANDLER_NAME = "php" def can_handle(cpv, url): @@ -34,7 +35,7 @@ def scan(cpv, url): orig_url = url url = 'http://%s/rest/r/%s/allreleases.xml' % (channel, pkg.lower()) - euscan.output.einfo("Using: " + url) + output.einfo("Using: " + url) try: fp = helpers.urlopen(url) @@ -64,7 +65,7 @@ def scan(cpv, url): if url == orig_url: continue - ret.append((url, pv)) + ret.append((url, pv, HANDLER_NAME)) return ret diff --git a/pym/euscan/handlers/pypi.py b/pym/euscan/handlers/pypi.py index 1e477b7..4d52e4a 100644 --- a/pym/euscan/handlers/pypi.py +++ b/pym/euscan/handlers/pypi.py @@ -3,8 +3,9 @@ import re import portage -from euscan import helpers -import euscan +from euscan import helpers, output + +HANDLER_NAME = "pypi" def can_handle(cpv, url): @@ -26,7 +27,7 @@ def scan(cpv, url): package = guess_package(cpv, url) - euscan.output.einfo("Using PyPi XMLRPC: " + package) + output.einfo("Using PyPi XMLRPC: " + package) client = xmlrpclib.ServerProxy('http://pypi.python.org/pypi') versions = client.package_releases(package) @@ -46,7 +47,7 @@ def scan(cpv, url): continue urls = client.release_urls(package, up_pv) urls = " ".join([infos['url'] for infos in urls]) - ret.append((urls, pv)) + ret.append((urls, pv, HANDLER_NAME)) return ret diff --git a/pym/euscan/handlers/rubygem.py b/pym/euscan/handlers/rubygem.py index ec57a2a..52da00c 100644 --- a/pym/euscan/handlers/rubygem.py +++ b/pym/euscan/handlers/rubygem.py @@ -3,8 +3,9 @@ import portage import json import urllib2 -from euscan import helpers -import euscan +from euscan import helpers, output + +HANDLER_NAME = "rubygem" def can_handle(cpv, url): @@ -31,13 +32,13 @@ def scan(cpv, url): gem = guess_gem(cpv, url) if not gem: - euscan.output.eerror("Can't guess gem name using %s and %s" % \ + output.eerror("Can't guess gem name using %s and %s" % \ (cpv, url)) return [] url = 'http://rubygems.org/api/v1/versions/%s.json' % gem - euscan.output.einfo("Using: " + url) + output.einfo("Using: " + url) try: fp = helpers.urlopen(url) @@ -65,7 +66,7 @@ def scan(cpv, url): if helpers.version_filtered(cp, ver, pv): continue url = 'http://rubygems.org/gems/%s-%s.gem' % (gem, up_pv) - ret.append((url, pv)) + ret.append((url, pv, HANDLER_NAME)) return ret diff --git a/pym/euscan/scan.py b/pym/euscan/scan.py index a1f63aa..a931e53 100644 --- a/pym/euscan/scan.py +++ b/pym/euscan/scan.py @@ -1,8 +1,7 @@ import os -import sys +from datetime import datetime import portage - from portage.dbapi import porttree import gentoolkit.pprinter as pp @@ -10,28 +9,28 @@ from gentoolkit.query import Query from gentoolkit.eclean.search import (port_settings) from euscan import CONFIG, BLACKLIST_PACKAGES -from euscan import handlers -from euscan import helpers - -import euscan +from euscan import handlers, helpers, output def filter_versions(cp, versions): filtered = {} - for url, version in versions: + for url, version, handler in versions: - ''' Try to keep the most specific urls (determinted by the length) ''' + # Try to keep the most specific urls (determinted by the length) if version in filtered and len(url) < len(filtered[version]): continue - ''' Remove blacklisted versions ''' + # Remove blacklisted versions if helpers.version_blacklisted(cp, version): continue - filtered[version] = url + filtered[version] = {"url": url, "handler": handler} - return [(cp, filtered[version], version) for version in filtered] + return [ + (cp, filtered[version]["url"], version, filtered[version]["handler"]) + for version in filtered + ] def scan_upstream_urls(cpv, urls): @@ -41,20 +40,20 @@ def scan_upstream_urls(cpv, urls): for url in urls[filename]: if not CONFIG['quiet'] and not CONFIG['format']: pp.uprint() - euscan.output.einfo("SRC_URI is '%s'" % url) + output.einfo("SRC_URI is '%s'" % url) if '://' not in url: - euscan.output.einfo("Invalid url '%s'" % url) + output.einfo("Invalid url '%s'" % url) continue - ''' Try normal scan ''' + # Try normal scan if CONFIG["scan-dir"]: versions.extend(handlers.scan(cpv, url)) if versions and CONFIG['oneshot']: break - ''' Brute Force ''' + # Brute Force if CONFIG["brute-force"] > 0: versions.extend(handlers.brute_force(cpv, url)) @@ -79,10 +78,10 @@ def scan_upstream(query): ) if not matches: - sys.stderr.write( + output.ewarn( pp.warn("No package matching '%s'" % pp.pkgquery(query)) ) - return [] + return None matches = sorted(matches) pkg = matches.pop() @@ -91,29 +90,41 @@ def scan_upstream(query): pkg = matches.pop() if not pkg: - sys.stderr.write(pp.warn("Package '%s' only have a dev version (9999)" - % pp.pkgquery(pkg.cp))) - return [] + output.ewarn( + pp.warn("Package '%s' only have a dev version (9999)" + % pp.pkgquery(pkg.cp)) + ) + return None + + # useful data only for formatted output + output.metadata("datetime", datetime.now().isoformat(), show=False) + output.metadata("cp", pkg.cp, show=False) + output.metadata("cpv", pkg.cpv, show=False) if pkg.cp in BLACKLIST_PACKAGES: - sys.stderr.write( + output.ewarn( pp.warn("Package '%s' is blacklisted" % pp.pkgquery(pkg.cp)) ) - return [] + return None - if not CONFIG['quiet'] and not CONFIG['format']: - pp.uprint( - " * %s [%s]" % (pp.cpv(pkg.cpv), pp.section(pkg.repo_name())) - ) - pp.uprint() + if not CONFIG['quiet']: + if not CONFIG['format']: + pp.uprint( + " * %s [%s]" % (pp.cpv(pkg.cpv), pp.section(pkg.repo_name())) + ) + pp.uprint() + else: + output.metadata("overlay", pp.section(pkg.repo_name())) ebuild_path = pkg.ebuild_path() if ebuild_path: - pp.uprint('Ebuild: ' + pp.path(os.path.normpath(ebuild_path))) + output.metadata( + "ebuild", pp.path(os.path.normpath(ebuild_path)) + ) - pp.uprint('Repository: ' + pkg.repo_name()) - pp.uprint('Homepage: ' + pkg.environment("HOMEPAGE")) - pp.uprint('Description: ' + pkg.environment("DESCRIPTION")) + output.metadata("repository", pkg.repo_name()) + output.metadata("homepage", pkg.environment("HOMEPAGE")) + output.metadata("description", pkg.environment("DESCRIPTION")) cpv = pkg.cpv metadata = { @@ -125,11 +136,11 @@ def scan_upstream(query): alist = porttree._parse_uri_map(cpv, metadata, use=use) aalist = porttree._parse_uri_map(cpv, metadata) except Exception as e: - sys.stderr.write(pp.warn("%s\n" % str(e))) - sys.stderr.write( + output.ewarn(pp.warn("%s\n" % str(e))) + output.ewarn( pp.warn("Invalid SRC_URI for '%s'" % pp.pkgquery(cpv)) ) - return [] + return None if "mirror" in portage.settings.features: urls = aalist