From 373fba6e010e4313877d98ef925c4546b2b49283 Mon Sep 17 00:00:00 2001 From: volpino Date: Mon, 21 May 2012 12:03:14 +0200 Subject: [PATCH 1/6] euscan: fixed patched versions issue The old code extracted version 0.8.6.1 from buildbot-0.8.6p1 instead of 0.8.6_p1 Signed-off-by: volpino --- pym/euscan/handlers/generic.py | 2 +- pym/euscan/helpers.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/pym/euscan/handlers/generic.py b/pym/euscan/handlers/generic.py index ceb854b..ea94dad 100644 --- a/pym/euscan/handlers/generic.py +++ b/pym/euscan/handlers/generic.py @@ -153,7 +153,7 @@ def brute_force(cpv, url): components = helpers.split_version(ver) versions = helpers.gen_versions(components, CONFIG["brute-force"]) - """ Remove unwanted versions """ + # Remove unwanted versions for v in versions: if helpers.vercmp(cp, ver, helpers.join_version(v)) >= 0: versions.remove(v) diff --git a/pym/euscan/helpers.py b/pym/euscan/helpers.py index 7af055b..3f7b767 100644 --- a/pym/euscan/helpers.py +++ b/pym/euscan/helpers.py @@ -126,7 +126,7 @@ def gentoo_mangle_version(up_pv): pv = up_pv = rev_match.group(1) replace_me = rev_match.group(2) rev = rev_match.group(3) - additional_version = '.' 
+ rev + additional_version = '_p' + rev for this_suf in suf_matches.keys(): if rs_match: From 8cb19b5a6ba2bf1142d07bdd8ee72080d89017d2 Mon Sep 17 00:00:00 2001 From: volpino Date: Mon, 21 May 2012 22:24:44 +0200 Subject: [PATCH 2/6] euscan: adding json output Naive json output implemented, probably needs some further tuning Signed-off-by: volpino --- bin/euscan | 171 ++++++++++++++++++++++------------------- pym/euscan/__init__.py | 80 +++++++++++++++++-- pym/euscan/scan.py | 4 +- 3 files changed, 170 insertions(+), 85 deletions(-) diff --git a/bin/euscan b/bin/euscan index e7708e3..c4da8a4 100755 --- a/bin/euscan +++ b/bin/euscan @@ -6,51 +6,58 @@ Distributed under the terms of the GNU General Public License v2 from __future__ import print_function -""" Meta """ + +# Meta + __author__ = "Corentin Chary (iksaif)" __email__ = "corentin.chary@gmail.com" __version__ = "git" __productname__ = "euscan" __description__ = "A tool to detect new upstream releases." -__version__ = "git" -""" Imports """ + +# Imports import sys import getopt import errno import httplib -from portage.output import white, yellow, turquoise, green, EOutput +from portage.output import white, yellow, turquoise, green from portage.exception import AmbiguousPackageName from gentoolkit import pprinter as pp from gentoolkit.eclean.search import (port_settings) from gentoolkit.errors import GentoolkitException -import euscan -from euscan import CONFIG +from euscan import CONFIG, output from euscan.scan import scan_upstream -""" Globals """ + +# Globals + +def exit_helper(status): + if CONFIG["format"]: + print(output.get_formatted_output()) + sys.exit(status) -def setupSignals(): - """ This block ensures that ^C interrupts are handled quietly. 
""" +def setup_signals(): + """This block ensures that ^C interrupts are handled quietly.""" import signal def exithandler(signum, frame): signal.signal(signal.SIGINT, signal.SIG_IGN) signal.signal(signal.SIGTERM, signal.SIG_IGN) print() - sys.exit(errno.EINTR) + exit_helper(errno.EINTR) signal.signal(signal.SIGINT, exithandler) signal.signal(signal.SIGTERM, exithandler) signal.signal(signal.SIGPIPE, signal.SIG_DFL) -def printVersion(): +def print_version(): """Output the version info.""" print("%s (%s) - %s" \ % (__productname__, __version__, __description__)) @@ -60,23 +67,26 @@ def printVersion(): print("Distributed under the terms of the GNU General Public License v2") -def printUsage(_error=None, help=None): +def print_usage(_error=None, help=None): """Print help message. May also print partial help to stderr if an error from {'options'} is specified.""" out = sys.stdout if _error: out = sys.stderr + if not _error in ('global-options', 'packages',): _error = None + if not _error and not help: help = 'all' + if _error in ('global-options',): - print(pp.error("Wrong option on command line."), file=out) - print(file=out) + output.eerror("Wrong option on command line.\n") + if _error in ('packages',): - print(pp.error("You need to specify exactly one package."), file=out) - print(file=out) + output.eerror("You need to specify exactly one package.\n") + print(white("Usage:"), file=out) if _error in ('global-options', 'packages',) or help == 'all': print(" " + turquoise(__productname__), @@ -106,14 +116,19 @@ def printUsage(_error=None, help=None): " (default: 2)\n" + " " * 29 + "bigger levels will generate more versions numbers\n" + " " * 29 + "0 means disabled", file=out) + print(yellow(" -f, --format=") + + " - define the output " + yellow("") + + " (available: json)", file=out) print(file=out) + if _error in ('packages',) or help: print(green(" package") + " - the packages (or ebuilds) you want to scan", file=out) print(file=out) - '''print( "More detailed 
instruction can be found in", - turquoise("`man %s`" % __productname__), file=out)''' + + #print( "More detailed instruction can be found in", + #turquoise("`man %s`" % __productname__), file=out) class ParseArgsException(Exception): @@ -125,12 +140,12 @@ class ParseArgsException(Exception): return repr(self.value) -def parseArgs(): +def parse_args(): """Parse the command line arguments. Raise exceptions on - errors. Returns package and affect the CONFIG dict. + errors. Returns packages and affects the CONFIG dict. """ - def optionSwitch(opts): + def option_switch(opts): """local function for interpreting command line options and setting options accordingly""" return_code = True @@ -150,30 +165,36 @@ def parseArgs(): elif o in ("-b", "--brute-force"): CONFIG['brute-force'] = int(a) elif o in ("-v", "--verbose") and not CONFIG['quiet']: - CONFIG['verbose'] += 1 + CONFIG['verbose'] += 1 + elif o in ("-f", "--format"): + CONFIG['format'] = a + CONFIG['nocolor'] = True + pp.output.nocolor() else: return_code = False return return_code - ' here are the different allowed command line options (getopt args) ' + # here are the different allowed command line options (getopt args) getopt_options = {'short': {}, 'long': {}} - getopt_options['short']['global'] = "hVCqv1b:" - getopt_options['long']['global'] = ["help", "version", "nocolor", "quiet", - "verbose", "oneshot", "brute-force="] + getopt_options['short']['global'] = "hVCqv1b:f:" + getopt_options['long']['global'] = [ + "help", "version", "nocolor", "quiet", "verbose", "oneshot", + "brute-force=", "format=" + ] short_opts = getopt_options['short']['global'] long_opts = getopt_options['long']['global'] opts_mode = 'global' - ' apply getopts to command line, show partial help on failure ' + # apply getopts to command line, show partial help on failure try: opts, args = getopt.getopt(sys.argv[1:], short_opts, long_opts) except: raise ParseArgsException(opts_mode + '-options') - ' set options accordingly ' - 
optionSwitch(opts) + # set options accordingly + option_switch(opts) if len(args) < 1: raise ParseArgsException('packages') @@ -183,29 +204,32 @@ def parseArgs(): def main(): """Parse command line and execute all actions.""" - CONFIG['nocolor'] = (port_settings["NOCOLOR"] in ('yes', 'true') - or not sys.stdout.isatty()) + CONFIG['nocolor'] = ( + port_settings["NOCOLOR"] in ('yes', 'true') or not sys.stdout.isatty() + ) if CONFIG['nocolor']: pp.output.nocolor() - ' parse command line options and actions ' + + # parse command line options and actions try: - packages = parseArgs() + packages = parse_args() except ParseArgsException as e: if e.value == 'help': - printUsage(help='all') - sys.exit(0) - elif e.value[:5] == 'help-': - printUsage(help=e.value[5:]) - sys.exit(0) - elif e.value == 'version': - printVersion() - sys.exit(0) - else: - printUsage(e.value) - sys.exit(errno.EINVAL) + print_usage(help='all') + exit_helper(0) + + elif e.value[:5] == 'help-': + print_usage(help=e.value[5:]) + exit_helper(0) + + elif e.value == 'version': + print_version() + exit_helper(0) + + else: + print_usage(e.value) + exit_helper(errno.EINVAL) - """ Change euscan's output """ - euscan.output = EOutput(CONFIG['quiet']) if CONFIG['verbose'] > 2: httplib.HTTPConnection.debuglevel = 1 @@ -216,48 +240,39 @@ def main(): ret = scan_upstream(package) except AmbiguousPackageName as e: pkgs = e.args[0] - for candidate in pkgs: - print(candidate) + output.eerror("\n".join(pkgs)) from os.path import basename # To get the short name - print(file=sys.stderr) - print( - pp.error( - "The short ebuild name '%s' is ambiguous. Please specify" \ - % basename(pkgs[0]) - ), - file=sys.stderr, end="" + output.error( + "The short ebuild name '%s' is ambiguous. Please specify" % + basename(pkgs[0]), + "one of the above fully-qualified ebuild names instead." 
) - pp.die(1, "one of the above fully-qualified ebuild names instead.") - except GentoolkitException as err: - pp.die(1, '%s: %s' % (package, str(err))) - except Exception as err: - pp.die(1, '%s: %s' % (package, str(err))) + exit_helper(1) - if not CONFIG['quiet']: + except GentoolkitException as err: + output.eerror('%s: %s' % (package, str(err))) + exit_helper(1) + + except Exception as err: + output.eerror('%s: %s' % (package, str(err))) + exit_helper(1) + + if not CONFIG['quiet'] and not CONFIG['format']: print() for cp, url, version in ret: - if not CONFIG['quiet']: - print("Upstream Version: " - + pp.number("%s" % version) - + pp.path(" %s" % url)) - else: - print(pp.cpv("%s-%s" % (cp, version)) - + ": " + pp.path(url)) + output.result(cp, version, url) if not len(ret) and not CONFIG['quiet']: - print(pp.warn("Didn't find any new version, " - + "check package's homepage for " - + "more informations")) + output.ewarn( + "Didn't find any new version, check package's homepage for " + + "more informations" + ) if __name__ == "__main__": - try: - setupSignals() - main() - except KeyboardInterrupt: - print("Aborted.") - sys.exit(errno.EINTR) - sys.exit(0) + setup_signals() + main() + exit_helper(0) diff --git a/pym/euscan/__init__.py b/pym/euscan/__init__.py index 391a7a3..ef7e4ed 100644 --- a/pym/euscan/__init__.py +++ b/pym/euscan/__init__.py @@ -3,11 +3,17 @@ # Copyright 2011 Corentin Chary # Distributed under the terms of the GNU General Public License v2 +from io import StringIO +from collections import defaultdict +import json + +from gentoolkit import pprinter as pp +from portage.output import EOutput + + __version__ = "git" -from portage.output import EOutput - CONFIG = { 'nocolor': False, 'quiet': False, @@ -20,11 +26,11 @@ CONFIG = { 'oneshot': True, 'user-agent': 'escan (http://euscan.iksaif.net)', 'skip-robots-txt': False, - 'cache': False + 'cache': False, + 'format': None, + 'indent': 2 } -output = EOutput(CONFIG['quiet']) - BLACKLIST_VERSIONS = [ # 
Compatibility package for running binaries linked against a # pre gcc 3.4 libstdc++, won't be updated @@ -67,3 +73,67 @@ ROBOTS_TXT_BLACKLIST_DOMAINS = [ '(.*)chromium.org(.*)', '(.*)nodejs.org(.*)', ] + + +class EOutputFile(EOutput): + """ + Override of EOutput, allows to specify an output file for writes + """ + def __init__(self, out_file=None, *args, **kwargs): + super(EOutputFile, self).__init__(*args, **kwargs) + self.out_file = out_file + + def _write(self, f, msg): + if self.out_file is None: + super(EOutputFile, self)._write(f, msg) + else: + super(EOutputFile, self)._write(self.out_file, msg) + + +class EuscanOutput(object): + """ + Class that handles output for euscan + """ + def __init__(self, config): + self.config = config + self.data = defaultdict(StringIO) + self.packages = defaultdict(list) + + def get_formatted_output(self): + data = {} + for key in self.data: + if key not in ("ebegin", "eend"): + val = [x for x in self.data[key].getvalue().split("\n") if x] + data[key] = val + + data["result"] = self.packages + + if self.config["format"].lower() == "json": + return json.dumps(data, indent=self.config["indent"]) + else: + raise TypeError("Invalid output format") + + def result(self, cp, version, url): + if self.config['format']: + self.packages[cp].append({"version": version, "url": url}) + else: + if not self.config['quiet']: + print "Upstream Version:", pp.number("%s" % version), + print pp.path(" %s" % url) + else: + print pp.cpv("%s-%s" % (cp, version)) + ":", pp.path(url) + + def __getattr__(self, key): + output_file = self.data[key] if self.config["format"] else None + + if output_file: + _output = EOutputFile(out_file=self.data[key], + quiet=self.config['quiet']) + ret = getattr(_output, key) + else: + ret = getattr(EOutputFile(quiet=self.config['quiet']), key) + + return ret + + +output = EuscanOutput(CONFIG) diff --git a/pym/euscan/scan.py b/pym/euscan/scan.py index 17bd938..a1f63aa 100644 --- a/pym/euscan/scan.py +++ b/pym/euscan/scan.py 
@@ -39,7 +39,7 @@ def scan_upstream_urls(cpv, urls): for filename in urls: for url in urls[filename]: - if not CONFIG['quiet']: + if not CONFIG['quiet'] and not CONFIG['format']: pp.uprint() euscan.output.einfo("SRC_URI is '%s'" % url) @@ -101,7 +101,7 @@ def scan_upstream(query): ) return [] - if not CONFIG['quiet']: + if not CONFIG['quiet'] and not CONFIG['format']: pp.uprint( " * %s [%s]" % (pp.cpv(pkg.cpv), pp.section(pkg.repo_name())) ) From a18083bd988213251fd1e5a2ee1e94c8512e56cb Mon Sep 17 00:00:00 2001 From: volpino Date: Wed, 23 May 2012 16:30:43 +0200 Subject: [PATCH 3/6] euscan: json format output Now "-f json" seems good, it supports the handler type used to retrieve each version and outputs metadata. Signed-off-by: volpino --- bin/euscan | 29 +++++++------ pym/euscan/__init__.py | 61 ++++++++++++++++++-------- pym/euscan/handlers/cpan.py | 9 ++-- pym/euscan/handlers/generic.py | 39 +++++++++-------- pym/euscan/handlers/kde.py | 4 +- pym/euscan/handlers/php.py | 9 ++-- pym/euscan/handlers/pypi.py | 9 ++-- pym/euscan/handlers/rubygem.py | 11 ++--- pym/euscan/scan.py | 79 +++++++++++++++++++--------------- 9 files changed, 150 insertions(+), 100 deletions(-) diff --git a/bin/euscan b/bin/euscan index c4da8a4..011d9ec 100755 --- a/bin/euscan +++ b/bin/euscan @@ -212,7 +212,7 @@ def main(): # parse command line options and actions try: - packages = parse_args() + queries = parse_args() except ParseArgsException as e: if e.value == 'help': print_usage(help='all') @@ -233,11 +233,13 @@ def main(): if CONFIG['verbose'] > 2: httplib.HTTPConnection.debuglevel = 1 - for package in packages: + for query in queries: ret = [] + output.set_query(query) + try: - ret = scan_upstream(package) + ret = scan_upstream(query) except AmbiguousPackageName as e: pkgs = e.args[0] output.eerror("\n".join(pkgs)) @@ -252,24 +254,27 @@ def main(): exit_helper(1) except GentoolkitException as err: - output.eerror('%s: %s' % (package, str(err))) + output.eerror('%s: %s' % (query, 
str(err))) exit_helper(1) except Exception as err: - output.eerror('%s: %s' % (package, str(err))) + output.eerror('%s: %s' % (query, str(err))) exit_helper(1) if not CONFIG['quiet'] and not CONFIG['format']: print() - for cp, url, version in ret: - output.result(cp, version, url) + if ret is not None: + if len(ret) > 0: + for cp, url, version, handler in ret: + output.result(cp, version, url, handler) + elif not CONFIG['quiet']: + output.ewarn( + "Didn't find any new version, check package's homepage " + + "for more informations" + ) - if not len(ret) and not CONFIG['quiet']: - output.ewarn( - "Didn't find any new version, check package's homepage for " + - "more informations" - ) + output.set_query(None) if __name__ == "__main__": diff --git a/pym/euscan/__init__.py b/pym/euscan/__init__.py index ef7e4ed..b61528b 100644 --- a/pym/euscan/__init__.py +++ b/pym/euscan/__init__.py @@ -59,10 +59,11 @@ BRUTEFORCE_BLACKLIST_PACKAGES = [ BRUTEFORCE_BLACKLIST_URLS = [ 'http://(.*)dockapps.org/download.php/id/(.*)', # infinite loop 'http://hydra.nixos.org/build/(.*)', # infinite loop - 'http://www.rennings.net/gentoo/distfiles/(.*)', # Doesn't respect 404, infinite loop - 'http://art.gnome.org/download/(.*)', # Doesn't respect 404, infinite loop - 'http://barelysufficient.org/~olemarkus/(.*)', # Doesn't respect 404, infinite loop - 'http://olemarkus.org/~olemarkus/(.*)', # Doesn't respect 404, infinite loop + # Doesn't respect 404, infinite loop + 'http://www.rennings.net/gentoo/distfiles/(.*)', + 'http://art.gnome.org/download/(.*)', + 'http://barelysufficient.org/~olemarkus/(.*)', + 'http://olemarkus.org/~olemarkus/(.*)', ] ROBOTS_TXT_BLACKLIST_DOMAINS = [ @@ -96,26 +97,45 @@ class EuscanOutput(object): """ def __init__(self, config): self.config = config - self.data = defaultdict(StringIO) - self.packages = defaultdict(list) + self.queries = defaultdict(dict) + self.current_query = None + + def set_query(self, query): + self.current_query = query + if query is not None: 
+ if not query in self.queries: + self.queries[query] = { + "messages": defaultdict(StringIO), + "result": [], + "metadata": {}, + } def get_formatted_output(self): data = {} - for key in self.data: - if key not in ("ebegin", "eend"): - val = [x for x in self.data[key].getvalue().split("\n") if x] - data[key] = val - data["result"] = self.packages + for query in self.queries: + data[query] = { + "result": self.queries[query]["result"], + "metadata": self.queries[query]["metadata"], + "messages": {} + } + for key in self.queries[query]["messages"]: + if key not in ("ebegin", "eend"): + _msg = self.queries[query]["messages"][key].getvalue() + val = [x for x in _msg.split("\n") if x] + data[query]["messages"][key] = val if self.config["format"].lower() == "json": return json.dumps(data, indent=self.config["indent"]) else: raise TypeError("Invalid output format") - def result(self, cp, version, url): + def result(self, cp, version, url, handler): if self.config['format']: - self.packages[cp].append({"version": version, "url": url}) + _curr = self.queries[self.current_query] + _curr["result"].append( + {"version": version, "urls": [url], "handler": handler} + ) else: if not self.config['quiet']: print "Upstream Version:", pp.number("%s" % version), @@ -123,16 +143,21 @@ class EuscanOutput(object): else: print pp.cpv("%s-%s" % (cp, version)) + ":", pp.path(url) - def __getattr__(self, key): - output_file = self.data[key] if self.config["format"] else None + def metadata(self, key, value, show=True): + if self.config["format"]: + self.queries[self.current_query]["metadata"][key] = value + elif show: + print "%s: %s" % (key.capitalize(), value) - if output_file: - _output = EOutputFile(out_file=self.data[key], + def __getattr__(self, key): + if self.config["format"]: + out_file = self.queries[self.current_query]["messages"][key] + + _output = EOutputFile(out_file=out_file, quiet=self.config['quiet']) ret = getattr(_output, key) else: ret = 
getattr(EOutputFile(quiet=self.config['quiet']), key) - return ret diff --git a/pym/euscan/handlers/cpan.py b/pym/euscan/handlers/cpan.py index 1b5a111..0546cb7 100644 --- a/pym/euscan/handlers/cpan.py +++ b/pym/euscan/handlers/cpan.py @@ -3,8 +3,9 @@ import portage import urllib2 import json -from euscan import helpers -import euscan +from euscan import helpers, output + +HANDLER_NAME = "cpan" _cpan_package_name_re = re.compile("mirror://cpan/authors/.*/([^/.]*).*") @@ -83,7 +84,7 @@ def scan(cpv, url): orig_url = url url = 'http://search.cpan.org/api/dist/%s' % pkg - euscan.output.einfo("Using: " + url) + output.einfo("Using: " + url) try: fp = helpers.urlopen(url) @@ -125,7 +126,7 @@ def scan(cpv, url): if url == orig_url: continue - ret.append((url, pv)) + ret.append((url, pv, HANDLER_NAME)) return ret diff --git a/pym/euscan/handlers/generic.py b/pym/euscan/handlers/generic.py index ea94dad..9a4c74e 100644 --- a/pym/euscan/handlers/generic.py +++ b/pym/euscan/handlers/generic.py @@ -7,9 +7,10 @@ from BeautifulSoup import BeautifulSoup import portage from euscan import CONFIG, SCANDIR_BLACKLIST_URLS, \ - BRUTEFORCE_BLACKLIST_PACKAGES, BRUTEFORCE_BLACKLIST_URLS -from euscan import helpers -import euscan + BRUTEFORCE_BLACKLIST_PACKAGES, BRUTEFORCE_BLACKLIST_URLS, output, helpers + +HANDLER_NAME = "generic" +BRUTEFORCE_HANDLER_NAME = "brute_force" def scan_html(data, url, pattern): @@ -53,7 +54,7 @@ def scan_directory_recursive(cp, ver, rev, url, steps, orig_url): steps = steps[1:] - euscan.output.einfo("Scanning: %s" % url) + output.einfo("Scanning: %s" % url) try: fp = helpers.urlopen(url) @@ -87,7 +88,7 @@ def scan_directory_recursive(cp, ver, rev, url, steps, orig_url): path = url + path if not steps and path not in orig_url: - versions.append((path, pv)) + versions.append((path, pv, HANDLER_NAME)) if steps: ret = scan_directory_recursive(cp, ver, rev, path, steps, orig_url) @@ -99,7 +100,7 @@ def scan_directory_recursive(cp, ver, rev, url, steps, orig_url): 
def scan(cpv, url): for bu in SCANDIR_BLACKLIST_URLS: if re.match(bu, url): - euscan.output.einfo("%s is blacklisted by rule %s" % (url, bu)) + output.einfo("%s is blacklisted by rule %s" % (url, bu)) return [] resolved_url = helpers.parse_mirror(url) @@ -112,23 +113,25 @@ def scan(cpv, url): if ver not in resolved_url: newver = helpers.version_change_end_sep(ver) if newver and newver in resolved_url: - euscan.output.einfo( + output.einfo( "Version: using %s instead of %s" % (newver, ver) ) ver = newver template = helpers.template_from_url(resolved_url, ver) if '${' not in template: - euscan.output.einfo( + output.einfo( "Url doesn't seems to depend on version: %s not found in %s" % (ver, resolved_url) ) return [] else: - euscan.output.einfo("Scanning: %s" % template) + output.einfo("Scanning: %s" % template) steps = helpers.generate_scan_paths(template) - return scan_directory_recursive(cp, ver, rev, "", steps, url) + ret = scan_directory_recursive(cp, ver, rev, "", steps, url) + + return ret def brute_force(cpv, url): @@ -140,15 +143,15 @@ def brute_force(cpv, url): for bp in BRUTEFORCE_BLACKLIST_PACKAGES: if re.match(bp, cp): - euscan.output.einfo("%s is blacklisted by rule %s" % (cp, bp)) + output.einfo("%s is blacklisted by rule %s" % (cp, bp)) return [] for bp in BRUTEFORCE_BLACKLIST_URLS: if re.match(bp, url): - euscan.output.einfo("%s is blacklisted by rule %s" % (cp, bp)) + output.einfo("%s is blacklisted by rule %s" % (cp, bp)) return [] - euscan.output.einfo("Generating version from " + ver) + output.einfo("Generating version from " + ver) components = helpers.split_version(ver) versions = helpers.gen_versions(components, CONFIG["brute-force"]) @@ -159,18 +162,18 @@ def brute_force(cpv, url): versions.remove(v) if not versions: - euscan.output.einfo("Can't generate new versions from " + ver) + output.einfo("Can't generate new versions from " + ver) return [] template = helpers.template_from_url(url, ver) if '${PV}' not in template: - euscan.output.einfo( 
+ output.einfo( "Url doesn't seems to depend on full version: %s not found in %s" % (ver, url)) return [] else: - euscan.output.einfo("Brute forcing: %s" % template) + output.einfo("Brute forcing: %s" % template) result = [] @@ -195,10 +198,10 @@ def brute_force(cpv, url): if not infos: continue - result.append([url, version]) + result.append([url, version, BRUTEFORCE_HANDLER_NAME]) if len(result) > CONFIG['brute-force-false-watermark']: - euscan.output.einfo( + output.einfo( "Broken server detected ! Skipping brute force." ) return [] diff --git a/pym/euscan/handlers/kde.py b/pym/euscan/handlers/kde.py index 00947c0..3736466 100644 --- a/pym/euscan/handlers/kde.py +++ b/pym/euscan/handlers/kde.py @@ -1,5 +1,7 @@ from euscan.handlers import generic +HANDLER_NAME = "kde" + def can_handle(cpv, url): if url.startswith('mirror://kde/'): @@ -13,7 +15,7 @@ def clean_results(results): for path, version in results: if version == '5SUMS': continue - ret.append((path, version)) + ret.append((path, version, HANDLER_NAME)) return ret diff --git a/pym/euscan/handlers/php.py b/pym/euscan/handlers/php.py index a4c7267..97a6322 100644 --- a/pym/euscan/handlers/php.py +++ b/pym/euscan/handlers/php.py @@ -3,8 +3,9 @@ import portage import urllib2 import xml.dom.minidom -from euscan import helpers -import euscan +from euscan import helpers, output + +HANDLER_NAME = "php" def can_handle(cpv, url): @@ -34,7 +35,7 @@ def scan(cpv, url): orig_url = url url = 'http://%s/rest/r/%s/allreleases.xml' % (channel, pkg.lower()) - euscan.output.einfo("Using: " + url) + output.einfo("Using: " + url) try: fp = helpers.urlopen(url) @@ -64,7 +65,7 @@ def scan(cpv, url): if url == orig_url: continue - ret.append((url, pv)) + ret.append((url, pv, HANDLER_NAME)) return ret diff --git a/pym/euscan/handlers/pypi.py b/pym/euscan/handlers/pypi.py index 1e477b7..4d52e4a 100644 --- a/pym/euscan/handlers/pypi.py +++ b/pym/euscan/handlers/pypi.py @@ -3,8 +3,9 @@ import re import portage -from euscan import 
helpers -import euscan +from euscan import helpers, output + +HANDLER_NAME = "pypi" def can_handle(cpv, url): @@ -26,7 +27,7 @@ def scan(cpv, url): package = guess_package(cpv, url) - euscan.output.einfo("Using PyPi XMLRPC: " + package) + output.einfo("Using PyPi XMLRPC: " + package) client = xmlrpclib.ServerProxy('http://pypi.python.org/pypi') versions = client.package_releases(package) @@ -46,7 +47,7 @@ def scan(cpv, url): continue urls = client.release_urls(package, up_pv) urls = " ".join([infos['url'] for infos in urls]) - ret.append((urls, pv)) + ret.append((urls, pv, HANDLER_NAME)) return ret diff --git a/pym/euscan/handlers/rubygem.py b/pym/euscan/handlers/rubygem.py index ec57a2a..52da00c 100644 --- a/pym/euscan/handlers/rubygem.py +++ b/pym/euscan/handlers/rubygem.py @@ -3,8 +3,9 @@ import portage import json import urllib2 -from euscan import helpers -import euscan +from euscan import helpers, output + +HANDLER_NAME = "rubygem" def can_handle(cpv, url): @@ -31,13 +32,13 @@ def scan(cpv, url): gem = guess_gem(cpv, url) if not gem: - euscan.output.eerror("Can't guess gem name using %s and %s" % \ + output.eerror("Can't guess gem name using %s and %s" % \ (cpv, url)) return [] url = 'http://rubygems.org/api/v1/versions/%s.json' % gem - euscan.output.einfo("Using: " + url) + output.einfo("Using: " + url) try: fp = helpers.urlopen(url) @@ -65,7 +66,7 @@ def scan(cpv, url): if helpers.version_filtered(cp, ver, pv): continue url = 'http://rubygems.org/gems/%s-%s.gem' % (gem, up_pv) - ret.append((url, pv)) + ret.append((url, pv, HANDLER_NAME)) return ret diff --git a/pym/euscan/scan.py b/pym/euscan/scan.py index a1f63aa..a931e53 100644 --- a/pym/euscan/scan.py +++ b/pym/euscan/scan.py @@ -1,8 +1,7 @@ import os -import sys +from datetime import datetime import portage - from portage.dbapi import porttree import gentoolkit.pprinter as pp @@ -10,28 +9,28 @@ from gentoolkit.query import Query from gentoolkit.eclean.search import (port_settings) from euscan import 
CONFIG, BLACKLIST_PACKAGES -from euscan import handlers -from euscan import helpers - -import euscan +from euscan import handlers, helpers, output def filter_versions(cp, versions): filtered = {} - for url, version in versions: + for url, version, handler in versions: - ''' Try to keep the most specific urls (determinted by the length) ''' + # Try to keep the most specific urls (determinted by the length) if version in filtered and len(url) < len(filtered[version]): continue - ''' Remove blacklisted versions ''' + # Remove blacklisted versions if helpers.version_blacklisted(cp, version): continue - filtered[version] = url + filtered[version] = {"url": url, "handler": handler} - return [(cp, filtered[version], version) for version in filtered] + return [ + (cp, filtered[version]["url"], version, filtered[version]["handler"]) + for version in filtered + ] def scan_upstream_urls(cpv, urls): @@ -41,20 +40,20 @@ def scan_upstream_urls(cpv, urls): for url in urls[filename]: if not CONFIG['quiet'] and not CONFIG['format']: pp.uprint() - euscan.output.einfo("SRC_URI is '%s'" % url) + output.einfo("SRC_URI is '%s'" % url) if '://' not in url: - euscan.output.einfo("Invalid url '%s'" % url) + output.einfo("Invalid url '%s'" % url) continue - ''' Try normal scan ''' + # Try normal scan if CONFIG["scan-dir"]: versions.extend(handlers.scan(cpv, url)) if versions and CONFIG['oneshot']: break - ''' Brute Force ''' + # Brute Force if CONFIG["brute-force"] > 0: versions.extend(handlers.brute_force(cpv, url)) @@ -79,10 +78,10 @@ def scan_upstream(query): ) if not matches: - sys.stderr.write( + output.ewarn( pp.warn("No package matching '%s'" % pp.pkgquery(query)) ) - return [] + return None matches = sorted(matches) pkg = matches.pop() @@ -91,29 +90,41 @@ def scan_upstream(query): pkg = matches.pop() if not pkg: - sys.stderr.write(pp.warn("Package '%s' only have a dev version (9999)" - % pp.pkgquery(pkg.cp))) - return [] + output.ewarn( + pp.warn("Package '%s' only have a dev 
version (9999)" + % pp.pkgquery(pkg.cp)) + ) + return None + + # useful data only for formatted output + output.metadata("datetime", datetime.now().isoformat(), show=False) + output.metadata("cp", pkg.cp, show=False) + output.metadata("cpv", pkg.cpv, show=False) if pkg.cp in BLACKLIST_PACKAGES: - sys.stderr.write( + output.ewarn( pp.warn("Package '%s' is blacklisted" % pp.pkgquery(pkg.cp)) ) - return [] + return None - if not CONFIG['quiet'] and not CONFIG['format']: - pp.uprint( - " * %s [%s]" % (pp.cpv(pkg.cpv), pp.section(pkg.repo_name())) - ) - pp.uprint() + if not CONFIG['quiet']: + if not CONFIG['format']: + pp.uprint( + " * %s [%s]" % (pp.cpv(pkg.cpv), pp.section(pkg.repo_name())) + ) + pp.uprint() + else: + output.metadata("overlay", pp.section(pkg.repo_name())) ebuild_path = pkg.ebuild_path() if ebuild_path: - pp.uprint('Ebuild: ' + pp.path(os.path.normpath(ebuild_path))) + output.metadata( + "ebuild", pp.path(os.path.normpath(ebuild_path)) + ) - pp.uprint('Repository: ' + pkg.repo_name()) - pp.uprint('Homepage: ' + pkg.environment("HOMEPAGE")) - pp.uprint('Description: ' + pkg.environment("DESCRIPTION")) + output.metadata("repository", pkg.repo_name()) + output.metadata("homepage", pkg.environment("HOMEPAGE")) + output.metadata("description", pkg.environment("DESCRIPTION")) cpv = pkg.cpv metadata = { @@ -125,11 +136,11 @@ def scan_upstream(query): alist = porttree._parse_uri_map(cpv, metadata, use=use) aalist = porttree._parse_uri_map(cpv, metadata) except Exception as e: - sys.stderr.write(pp.warn("%s\n" % str(e))) - sys.stderr.write( + output.ewarn(pp.warn("%s\n" % str(e))) + output.ewarn( pp.warn("Invalid SRC_URI for '%s'" % pp.pkgquery(cpv)) ) - return [] + return None if "mirror" in portage.settings.features: urls = aalist From 1efeee4134b420b39d54ebe83614e8d5b3b4d849 Mon Sep 17 00:00:00 2001 From: volpino Date: Wed, 23 May 2012 16:44:44 +0200 Subject: [PATCH 4/6] euscan: added simple support to result confidence Confidence values are meaningless at 
the moment. Need to fix it. Signed-off-by: volpino --- bin/euscan | 4 ++-- pym/euscan/__init__.py | 5 +++-- pym/euscan/handlers/cpan.py | 3 ++- pym/euscan/handlers/generic.py | 8 ++++++-- pym/euscan/handlers/kde.py | 4 ++-- pym/euscan/handlers/php.py | 3 ++- pym/euscan/handlers/pypi.py | 3 ++- pym/euscan/handlers/rubygem.py | 3 ++- pym/euscan/scan.py | 11 ++++++++--- 9 files changed, 29 insertions(+), 15 deletions(-) diff --git a/bin/euscan b/bin/euscan index 011d9ec..dfdedca 100755 --- a/bin/euscan +++ b/bin/euscan @@ -266,8 +266,8 @@ def main(): if ret is not None: if len(ret) > 0: - for cp, url, version, handler in ret: - output.result(cp, version, url, handler) + for cp, url, version, handler, confidence in ret: + output.result(cp, version, url, handler, confidence) elif not CONFIG['quiet']: output.ewarn( "Didn't find any new version, check package's homepage " + diff --git a/pym/euscan/__init__.py b/pym/euscan/__init__.py index b61528b..796c56a 100644 --- a/pym/euscan/__init__.py +++ b/pym/euscan/__init__.py @@ -130,11 +130,12 @@ class EuscanOutput(object): else: raise TypeError("Invalid output format") - def result(self, cp, version, url, handler): + def result(self, cp, version, url, handler, confidence): if self.config['format']: _curr = self.queries[self.current_query] _curr["result"].append( - {"version": version, "urls": [url], "handler": handler} + {"version": version, "urls": [url], "handler": handler, + "confidence": confidence} ) else: if not self.config['quiet']: diff --git a/pym/euscan/handlers/cpan.py b/pym/euscan/handlers/cpan.py index 0546cb7..7dcc246 100644 --- a/pym/euscan/handlers/cpan.py +++ b/pym/euscan/handlers/cpan.py @@ -6,6 +6,7 @@ import json from euscan import helpers, output HANDLER_NAME = "cpan" +CONFIDENCE = 100.0 _cpan_package_name_re = re.compile("mirror://cpan/authors/.*/([^/.]*).*") @@ -126,7 +127,7 @@ def scan(cpv, url): if url == orig_url: continue - ret.append((url, pv, HANDLER_NAME)) + ret.append((url, pv, HANDLER_NAME, 
CONFIDENCE)) return ret diff --git a/pym/euscan/handlers/generic.py b/pym/euscan/handlers/generic.py index 9a4c74e..5cf9d49 100644 --- a/pym/euscan/handlers/generic.py +++ b/pym/euscan/handlers/generic.py @@ -10,7 +10,10 @@ from euscan import CONFIG, SCANDIR_BLACKLIST_URLS, \ BRUTEFORCE_BLACKLIST_PACKAGES, BRUTEFORCE_BLACKLIST_URLS, output, helpers HANDLER_NAME = "generic" +CONFIDENCE = 50.0 + BRUTEFORCE_HANDLER_NAME = "brute_force" +BRUTEFORCE_CONFIDENCE = 30.0 def scan_html(data, url, pattern): @@ -88,7 +91,7 @@ def scan_directory_recursive(cp, ver, rev, url, steps, orig_url): path = url + path if not steps and path not in orig_url: - versions.append((path, pv, HANDLER_NAME)) + versions.append((path, pv, HANDLER_NAME, CONFIDENCE)) if steps: ret = scan_directory_recursive(cp, ver, rev, path, steps, orig_url) @@ -198,7 +201,8 @@ def brute_force(cpv, url): if not infos: continue - result.append([url, version, BRUTEFORCE_HANDLER_NAME]) + result.append([url, version, BRUTEFORCE_HANDLER_NAME, + BRUTEFORCE_CONFIDENCE]) if len(result) > CONFIG['brute-force-false-watermark']: output.einfo( diff --git a/pym/euscan/handlers/kde.py b/pym/euscan/handlers/kde.py index 3736466..2b27639 100644 --- a/pym/euscan/handlers/kde.py +++ b/pym/euscan/handlers/kde.py @@ -12,10 +12,10 @@ def can_handle(cpv, url): def clean_results(results): ret = [] - for path, version in results: + for path, version, confidence in results: if version == '5SUMS': continue - ret.append((path, version, HANDLER_NAME)) + ret.append((path, version, HANDLER_NAME, confidence)) return ret diff --git a/pym/euscan/handlers/php.py b/pym/euscan/handlers/php.py index 97a6322..82c49c5 100644 --- a/pym/euscan/handlers/php.py +++ b/pym/euscan/handlers/php.py @@ -6,6 +6,7 @@ import xml.dom.minidom from euscan import helpers, output HANDLER_NAME = "php" +CONFIDENCE = 100.0 def can_handle(cpv, url): @@ -65,7 +66,7 @@ def scan(cpv, url): if url == orig_url: continue - ret.append((url, pv, HANDLER_NAME)) + ret.append((url, 
pv, HANDLER_NAME, CONFIDENCE)) return ret diff --git a/pym/euscan/handlers/pypi.py b/pym/euscan/handlers/pypi.py index 4d52e4a..0b3ed73 100644 --- a/pym/euscan/handlers/pypi.py +++ b/pym/euscan/handlers/pypi.py @@ -6,6 +6,7 @@ import portage from euscan import helpers, output HANDLER_NAME = "pypi" +CONFIDENCE = 100.0 def can_handle(cpv, url): @@ -47,7 +48,7 @@ def scan(cpv, url): continue urls = client.release_urls(package, up_pv) urls = " ".join([infos['url'] for infos in urls]) - ret.append((urls, pv, HANDLER_NAME)) + ret.append((urls, pv, HANDLER_NAME, CONFIDENCE)) return ret diff --git a/pym/euscan/handlers/rubygem.py b/pym/euscan/handlers/rubygem.py index 52da00c..8bbd5ab 100644 --- a/pym/euscan/handlers/rubygem.py +++ b/pym/euscan/handlers/rubygem.py @@ -6,6 +6,7 @@ import urllib2 from euscan import helpers, output HANDLER_NAME = "rubygem" +CONFIDENCE = 100.0 def can_handle(cpv, url): @@ -66,7 +67,7 @@ def scan(cpv, url): if helpers.version_filtered(cp, ver, pv): continue url = 'http://rubygems.org/gems/%s-%s.gem' % (gem, up_pv) - ret.append((url, pv, HANDLER_NAME)) + ret.append((url, pv, HANDLER_NAME, CONFIDENCE)) return ret diff --git a/pym/euscan/scan.py b/pym/euscan/scan.py index a931e53..37cd71f 100644 --- a/pym/euscan/scan.py +++ b/pym/euscan/scan.py @@ -15,7 +15,7 @@ from euscan import handlers, helpers, output def filter_versions(cp, versions): filtered = {} - for url, version, handler in versions: + for url, version, handler, confidence in versions: # Try to keep the most specific urls (determinted by the length) if version in filtered and len(url) < len(filtered[version]): @@ -25,10 +25,15 @@ def filter_versions(cp, versions): if helpers.version_blacklisted(cp, version): continue - filtered[version] = {"url": url, "handler": handler} + filtered[version] = { + "url": url, + "handler": handler, + "confidence": confidence + } return [ - (cp, filtered[version]["url"], version, filtered[version]["handler"]) + (cp, filtered[version]["url"], version, 
filtered[version]["handler"], + filtered[version]["confidence"]) for version in filtered ] From 9989433ad5e5b5f33474113ac8e6c0c82448cacd Mon Sep 17 00:00:00 2001 From: volpino Date: Wed, 23 May 2012 23:08:07 +0200 Subject: [PATCH 5/6] euscan: detect version type In the formatted output display the version type (beta, alpha, rc, ...) Signed-off-by: volpino --- pym/euscan/__init__.py | 4 +++- pym/euscan/helpers.py | 12 ++++++++++++ 2 files changed, 15 insertions(+), 1 deletion(-) diff --git a/pym/euscan/__init__.py b/pym/euscan/__init__.py index 796c56a..bb83dc8 100644 --- a/pym/euscan/__init__.py +++ b/pym/euscan/__init__.py @@ -131,11 +131,13 @@ class EuscanOutput(object): raise TypeError("Invalid output format") def result(self, cp, version, url, handler, confidence): + from euscan.helpers import get_version_type + if self.config['format']: _curr = self.queries[self.current_query] _curr["result"].append( {"version": version, "urls": [url], "handler": handler, - "confidence": confidence} + "confidence": confidence, "type": get_version_type(version)} ) else: if not self.config['quiet']: diff --git a/pym/euscan/helpers.py b/pym/euscan/helpers.py index 3f7b767..c73c45e 100644 --- a/pym/euscan/helpers.py +++ b/pym/euscan/helpers.py @@ -42,6 +42,18 @@ _v_end = '((-|_)(pre|p|beta|b|alpha|a|rc|r)\d*)' _v = r'((\d+)((\.\d+)*)([a-zA-Z]*?)(' + _v_end + '*))' +def get_version_type(version): + types = [] + gentoo_types = ("alpha", "beta", "pre", "rc", "p") + + for token in re.findall("[\._-]([a-zA-Z]+)", version): + if token in gentoo_types: + types.append(token) + if types: + return types[0] + return "release" + + # Stolen from g-pypi def gentoo_mangle_version(up_pv): """Convert PV to MY_PV if needed From 4d606ae1cf98a92536eac3d3f16f701990c61123 Mon Sep 17 00:00:00 2001 From: volpino Date: Thu, 24 May 2012 20:27:18 +0200 Subject: [PATCH 6/6] euscan: added scan time to formatted output scan_time in metadata dictionary Signed-off-by: volpino --- pym/euscan/scan.py | 7 ++++++- 1 
file changed, 6 insertions(+), 1 deletion(-) diff --git a/pym/euscan/scan.py b/pym/euscan/scan.py index 37cd71f..f6660e3 100644 --- a/pym/euscan/scan.py +++ b/pym/euscan/scan.py @@ -102,7 +102,8 @@ def scan_upstream(query): return None # useful data only for formatted output - output.metadata("datetime", datetime.now().isoformat(), show=False) + start_time = datetime.now() + output.metadata("datetime", start_time.isoformat(), show=False) output.metadata("cp", pkg.cp, show=False) output.metadata("cpv", pkg.cpv, show=False) @@ -152,4 +153,8 @@ def scan_upstream(query): else: urls = alist + # output scan time for formatted output + scan_time = (datetime.now() - start_time).total_seconds() + output.metadata("scan_time", scan_time, show=False) + return scan_upstream_urls(pkg.cpv, urls)