euscan: json format output

Now "-f json" works as intended: it reports the handler type used to
retrieve each version and also outputs package metadata.

Signed-off-by: volpino <fox91@anche.no>
This commit is contained in:
volpino 2012-05-23 16:30:43 +02:00
parent 8cb19b5a6b
commit a18083bd98
9 changed files with 150 additions and 100 deletions

View File

@ -212,7 +212,7 @@ def main():
# parse command line options and actions
try:
packages = parse_args()
queries = parse_args()
except ParseArgsException as e:
if e.value == 'help':
print_usage(help='all')
@ -233,11 +233,13 @@ def main():
if CONFIG['verbose'] > 2:
httplib.HTTPConnection.debuglevel = 1
for package in packages:
for query in queries:
ret = []
output.set_query(query)
try:
ret = scan_upstream(package)
ret = scan_upstream(query)
except AmbiguousPackageName as e:
pkgs = e.args[0]
output.eerror("\n".join(pkgs))
@ -252,24 +254,27 @@ def main():
exit_helper(1)
except GentoolkitException as err:
output.eerror('%s: %s' % (package, str(err)))
output.eerror('%s: %s' % (query, str(err)))
exit_helper(1)
except Exception as err:
output.eerror('%s: %s' % (package, str(err)))
output.eerror('%s: %s' % (query, str(err)))
exit_helper(1)
if not CONFIG['quiet'] and not CONFIG['format']:
print()
for cp, url, version in ret:
output.result(cp, version, url)
if ret is not None:
if len(ret) > 0:
for cp, url, version, handler in ret:
output.result(cp, version, url, handler)
elif not CONFIG['quiet']:
output.ewarn(
"Didn't find any new version, check package's homepage " +
"for more informations"
)
if not len(ret) and not CONFIG['quiet']:
output.ewarn(
"Didn't find any new version, check package's homepage for " +
"more informations"
)
output.set_query(None)
if __name__ == "__main__":

View File

@ -59,10 +59,11 @@ BRUTEFORCE_BLACKLIST_PACKAGES = [
BRUTEFORCE_BLACKLIST_URLS = [
'http://(.*)dockapps.org/download.php/id/(.*)', # infinite loop
'http://hydra.nixos.org/build/(.*)', # infinite loop
'http://www.rennings.net/gentoo/distfiles/(.*)', # Doesn't respect 404, infinite loop
'http://art.gnome.org/download/(.*)', # Doesn't respect 404, infinite loop
'http://barelysufficient.org/~olemarkus/(.*)', # Doesn't respect 404, infinite loop
'http://olemarkus.org/~olemarkus/(.*)', # Doesn't respect 404, infinite loop
# Doesn't respect 404, infinite loop
'http://www.rennings.net/gentoo/distfiles/(.*)',
'http://art.gnome.org/download/(.*)',
'http://barelysufficient.org/~olemarkus/(.*)',
'http://olemarkus.org/~olemarkus/(.*)',
]
ROBOTS_TXT_BLACKLIST_DOMAINS = [
@ -96,26 +97,45 @@ class EuscanOutput(object):
"""
def __init__(self, config):
self.config = config
self.data = defaultdict(StringIO)
self.packages = defaultdict(list)
self.queries = defaultdict(dict)
self.current_query = None
def set_query(self, query):
    """Select the query currently being scanned.

    Initializes the per-query output slots (captured messages, scan
    results, metadata) on first use; passing None clears the current
    query once a scan is finished.
    """
    self.current_query = query
    # Idiomatic membership test (PEP 8 E713) and flattened condition;
    # only create the buckets once so repeated calls for the same query
    # never clobber results already collected.
    if query is not None and query not in self.queries:
        self.queries[query] = {
            "messages": defaultdict(StringIO),
            "result": [],
            "metadata": {},
        }
def get_formatted_output(self):
data = {}
for key in self.data:
if key not in ("ebegin", "eend"):
val = [x for x in self.data[key].getvalue().split("\n") if x]
data[key] = val
data["result"] = self.packages
for query in self.queries:
data[query] = {
"result": self.queries[query]["result"],
"metadata": self.queries[query]["metadata"],
"messages": {}
}
for key in self.queries[query]["messages"]:
if key not in ("ebegin", "eend"):
_msg = self.queries[query]["messages"][key].getvalue()
val = [x for x in _msg.split("\n") if x]
data[query]["messages"][key] = val
if self.config["format"].lower() == "json":
return json.dumps(data, indent=self.config["indent"])
else:
raise TypeError("Invalid output format")
def result(self, cp, version, url):
def result(self, cp, version, url, handler):
if self.config['format']:
self.packages[cp].append({"version": version, "url": url})
_curr = self.queries[self.current_query]
_curr["result"].append(
{"version": version, "urls": [url], "handler": handler}
)
else:
if not self.config['quiet']:
print "Upstream Version:", pp.number("%s" % version),
@ -123,16 +143,21 @@ class EuscanOutput(object):
else:
print pp.cpv("%s-%s" % (cp, version)) + ":", pp.path(url)
def __getattr__(self, key):
output_file = self.data[key] if self.config["format"] else None
def metadata(self, key, value, show=True):
    """Record one piece of package metadata (e.g. homepage, cpv) for the
    current query.

    When a structured output format is configured, the value is stored
    under the current query for later serialization; otherwise it is
    printed immediately unless *show* is False.
    """
    if self.config["format"]:
        self.queries[self.current_query]["metadata"][key] = value
    elif show:
        # Python 2 print statement: emits "Key: value" with the key
        # capitalized for display.
        print "%s: %s" % (key.capitalize(), value)
if output_file:
_output = EOutputFile(out_file=self.data[key],
def __getattr__(self, key):
    """Proxy unknown attribute lookups (einfo, ewarn, eerror, ...) to an
    EOutputFile instance.

    With a structured format configured, messages are captured into the
    per-query StringIO buffer keyed by *key*; otherwise they go straight
    to the console.
    """
    if self.config["format"]:
        # messages is a defaultdict(StringIO), so a fresh buffer is
        # created on first use of each message kind.
        out_file = self.queries[self.current_query]["messages"][key]
        _output = EOutputFile(out_file=out_file,
                              quiet=self.config['quiet'])
        ret = getattr(_output, key)
    else:
        ret = getattr(EOutputFile(quiet=self.config['quiet']), key)
    return ret

View File

@ -3,8 +3,9 @@ import portage
import urllib2
import json
from euscan import helpers
import euscan
from euscan import helpers, output
HANDLER_NAME = "cpan"
_cpan_package_name_re = re.compile("mirror://cpan/authors/.*/([^/.]*).*")
@ -83,7 +84,7 @@ def scan(cpv, url):
orig_url = url
url = 'http://search.cpan.org/api/dist/%s' % pkg
euscan.output.einfo("Using: " + url)
output.einfo("Using: " + url)
try:
fp = helpers.urlopen(url)
@ -125,7 +126,7 @@ def scan(cpv, url):
if url == orig_url:
continue
ret.append((url, pv))
ret.append((url, pv, HANDLER_NAME))
return ret

View File

@ -7,9 +7,10 @@ from BeautifulSoup import BeautifulSoup
import portage
from euscan import CONFIG, SCANDIR_BLACKLIST_URLS, \
BRUTEFORCE_BLACKLIST_PACKAGES, BRUTEFORCE_BLACKLIST_URLS
from euscan import helpers
import euscan
BRUTEFORCE_BLACKLIST_PACKAGES, BRUTEFORCE_BLACKLIST_URLS, output, helpers
HANDLER_NAME = "generic"
BRUTEFORCE_HANDLER_NAME = "brute_force"
def scan_html(data, url, pattern):
@ -53,7 +54,7 @@ def scan_directory_recursive(cp, ver, rev, url, steps, orig_url):
steps = steps[1:]
euscan.output.einfo("Scanning: %s" % url)
output.einfo("Scanning: %s" % url)
try:
fp = helpers.urlopen(url)
@ -87,7 +88,7 @@ def scan_directory_recursive(cp, ver, rev, url, steps, orig_url):
path = url + path
if not steps and path not in orig_url:
versions.append((path, pv))
versions.append((path, pv, HANDLER_NAME))
if steps:
ret = scan_directory_recursive(cp, ver, rev, path, steps, orig_url)
@ -99,7 +100,7 @@ def scan_directory_recursive(cp, ver, rev, url, steps, orig_url):
def scan(cpv, url):
for bu in SCANDIR_BLACKLIST_URLS:
if re.match(bu, url):
euscan.output.einfo("%s is blacklisted by rule %s" % (url, bu))
output.einfo("%s is blacklisted by rule %s" % (url, bu))
return []
resolved_url = helpers.parse_mirror(url)
@ -112,23 +113,25 @@ def scan(cpv, url):
if ver not in resolved_url:
newver = helpers.version_change_end_sep(ver)
if newver and newver in resolved_url:
euscan.output.einfo(
output.einfo(
"Version: using %s instead of %s" % (newver, ver)
)
ver = newver
template = helpers.template_from_url(resolved_url, ver)
if '${' not in template:
euscan.output.einfo(
output.einfo(
"Url doesn't seems to depend on version: %s not found in %s" %
(ver, resolved_url)
)
return []
else:
euscan.output.einfo("Scanning: %s" % template)
output.einfo("Scanning: %s" % template)
steps = helpers.generate_scan_paths(template)
return scan_directory_recursive(cp, ver, rev, "", steps, url)
ret = scan_directory_recursive(cp, ver, rev, "", steps, url)
return ret
def brute_force(cpv, url):
@ -140,15 +143,15 @@ def brute_force(cpv, url):
for bp in BRUTEFORCE_BLACKLIST_PACKAGES:
if re.match(bp, cp):
euscan.output.einfo("%s is blacklisted by rule %s" % (cp, bp))
output.einfo("%s is blacklisted by rule %s" % (cp, bp))
return []
for bp in BRUTEFORCE_BLACKLIST_URLS:
if re.match(bp, url):
euscan.output.einfo("%s is blacklisted by rule %s" % (cp, bp))
output.einfo("%s is blacklisted by rule %s" % (cp, bp))
return []
euscan.output.einfo("Generating version from " + ver)
output.einfo("Generating version from " + ver)
components = helpers.split_version(ver)
versions = helpers.gen_versions(components, CONFIG["brute-force"])
@ -159,18 +162,18 @@ def brute_force(cpv, url):
versions.remove(v)
if not versions:
euscan.output.einfo("Can't generate new versions from " + ver)
output.einfo("Can't generate new versions from " + ver)
return []
template = helpers.template_from_url(url, ver)
if '${PV}' not in template:
euscan.output.einfo(
output.einfo(
"Url doesn't seems to depend on full version: %s not found in %s" %
(ver, url))
return []
else:
euscan.output.einfo("Brute forcing: %s" % template)
output.einfo("Brute forcing: %s" % template)
result = []
@ -195,10 +198,10 @@ def brute_force(cpv, url):
if not infos:
continue
result.append([url, version])
result.append([url, version, BRUTEFORCE_HANDLER_NAME])
if len(result) > CONFIG['brute-force-false-watermark']:
euscan.output.einfo(
output.einfo(
"Broken server detected ! Skipping brute force."
)
return []

View File

@ -1,5 +1,7 @@
from euscan.handlers import generic
HANDLER_NAME = "kde"
def can_handle(cpv, url):
if url.startswith('mirror://kde/'):
@ -13,7 +15,7 @@ def clean_results(results):
for path, version in results:
if version == '5SUMS':
continue
ret.append((path, version))
ret.append((path, version, HANDLER_NAME))
return ret

View File

@ -3,8 +3,9 @@ import portage
import urllib2
import xml.dom.minidom
from euscan import helpers
import euscan
from euscan import helpers, output
HANDLER_NAME = "php"
def can_handle(cpv, url):
@ -34,7 +35,7 @@ def scan(cpv, url):
orig_url = url
url = 'http://%s/rest/r/%s/allreleases.xml' % (channel, pkg.lower())
euscan.output.einfo("Using: " + url)
output.einfo("Using: " + url)
try:
fp = helpers.urlopen(url)
@ -64,7 +65,7 @@ def scan(cpv, url):
if url == orig_url:
continue
ret.append((url, pv))
ret.append((url, pv, HANDLER_NAME))
return ret

View File

@ -3,8 +3,9 @@ import re
import portage
from euscan import helpers
import euscan
from euscan import helpers, output
HANDLER_NAME = "pypi"
def can_handle(cpv, url):
@ -26,7 +27,7 @@ def scan(cpv, url):
package = guess_package(cpv, url)
euscan.output.einfo("Using PyPi XMLRPC: " + package)
output.einfo("Using PyPi XMLRPC: " + package)
client = xmlrpclib.ServerProxy('http://pypi.python.org/pypi')
versions = client.package_releases(package)
@ -46,7 +47,7 @@ def scan(cpv, url):
continue
urls = client.release_urls(package, up_pv)
urls = " ".join([infos['url'] for infos in urls])
ret.append((urls, pv))
ret.append((urls, pv, HANDLER_NAME))
return ret

View File

@ -3,8 +3,9 @@ import portage
import json
import urllib2
from euscan import helpers
import euscan
from euscan import helpers, output
HANDLER_NAME = "rubygem"
def can_handle(cpv, url):
@ -31,13 +32,13 @@ def scan(cpv, url):
gem = guess_gem(cpv, url)
if not gem:
euscan.output.eerror("Can't guess gem name using %s and %s" % \
output.eerror("Can't guess gem name using %s and %s" % \
(cpv, url))
return []
url = 'http://rubygems.org/api/v1/versions/%s.json' % gem
euscan.output.einfo("Using: " + url)
output.einfo("Using: " + url)
try:
fp = helpers.urlopen(url)
@ -65,7 +66,7 @@ def scan(cpv, url):
if helpers.version_filtered(cp, ver, pv):
continue
url = 'http://rubygems.org/gems/%s-%s.gem' % (gem, up_pv)
ret.append((url, pv))
ret.append((url, pv, HANDLER_NAME))
return ret

View File

@ -1,8 +1,7 @@
import os
import sys
from datetime import datetime
import portage
from portage.dbapi import porttree
import gentoolkit.pprinter as pp
@ -10,28 +9,28 @@ from gentoolkit.query import Query
from gentoolkit.eclean.search import (port_settings)
from euscan import CONFIG, BLACKLIST_PACKAGES
from euscan import handlers
from euscan import helpers
import euscan
from euscan import handlers, helpers, output
def filter_versions(cp, versions):
filtered = {}
for url, version in versions:
for url, version, handler in versions:
''' Try to keep the most specific urls (determined by the length) '''
# Try to keep the most specific urls (determined by the length)
if version in filtered and len(url) < len(filtered[version]):
continue
''' Remove blacklisted versions '''
# Remove blacklisted versions
if helpers.version_blacklisted(cp, version):
continue
filtered[version] = url
filtered[version] = {"url": url, "handler": handler}
return [(cp, filtered[version], version) for version in filtered]
return [
(cp, filtered[version]["url"], version, filtered[version]["handler"])
for version in filtered
]
def scan_upstream_urls(cpv, urls):
@ -41,20 +40,20 @@ def scan_upstream_urls(cpv, urls):
for url in urls[filename]:
if not CONFIG['quiet'] and not CONFIG['format']:
pp.uprint()
euscan.output.einfo("SRC_URI is '%s'" % url)
output.einfo("SRC_URI is '%s'" % url)
if '://' not in url:
euscan.output.einfo("Invalid url '%s'" % url)
output.einfo("Invalid url '%s'" % url)
continue
''' Try normal scan '''
# Try normal scan
if CONFIG["scan-dir"]:
versions.extend(handlers.scan(cpv, url))
if versions and CONFIG['oneshot']:
break
''' Brute Force '''
# Brute Force
if CONFIG["brute-force"] > 0:
versions.extend(handlers.brute_force(cpv, url))
@ -79,10 +78,10 @@ def scan_upstream(query):
)
if not matches:
sys.stderr.write(
output.ewarn(
pp.warn("No package matching '%s'" % pp.pkgquery(query))
)
return []
return None
matches = sorted(matches)
pkg = matches.pop()
@ -91,29 +90,41 @@ def scan_upstream(query):
pkg = matches.pop()
if not pkg:
sys.stderr.write(pp.warn("Package '%s' only have a dev version (9999)"
% pp.pkgquery(pkg.cp)))
return []
output.ewarn(
pp.warn("Package '%s' only have a dev version (9999)"
% pp.pkgquery(pkg.cp))
)
return None
# useful data only for formatted output
output.metadata("datetime", datetime.now().isoformat(), show=False)
output.metadata("cp", pkg.cp, show=False)
output.metadata("cpv", pkg.cpv, show=False)
if pkg.cp in BLACKLIST_PACKAGES:
sys.stderr.write(
output.ewarn(
pp.warn("Package '%s' is blacklisted" % pp.pkgquery(pkg.cp))
)
return []
return None
if not CONFIG['quiet'] and not CONFIG['format']:
pp.uprint(
" * %s [%s]" % (pp.cpv(pkg.cpv), pp.section(pkg.repo_name()))
)
pp.uprint()
if not CONFIG['quiet']:
if not CONFIG['format']:
pp.uprint(
" * %s [%s]" % (pp.cpv(pkg.cpv), pp.section(pkg.repo_name()))
)
pp.uprint()
else:
output.metadata("overlay", pp.section(pkg.repo_name()))
ebuild_path = pkg.ebuild_path()
if ebuild_path:
pp.uprint('Ebuild: ' + pp.path(os.path.normpath(ebuild_path)))
output.metadata(
"ebuild", pp.path(os.path.normpath(ebuild_path))
)
pp.uprint('Repository: ' + pkg.repo_name())
pp.uprint('Homepage: ' + pkg.environment("HOMEPAGE"))
pp.uprint('Description: ' + pkg.environment("DESCRIPTION"))
output.metadata("repository", pkg.repo_name())
output.metadata("homepage", pkg.environment("HOMEPAGE"))
output.metadata("description", pkg.environment("DESCRIPTION"))
cpv = pkg.cpv
metadata = {
@ -125,11 +136,11 @@ def scan_upstream(query):
alist = porttree._parse_uri_map(cpv, metadata, use=use)
aalist = porttree._parse_uri_map(cpv, metadata)
except Exception as e:
sys.stderr.write(pp.warn("%s\n" % str(e)))
sys.stderr.write(
output.ewarn(pp.warn("%s\n" % str(e)))
output.ewarn(
pp.warn("Invalid SRC_URI for '%s'" % pp.pkgquery(cpv))
)
return []
return None
if "mirror" in portage.settings.features:
urls = aalist