euscan/handlers: rework handlers, better remote-id + watch support

Signed-off-by: Corentin Chary <corentin.chary@gmail.com>
This commit is contained in:
Corentin Chary 2012-08-03 21:50:54 +02:00
parent d262b935e6
commit 8d91237988
20 changed files with 518 additions and 509 deletions

View File

@ -298,11 +298,16 @@ def main():
exit_helper(1) exit_helper(1)
except Exception as err: except Exception as err:
import traceback
print ('-'*60)
traceback.print_exc(file=sys.stderr)
print ('-'*60)
output.eerror('%s: %s' % (query, str(err))) output.eerror('%s: %s' % (query, str(err)))
exit_helper(1) exit_helper(1)
if not ret and not CONFIG['quiet']: if not ret and not CONFIG['quiet']:
output.ewarn( output.einfo(
"Didn't find any new version, check package's homepage " + "Didn't find any new version, check package's homepage " +
"for more informations" "for more informations"
) )

View File

@ -176,8 +176,9 @@ def patch_metadata(package, watch_data, diff=False):
valid = ("uversionmangle", "versionmangle", "downloadurlmangle") valid = ("uversionmangle", "versionmangle", "downloadurlmangle")
cleaned_opts = [] cleaned_opts = []
for opt in opts.split(","): for opt in opts.split(","):
opt_name, opt_value = opt.split("=") opt_name, opt_value = opt.split("=", 1)
if opt_name in valid: if opt_name in valid:
if opt_name == "uversionmangle": opt_name = "versionmangle"
cleaned_opts.append('%s="%s"' % (opt_name, opt_value)) cleaned_opts.append('%s="%s"' % (opt_name, opt_value))
opts = " ".join(cleaned_opts) opts = " ".join(cleaned_opts)

View File

@ -1,49 +1,125 @@
import sys import os, sys
import pkgutil
from euscan import CONFIG, output from euscan import CONFIG, output
from euscan.handlers.package import handlers as pkg_handlers import euscan.mangling
from euscan.handlers.url import handlers as url_handlers
from gentoolkit.metadata import MetaData
def find_best_pkg_handler(pkg): handlers = {'package' : [], 'url' : [], 'all' : {}}
# autoimport all modules in this directory and append them to handlers list
for loader, module_name, is_pkg in pkgutil.walk_packages(__path__):
module = loader.find_module(module_name).load_module(module_name)
if not hasattr(module, 'HANDLER_NAME'):
continue
if hasattr(module, 'scan_url'):
handlers['url'].append(module)
if hasattr(module, 'scan_pkg'):
handlers['package'].append(module)
handlers['all'][module.HANDLER_NAME] = module
# sort handlers by priority
def sort_handlers(handlers):
    """
    Return a new list with the given handler modules ordered by their
    PRIORITY attribute, highest first.
    """
    ranked = list(handlers)
    ranked.sort(key=lambda module: module.PRIORITY, reverse=True)
    return ranked
handlers['package'] = sort_handlers(handlers['package'])
handlers['url'] = sort_handlers(handlers['url'])
def find_best_handler(kind, pkg, *args):
""" """
Find the best handler for the given package Find the best handler for the given package
""" """
for handler in pkg_handlers: for handler in handlers[kind]:
if handler.can_handle(pkg): if handler.can_handle(pkg, *args):
return handler return handler
return None return None
def find_handlers(kind, names):
ret = []
def find_best_url_handler(pkg, url): for name in names:
""" # Does this handler exist, and handle this kind of thing ? (pkg / url)
Find the best handler for the given url if name in handlers['all'] and handlers['all'][name] in handlers[kind]:
""" ret.append(handlers['all'][name])
for handler in url_handlers:
if handler.can_handle(pkg, url):
return handler
return None
return ret
def scan(pkg, urls, on_progress=None): def get_metadata(pkg):
""" metadata = {}
Scans upstream for the given package.
First tries if a package wide handler is available, then fallbacks
in url handling.
"""
pkg_handler = find_best_pkg_handler(pkg)
if pkg_handler:
if on_progress:
on_progress(increment=35)
if not CONFIG['quiet'] and not CONFIG['format']: pkg_metadata = None
sys.stdout.write("\n")
versions = pkg_handler.scan(pkg) meta_override = os.path.join('metadata', pkg.category, pkg.name, 'metadata.xml')
try:
if os.path.exists(meta_override):
pkg_metadata = MetaData(meta_override)
output.einfo('Using custom metadata: %s' % meta_override)
if not pkg_metadata:
pkg_metadata = pkg.metadata
except Exception, e:
output.ewarn('Error when fetching metadata: %s' % str(e))
if not pkg_metadata:
return {}
# Support multiple remote-id and multiple watch
for upstream in pkg_metadata._xml_tree.findall("upstream"):
for node in upstream.findall("watch"):
options = dict(node.attrib)
options['data'] = node.text
if "type" in options:
handler = options['type']
else:
handler = "url"
options['type'] = "url"
for key in ["versionmangle", "downloadurlmangle"]:
value = options.get(key, None)
if value:
options[key] = value.split(";")
if handler not in metadata:
metadata[handler] = []
metadata[handler].append(options)
for upstream in pkg_metadata._xml_tree.findall("upstream"):
for node in upstream.findall("remote-id"):
handler = node.attrib.get("type")
if not handler:
continue
if handler in metadata:
for i in range(len(metadata[handler])):
if not metadata[handler][i]['data']:
metadata[handler][i]['data'] = node.text
else:
metadata[handler] = [{'type' : handler, 'data' : node.text }]
return metadata
def scan_pkg(pkg_handler, pkg, options, on_progress=None):
versions = []
if on_progress: if on_progress:
on_progress(increment=35) on_progress(increment=35)
for o in options:
versions += pkg_handler.scan_pkg(pkg, o)
if on_progress:
on_progress(increment=35)
return versions return versions
def scan_url(pkg, urls, options, on_progress=None):
versions = []
if on_progress: if on_progress:
progress_available = 70 progress_available = 70
num_urls = sum([len(urls[fn]) for fn in urls]) num_urls = sum([len(urls[fn]) for fn in urls])
@ -52,16 +128,12 @@ def scan(pkg, urls, on_progress=None):
else: else:
progress_increment = 0 progress_increment = 0
versions = []
for filename in urls: for filename in urls:
for url in urls[filename]: for url in urls[filename]:
if on_progress and progress_available > 0: if on_progress and progress_available > 0:
on_progress(increment=progress_increment) on_progress(increment=progress_increment)
progress_available -= progress_increment progress_available -= progress_increment
if not CONFIG['quiet'] and not CONFIG['format']:
sys.stdout.write("\n")
output.einfo("SRC_URI is '%s'" % url) output.einfo("SRC_URI is '%s'" % url)
if '://' not in url: if '://' not in url:
@ -69,8 +141,9 @@ def scan(pkg, urls, on_progress=None):
continue continue
try: try:
url_handler = find_best_url_handler(pkg, url) url_handler = find_best_handler('url', pkg, url)
versions.extend(url_handler.scan(pkg, url)) for o in options:
versions += url_handler.scan_url(pkg, url, o)
except Exception as e: except Exception as e:
output.ewarn( output.ewarn(
"Handler failed: [%s] %s" % "Handler failed: [%s] %s" %
@ -84,3 +157,44 @@ def scan(pkg, urls, on_progress=None):
on_progress(increment=progress_available) on_progress(increment=progress_available)
return versions return versions
def scan(pkg, urls, on_progress=None):
    """
    Scan upstream for the given package.

    Package handlers named in the package metadata are used first; when
    there are none, the best matching package handler is tried. URL
    handlers are only used when no package handler was selected at all.
    """
    if not (CONFIG['quiet'] or CONFIG['format']):
        sys.stdout.write('\n')

    metadata = get_metadata(pkg)

    selected = find_handlers('package', metadata.keys())
    if not selected:
        best = find_best_handler('package', pkg)
        if best:
            selected = [best]

    found = []
    if selected:
        for handler in selected:
            # Handlers without a metadata entry get one empty option set.
            handler_options = metadata.get(handler.HANDLER_NAME, [{}])
            found += scan_pkg(handler, pkg, handler_options, on_progress)
    else:
        found += scan_url(pkg, urls, [{}], on_progress)

    return found
def mangle(kind, name, string):
    """
    Run the ``mangle_<kind>`` function of the handler registered as *name*
    on *string*.

    Returns None when no such handler is registered or when the handler
    has no ``mangle_<kind>`` attribute.
    """
    registry = handlers['all']
    if name not in registry:
        return None

    module = registry[name]
    hook_name = 'mangle_%s' % kind
    if hasattr(module, hook_name):
        return getattr(module, hook_name)(string)
    return None
def mangle_url(name, string):
    """
    Shortcut for mangle() with kind 'url'.
    """
    kind = 'url'
    return mangle(kind, name, string)
def mangle_version(name, string):
    """
    Shortcut for mangle() with kind 'version'.
    """
    kind = 'version'
    return mangle(kind, name, string)

View File

@ -3,7 +3,7 @@ import portage
import urllib2 import urllib2
import json import json
from euscan import helpers, output from euscan import helpers, output, mangling
HANDLER_NAME = "cpan" HANDLER_NAME = "cpan"
CONFIDENCE = 100 CONFIDENCE = 100
@ -11,10 +11,8 @@ PRIORITY = 90
_cpan_package_name_re = re.compile("mirror://cpan/authors/.*/([^/.]*).*") _cpan_package_name_re = re.compile("mirror://cpan/authors/.*/([^/.]*).*")
def can_handle(pkg, url=None):
def can_handle(pkg, url): return url and url.startswith('mirror://cpan/')
return url.startswith('mirror://cpan/')
def guess_package(cp, url): def guess_package(cp, url):
match = _cpan_package_name_re.search(url) match = _cpan_package_name_re.search(url)
@ -33,7 +31,7 @@ def guess_package(cp, url):
return pkg return pkg
def gentoo_mangle_version(up_pv): def mangle_version(up_pv):
# clean # clean
up_pv = up_pv.replace("._", "_") # e.g.: 0.999._002 -> 0.999_002 up_pv = up_pv.replace("._", "_") # e.g.: 0.999._002 -> 0.999_002
up_pv = up_pv.replace("_0.", "_") # e.g.: 0.30_0.1 -> 0.30_1 up_pv = up_pv.replace("_0.", "_") # e.g.: 0.30_0.1 -> 0.30_1
@ -68,53 +66,25 @@ def gentoo_mangle_version(up_pv):
if rc_part: if rc_part:
pv = "%s_rc" % pv pv = "%s_rc" % pv
return helpers.gentoo_mangle_version(pv)
def cpan_trim_version(pv):
pv = re.sub('^[a-zA-Z]+', '', pv)
pv = re.sub('[a-zA-Z]$', '', pv)
return pv return pv
def scan_url(pkg, url, options):
def cpan_mangle_version(pv):
pos = pv.find('.')
if pos < 0:
return pv
up_pv = pv.replace('.', '')
up_pv = up_pv[0:pos] + '.' + up_pv[pos:]
up_pv = cpan_trim_version(up_pv)
return up_pv
def cpan_vercmp(cp, a, b):
try:
return float(a) - float(b)
except:
if a < b:
return -1
else:
return 1
def scan(pkg, url):
cp, ver, rev = portage.pkgsplit(pkg.cpv) cp, ver, rev = portage.pkgsplit(pkg.cpv)
remote_pkg = guess_package(cp, url) remote_pkg = guess_package(cp, url)
output.einfo("Using CPAN API: %s", remote_pkg) output.einfo("Using CPAN API: %s", remote_pkg)
result = scan_remote(pkg, [remote_pkg]) return scan_pkg(pkg, {'data' : remote_pkg})
ret = [] def scan_pkg(pkg, options):
for url, pv in result: remote_pkg = options['data']
ret.append((url, pv, HANDLER_NAME, CONFIDENCE))
return ret
# Defaults to CPAN mangling rules
if 'versionmangle' not in options:
options['versionmangle'] = ['cpan', 'gentoo']
def scan_remote(pkg, remote_data):
remote_pkg = remote_data[0]
url = 'http://search.cpan.org/api/dist/%s' % remote_pkg url = 'http://search.cpan.org/api/dist/%s' % remote_pkg
cp, ver, rev = portage.pkgsplit(pkg.cpv) cp, ver, rev = pkg.cp, pkg.version, pkg.revision
try: try:
fp = helpers.urlopen(url) fp = helpers.urlopen(url)
@ -139,11 +109,9 @@ def scan_remote(pkg, remote_data):
# continue # continue
up_pv = version['version'] up_pv = version['version']
up_pv = cpan_trim_version(up_pv) pv = mangling.mangle_version(up_pv, options)
pv = gentoo_mangle_version(up_pv)
up_ver = cpan_mangle_version(ver)
if helpers.version_filtered(cp, up_ver, up_pv, cpan_vercmp): if helpers.version_filtered(cp, ver, pv):
continue continue
url = 'mirror://cpan/authors/id/%s/%s/%s/%s' % ( url = 'mirror://cpan/authors/id/%s/%s/%s/%s' % (
@ -153,6 +121,7 @@ def scan_remote(pkg, remote_data):
version['archive'] version['archive']
) )
ret.append((url, pv)) url = mangling.mangle_url(url, options)
ret.append((url, pv, HANDLER_NAME, CONFIDENCE))
return ret return ret

View File

@ -12,7 +12,7 @@ except ImportError:
import portage import portage
from euscan import CONFIG, SCANDIR_BLACKLIST_URLS, \ from euscan import CONFIG, SCANDIR_BLACKLIST_URLS, \
BRUTEFORCE_BLACKLIST_PACKAGES, BRUTEFORCE_BLACKLIST_URLS, output, helpers BRUTEFORCE_BLACKLIST_PACKAGES, BRUTEFORCE_BLACKLIST_URLS, output, helpers, mangling
HANDLER_NAME = "generic" HANDLER_NAME = "generic"
CONFIDENCE = 45 CONFIDENCE = 45
@ -69,6 +69,7 @@ def scan_html(data, url, pattern):
(".".join([x for x in match.groups() if x is not None]), (".".join([x for x in match.groups() if x is not None]),
match.group(0)) match.group(0))
) )
return results return results
@ -87,7 +88,7 @@ def scan_ftp(data, url, pattern):
return results return results
def scan_directory_recursive(cp, ver, rev, url, steps, orig_url): def scan_directory_recursive(cp, ver, rev, url, steps, orig_url, options):
if not steps: if not steps:
return [] return []
@ -120,7 +121,8 @@ def scan_directory_recursive(cp, ver, rev, url, steps, orig_url):
versions = [] versions = []
for up_pv, path in results: for up_pv, path in results:
pv = helpers.gentoo_mangle_version(up_pv) pv = mangling.mangle_version(up_pv, options)
if helpers.version_filtered(cp, ver, pv): if helpers.version_filtered(cp, ver, pv):
continue continue
if not url.endswith("/"): if not url.endswith("/"):
@ -129,16 +131,17 @@ def scan_directory_recursive(cp, ver, rev, url, steps, orig_url):
if not steps and path not in orig_url: if not steps and path not in orig_url:
confidence = confidence_score(path, orig_url) confidence = confidence_score(path, orig_url)
path = mangling.mangle_url(path, options)
versions.append((path, pv, HANDLER_NAME, confidence)) versions.append((path, pv, HANDLER_NAME, confidence))
if steps: if steps:
ret = scan_directory_recursive(cp, ver, rev, path, steps, orig_url) ret = scan_directory_recursive(cp, ver, rev, path, steps, orig_url, options)
versions.extend(ret) versions.extend(ret)
return versions return versions
def scan(pkg, url): def scan_url(pkg, url, options):
if CONFIG["scan-dir"]: if CONFIG["scan-dir"]:
for bu in SCANDIR_BLACKLIST_URLS: for bu in SCANDIR_BLACKLIST_URLS:
if re.match(bu, url): if re.match(bu, url):
@ -171,7 +174,7 @@ def scan(pkg, url):
output.einfo("Scanning: %s" % template) output.einfo("Scanning: %s" % template)
steps = helpers.generate_scan_paths(template) steps = helpers.generate_scan_paths(template)
ret = scan_directory_recursive(cp, ver, rev, "", steps, url) ret = scan_directory_recursive(cp, ver, rev, "", steps, url, options)
if not ret: if not ret:
ret = brute_force(pkg, url) ret = brute_force(pkg, url)

View File

@ -4,16 +4,15 @@ import re
import portage import portage
from euscan import helpers, output from euscan import helpers, output, mangling
HANDLER_NAME = "github" HANDLER_NAME = "github"
CONFIDENCE = 100 CONFIDENCE = 100
PRIORITY = 90 PRIORITY = 90
def can_handle(pkg, url): def can_handle(pkg, url=None):
return url.startswith('mirror://github/') return url and url.startswith('mirror://github/')
def guess_package(cp, url): def guess_package(cp, url):
match = re.search('^mirror://github/(.*?)/(.*?)/(.*)$', url) match = re.search('^mirror://github/(.*?)/(.*?)/(.*)$', url)
@ -21,8 +20,7 @@ def guess_package(cp, url):
assert(match) assert(match)
return (match.group(1), match.group(2), match.group(3)) return (match.group(1), match.group(2), match.group(3))
def scan_url(pkg, url, options):
def scan(pkg, url):
'http://developer.github.com/v3/repos/downloads/' 'http://developer.github.com/v3/repos/downloads/'
user, project, filename = guess_package(pkg.cpv, url) user, project, filename = guess_package(pkg.cpv, url)
@ -38,7 +36,8 @@ def scan(pkg, url):
fnre = re.compile('^%s$' % \ fnre = re.compile('^%s$' % \
re.escape(filename).replace(re.escape(ver), '(.*?)')) re.escape(filename).replace(re.escape(ver), '(.*?)'))
output.einfo("Using github API for: " + '/'.join(filename)) output.einfo("Using github API for: project=%s user=%s filename=%s" % \
(project, user, filename))
dlreq = urllib2.urlopen('https://api.github.com/repos/%s/%s/downloads' % \ dlreq = urllib2.urlopen('https://api.github.com/repos/%s/%s/downloads' % \
(user, project)) (user, project))
@ -49,9 +48,10 @@ def scan(pkg, url):
m = fnre.match(dl['name']) m = fnre.match(dl['name'])
if m: if m:
pv = helpers.gentoo_mangle_version(m.group(1)) pv = mangling.mangle_version(m.group(1), options)
if helpers.version_filtered(cp, ver, pv): if helpers.version_filtered(cp, ver, pv):
continue continue
ret.append((dl['html_url'], pv, HANDLER_NAME, CONFIDENCE)) url = mangling.mangle_url(dl['html_url'], options)
ret.append((url, pv, HANDLER_NAME, CONFIDENCE))
return ret return ret

View File

@ -1,4 +1,4 @@
from euscan.handlers.url import generic from euscan.handlers import generic
PRIORITY = 90 PRIORITY = 90
@ -6,10 +6,7 @@ HANDLER_NAME = "kde"
def can_handle(pkg, url): def can_handle(pkg, url):
if url.startswith('mirror://kde/'): return url and url.startswith('mirror://kde/')
return True
return False
def clean_results(results): def clean_results(results):
ret = [] ret = []
@ -22,18 +19,18 @@ def clean_results(results):
return ret return ret
def scan(pkg, url): def scan_url(pkg, url):
results = generic.scan(pkg.cpv, url) results = generic.scan(pkg.cpv, url)
if url.startswith('mirror://kde/unstable/'): if generic.startswith('mirror://kde/unstable/'):
url = url.replace('mirror://kde/unstable/', 'mirror://kde/stable/') url = generic.replace('mirror://kde/unstable/', 'mirror://kde/stable/')
results += generic.scan(pkg.cpv, url) results += generic.scan(pkg.cpv, url)
if not results: # if nothing was found go brute forcing if not results: # if nothing was found go brute forcing
results = generic.brute_force(pkg.cpv, url) results = generic.brute_force(pkg.cpv, url)
if url.startswith('mirror://kde/unstable/'): if generic.startswith('mirror://kde/unstable/'):
url = url.replace('mirror://kde/unstable/', 'mirror://kde/stable/') url = generic.replace('mirror://kde/unstable/', 'mirror://kde/stable/')
results += generic.brute_force(pkg.cpv, url) results += generic.brute_force(pkg.cpv, url)
return clean_results(results) return clean_results(results)

View File

@ -1,19 +0,0 @@
"""
Package wide handlers for scanning upstream
"""
import pkgutil
# Module-level list of handler modules; filled by the loop below and then
# rebound to a copy sorted by PRIORITY.
handlers = []
# autoimport all modules in this directory and append them to handlers list
for loader, module_name, is_pkg in pkgutil.walk_packages(__path__):
module = loader.find_module(module_name).load_module(module_name)
handlers.append(module)
# sort handlers by priority
# (reverse=True: the module with the largest PRIORITY ends up first)
handlers = sorted(
handlers,
key=lambda handler: handler.PRIORITY,
reverse=True
)

View File

@ -1,44 +0,0 @@
from euscan.handlers.url import handlers
from euscan import output
PRIORITY = 100
HANDLER_NAME = "remote_id"
CONFIDENCE = 100.0
url_handlers = {handler.HANDLER_NAME: handler for handler in handlers}
def can_handle(pkg):
    """
    Return True if at least one remote-id of the package's first upstream
    entry has a type that is a key of url_handlers, False otherwise
    (including when the package has no upstream entry).
    """
    try:
        remoteids = pkg.metadata.upstream()[0].upstream_remoteids()
    except IndexError:
        return False

    if not len(remoteids) > 0:
        return False
    return any(
        remote_type in url_handlers
        for _remote_value, remote_type in remoteids
    )
def scan(pkg):
    """
    Collect versions through every remote-id whose type maps to a url
    handler exposing a scan_remote function.

    Each result is a (url, version, "<HANDLER_NAME>, <remote type>",
    CONFIDENCE) tuple.
    """
    output.einfo("Using remote-id data")

    found = []
    for remote_value, remote_type in \
            pkg.metadata.upstream()[0].upstream_remoteids():
        if remote_type not in url_handlers:
            continue

        remote_data = remote_value.split("/")
        scan_remote = getattr(url_handlers[remote_type], "scan_remote", None)
        if not scan_remote:
            continue

        label = "%s, %s" % (HANDLER_NAME, remote_type)
        for url, pv in scan_remote(pkg, remote_data):
            found.append((url, pv, label, CONFIDENCE))

    return found

View File

@ -1,139 +0,0 @@
import re
import urllib2
import portage
from euscan.handlers.url import generic
from euscan import output, helpers
PRIORITY = 100
HANDLER_NAME = "watch"
CONFIDENCE = 100.0
is_pattern = r"\([^\/]+\)"
def can_handle(pkg):
    """
    Return True if the package metadata has a <watch> element inside its
    first <upstream> element, False otherwise.
    """
    try:
        upstream = pkg.metadata._xml_tree.find("upstream")
        watch = upstream.find("watch")
    except AttributeError:
        # Missing metadata / tree, or no <upstream> element (find gave None).
        return False
    return watch is not None
def parse_mangles(mangles, string):
    """
    Apply a sequence of Perl-style substitution rules to a string.

    Every rule is written either as ``s/pattern/replacement/`` or as
    ``s|pattern|replacement|``. Perl back-references such as ``$1`` in the
    replacement are rewritten to Python's backslash form before the
    substitution runs. Rules are applied in order, each one on the output
    of the previous one.

    :param mangles: iterable of rule strings
    :param string: text to transform
    :returns: the transformed text
    :raises ValueError: if a rule matches neither supported format
    """
    for mangle in mangles:
        # convert regex from perl format to python format
        # there are some regex in this format: s/pattern/replacement/
        m = re.match(r"s/(.*[^\\])/(.*)/", mangle)
        if not m:
            # or in this format s|pattern|replacement|
            m = re.match(r"s\|(.*[^\\])\|(.*)\|", mangle)
        if not m:
            # Fail with a message naming the rule instead of the opaque
            # AttributeError that None.groups() would raise.
            raise ValueError("Unsupported mangle rule: %r" % (mangle,))

        pattern, repl = m.groups()
        repl = re.sub(r"\$(\d+)", r"\\\1", repl)
        string = re.sub(pattern, repl, string)
    return string
def clean_results(results, versionmangle, urlmangle):
    """
    Rewrite scan results for this handler: the version goes through the
    version mangles, the path through the url mangles, and the handler
    name / confidence are replaced by this module's HANDLER_NAME and
    CONFIDENCE.
    """
    cleaned = []
    for raw_path, raw_version, _handler, _confidence in results:
        mangled_version = parse_mangles(versionmangle, raw_version)
        mangled_path = parse_mangles(urlmangle, raw_path)
        cleaned.append(
            (mangled_path, mangled_version, HANDLER_NAME, CONFIDENCE)
        )
    return cleaned
def parse_watch(pkg):
    """
    Yield one (base, file_pattern, versionmangle, urlmangle) tuple for
    every <watch> element of the package's first <upstream> element.

    The element text is "<base url> [<file pattern>]". Tokens are split on
    any run of whitespace so that indented or multi-line XML text parses
    the same way as a single-line entry. When the base url itself ends
    with a path segment containing a capture group, that segment becomes
    the file pattern and is cut from the end of the base url.

    versionmangle comes from the "uversionmangle" attribute, falling back
    to "versionmangle"; urlmangle comes from "downloadurlmangle". Both are
    lists obtained by splitting the attribute on ";" (empty list when the
    attribute is absent or empty).
    """
    upstream = pkg.metadata._xml_tree.find("upstream")
    for watch_tag in upstream.findall("watch"):
        # An empty <watch/> has text None; treat it like an empty string.
        tokens = (watch_tag.text or "").split()
        base = tokens[0] if tokens else ""
        file_pattern = tokens[1] if len(tokens) > 1 else None

        # the file pattern can be in the base url
        pattern_regex = r"/([^/]*\([^/]*\)[^/]*)$"
        match = re.search(pattern_regex, base)
        if match:
            file_pattern = match.group(1)
            # Cut only the trailing segment: an identical directory
            # pattern earlier in the url must be left in place.
            base = base[:match.start(1)]

        # handle sf.net specially
        base = base.replace(
            "http://sf.net/", "http://qa.debian.org/watch/sf.php/"
        )

        attrib = watch_tag.attrib
        vmangle = attrib.get("uversionmangle", None) or \
            attrib.get("versionmangle", None)
        versionmangle = vmangle.split(";") if vmangle else []
        umangle = attrib.get("downloadurlmangle", None)
        urlmangle = umangle.split(";") if umangle else []

        yield (base, file_pattern, versionmangle, urlmangle)
def handle_directory_patterns(base, file_pattern):
    """
    Directory pattern matching
    e.g.: base: ftp://ftp.nessus.org/pub/nessus/nessus-([\d\.]+)/src/
    file_pattern: nessus-core-([\d\.]+)\.tar\.gz

    The url is cut at its first path segment containing a capture group.
    The part before it is fetched and scanned with that segment as the
    pattern; every hit is turned into a (url, file_pattern) pair. An empty
    list is returned when the directory listing cannot be fetched.
    """
    segments = base.split("/")

    # Position of the first segment that holds a pattern.
    index = 0
    while index < len(segments) and \
            not re.search(is_pattern, segments[index]):
        index += 1

    basedir = "/".join(segments[:index])
    directory_pattern = segments[index]
    final = "/".join(segments[index + 1:])

    try:
        fp = helpers.urlopen(basedir)
    except (urllib2.URLError, IOError):
        return []

    if not fp:
        return []

    data = fp.read()

    if basedir.startswith("ftp://"):
        scanner = generic.scan_ftp
    else:
        scanner = generic.scan_html
    scan_data = scanner(data, basedir, directory_pattern)

    expanded = []
    for _, path in scan_data:
        expanded.append(("/".join((basedir, path, final)), file_pattern))
    return expanded
def scan(pkg):
    """
    Scan every watch entry of the package and return the combined results
    after applying each entry's version and url mangles.
    """
    output.einfo("Using watch data")

    cp, ver, rev = portage.pkgsplit(pkg.cpv)

    collected = []
    for base, file_pattern, versionmangle, urlmangle in parse_watch(pkg):
        if re.search(is_pattern, base):
            # A directory segment is itself a pattern: expand it first and
            # scan each concrete directory.
            res = []
            for step in handle_directory_patterns(base, file_pattern):
                res += generic.scan_directory_recursive(
                    cp, ver, rev, "", [step], base
                )
        else:
            res = generic.scan_directory_recursive(
                cp, ver, rev, "", [(base, file_pattern)], base
            )

        collected += clean_results(res, versionmangle, urlmangle)
    return collected

View File

@ -0,0 +1,11 @@
from euscan.handlers import php
HANDLER_NAME = "pear"
CONFIDENCE = 100
PRIORITY = 90
def can_handle(pkg, url=None):
    """
    Tell whether *url* starts with the download prefix
    ``http://<HANDLER_NAME>.php.net/get/``.

    A falsy *url* (None or empty) is returned unchanged.
    """
    if not url:
        return url
    prefix = 'http://%s.php.net/get/' % HANDLER_NAME
    return url.startswith(prefix)
scan_url = php.scan_url
scan_pkg = php.scan_pkg

View File

@ -0,0 +1,11 @@
from euscan.handlers import php
HANDLER_NAME = "pecl"
CONFIDENCE = 100
PRIORITY = 90
def can_handle(pkg, url=None):
    """
    Tell whether *url* starts with the download prefix
    ``http://<HANDLER_NAME>.php.net/get/``.

    A falsy *url* (None or empty) is returned unchanged.
    """
    if not url:
        return url
    prefix = 'http://%s.php.net/get/' % HANDLER_NAME
    return url.startswith(prefix)
scan_url = php.scan_url
scan_pkg = php.scan_pkg

View File

@ -3,23 +3,17 @@ import portage
import urllib2 import urllib2
import xml.dom.minidom import xml.dom.minidom
from euscan import helpers, output from euscan import helpers, output, mangling
HANDLER_NAME = "php" HANDLER_NAME = "php"
CONFIDENCE = 100 CONFIDENCE = 100
PRIORITY = 90 PRIORITY = 90
def can_handle(pkg, url=None):
def can_handle(pkg, url):
if url.startswith('http://pear.php.net/get/'):
return True
if url.startswith('http://pecl.php.net/get/'):
return True
return False return False
def guess_package_and_channel(cp, url): def guess_package_and_channel(cp, url):
match = re.search('http://(.*)/get/(.*)-(.*).tgz', url) match = re.search('http://(.*)\.php\.net/get/(.*)-(.*).tgz', url)
if match: if match:
host = match.group(1) host = match.group(1)
@ -30,12 +24,17 @@ def guess_package_and_channel(cp, url):
return pkg, host return pkg, host
def scan(pkg, url): def scan_url(pkg, url, options):
cp, ver, rev = portage.pkgsplit(pkg.cpv) package, channel = guess_package_and_channel(pkg.cp, url)
package, channel = guess_package_and_channel(cp, url) return scan_pkg(pkg, {'type' : channel, 'data' : package })
orig_url = url def scan_pkg(pkg, options):
url = 'http://%s/rest/r/%s/allreleases.xml' % (channel, package.lower()) cp, ver, rev = pkg.cp, pkg.version, pkg.revision
package = options['data']
channel = options['type']
url = 'http://%s.php.net/rest/r/%s/allreleases.xml' % (channel, package.lower())
output.einfo("Using: " + url) output.einfo("Using: " + url)
@ -58,14 +57,12 @@ def scan(pkg, url):
for node in nodes: for node in nodes:
up_pv = node.childNodes[0].data up_pv = node.childNodes[0].data
pv = helpers.gentoo_mangle_version(up_pv) pv = mangling.mangle_version(up_pv, options)
if helpers.version_filtered(cp, ver, pv): if helpers.version_filtered(cp, ver, pv):
continue continue
url = 'http://%s/get/%s-%s.tgz' % (channel, package, up_pv) url = 'http://%s.php.net/get/%s-%s.tgz' % (channel, package, up_pv)
url = mangling.mangle_url(url, options)
if url == orig_url:
continue
ret.append((url, pv, HANDLER_NAME, CONFIDENCE)) ret.append((url, pv, HANDLER_NAME, CONFIDENCE))

View File

@ -3,15 +3,15 @@ import re
import portage import portage
from euscan import helpers, output from euscan import mangling, helpers, output
HANDLER_NAME = "pypi" HANDLER_NAME = "pypi"
CONFIDENCE = 100 CONFIDENCE = 100
PRIORITY = 90 PRIORITY = 90
def can_handle(pkg, url): def can_handle(pkg, url=None):
return url.startswith('mirror://pypi/') return url and url.startswith('mirror://pypi/')
def guess_package(cp, url): def guess_package(cp, url):
@ -24,19 +24,15 @@ def guess_package(cp, url):
return pkg return pkg
def scan(pkg, url): def scan_url(pkg, url, options):
'http://wiki.python.org/moin/PyPiXmlRpc' 'http://wiki.python.org/moin/PyPiXmlRpc'
package = guess_package(pkg.cpv, url) package = guess_package(pkg.cpv, url)
return scan_kg(pkg, [package])
ret = []
for urls, pv in scan_remote(pkg, [package]):
ret.append((urls, pv, HANDLER_NAME, CONFIDENCE))
return ret
def scan_remote(pkg, remote_data): def scan_pkg(pkg, options):
package = remote_data[0] package = options['data']
output.einfo("Using PyPi XMLRPC: " + package) output.einfo("Using PyPi XMLRPC: " + package)
@ -52,10 +48,10 @@ def scan_remote(pkg, remote_data):
ret = [] ret = []
for up_pv in versions: for up_pv in versions:
pv = helpers.gentoo_mangle_version(up_pv) pv = mangling.mangle_version(up_pv, options)
if helpers.version_filtered(cp, ver, pv): if helpers.version_filtered(cp, ver, pv):
continue continue
urls = client.release_urls(package, up_pv) urls = client.release_urls(package, up_pv)
urls = " ".join([infos['url'] for infos in urls]) urls = " ".join([mangling.mangle_url(infos['url'], options) for infos in urls])
ret.append((urls, pv)) ret.append((urls, pv, HANDLER_NAME, CONFIDENCE))
return ret return ret

View File

@ -3,15 +3,15 @@ import portage
import json import json
import urllib2 import urllib2
from euscan import helpers, output from euscan import helpers, output, mangling
HANDLER_NAME = "rubygems" HANDLER_NAME = "rubygems"
CONFIDENCE = 100 CONFIDENCE = 100
PRIORITY = 90 PRIORITY = 90
def can_handle(pkg, url): def can_handle(pkg, url=None):
return url.startswith('mirror://rubygems/') return url and url.startswith('mirror://rubygems/')
def guess_gem(cpv, url): def guess_gem(cpv, url):
@ -29,7 +29,7 @@ def guess_gem(cpv, url):
return pkg return pkg
def scan(pkg, url): def scan_url(pkg, url, options):
'http://guides.rubygems.org/rubygems-org-api/#gemversion' 'http://guides.rubygems.org/rubygems-org-api/#gemversion'
gem = guess_gem(pkg.cpv, url) gem = guess_gem(pkg.cpv, url)
@ -41,14 +41,11 @@ def scan(pkg, url):
output.einfo("Using RubyGem API: %s" % gem) output.einfo("Using RubyGem API: %s" % gem)
ret = [] return scan_pkg(pkg, {'data' : gem})
for url, pv in scan_remote(pkg, [gem]):
ret.append(url, pv, HANDLER_NAME, CONFIDENCE)
return ret
def scan_remote(pkg, remote_data): def scan_pkg(pkg, options):
gem = remote_data[0] gem = options['data']
url = 'http://rubygems.org/api/v1/versions/%s.json' % gem url = 'http://rubygems.org/api/v1/versions/%s.json' % gem
try: try:
@ -69,9 +66,10 @@ def scan_remote(pkg, remote_data):
ret = [] ret = []
for version in versions: for version in versions:
up_pv = version['number'] up_pv = version['number']
pv = helpers.gentoo_mangle_version(up_pv) pv = mangling.mangle_version(up_pv, options)
if helpers.version_filtered(cp, ver, pv): if helpers.version_filtered(cp, ver, pv):
continue continue
url = 'http://rubygems.org/gems/%s-%s.gem' % (gem, up_pv) url = 'http://rubygems.org/gems/%s-%s.gem' % (gem, up_pv)
ret.append((url, pv)) url = mangling.mangle_url(url, options)
ret.append((url, pv, HANDLER_NAME, CONFIDENCE))
return ret return ret

View File

@ -0,0 +1,98 @@
import re
import urllib2
import portage
import generic
from euscan import output, helpers
PRIORITY = 100
HANDLER_NAME = "url"
CONFIDENCE = 100.0
is_pattern = r"\([^\/]+\)"
def can_handle(*args):
    """
    Always return False, whatever arguments are given.
    """
    handled = False
    return handled
def handle_directory_patterns(base, file_pattern):
    """
    Directory pattern matching
    e.g.: base: ftp://ftp.nessus.org/pub/nessus/nessus-([\d\.]+)/src/
    file_pattern: nessus-core-([\d\.]+)\.tar\.gz

    The url is cut at its first path segment containing a capture group.
    The part before it is fetched and scanned with that segment as the
    pattern; every hit is turned into a (url, file_pattern) pair. An empty
    list is returned when the directory listing cannot be fetched.
    """
    segments = base.split("/")

    # Position of the first segment that holds a pattern.
    index = 0
    while index < len(segments) and \
            not re.search(is_pattern, segments[index]):
        index += 1

    basedir = "/".join(segments[:index])
    directory_pattern = segments[index]
    final = "/".join(segments[index + 1:])

    try:
        fp = helpers.urlopen(basedir)
    except (urllib2.URLError, IOError):
        return []

    if not fp:
        return []

    data = fp.read()

    if basedir.startswith("ftp://"):
        scanner = generic.scan_ftp
    else:
        scanner = generic.scan_html
    scan_data = scanner(data, basedir, directory_pattern)

    expanded = []
    for _, path in scan_data:
        expanded.append(("/".join((basedir, path, final)), file_pattern))
    return expanded
def read_options(options):
    """
    Split the 'data' entry of a watch option set into a base url and a
    file pattern.

    The data is "<base url> [<file pattern>]". Tokens are split on any run
    of whitespace so that indented or multi-line XML text parses the same
    way as a single-line entry; tokens after the second are ignored. When
    the base url itself ends with a path segment containing a capture
    group, that segment becomes the file pattern and is cut from the end
    of the base url.

    :param options: dict with a 'data' key (string, or None for an empty
        watch entry)
    :returns: (base, file_pattern); file_pattern is None when none was
        given, base is "" when data is empty
    """
    # None (empty watch element) is handled like an empty string.
    tokens = (options['data'] or "").split()
    base = tokens[0] if tokens else ""
    file_pattern = tokens[1] if len(tokens) > 1 else None

    # the file pattern can be in the base url
    pattern_regex = r"/([^/]*\([^/]*\)[^/]*)$"
    match = re.search(pattern_regex, base)
    if match:
        file_pattern = match.group(1)
        # Cut only the trailing segment: an identical directory pattern
        # earlier in the url must be left in place.
        base = base[:match.start(1)]

    # handle sf.net specially
    base = base.replace(
        "http://sf.net/", "http://qa.debian.org/watch/sf.php/"
    )

    return base, file_pattern
def scan_pkg(pkg, options):
    """
    Scan the location described by a watch option set and return what
    generic.scan_directory_recursive finds.

    When the base url contains a directory pattern, it is first expanded
    into concrete directories, each of which is scanned in turn.
    """
    output.einfo("Using watch data")

    cp, ver, rev = pkg.cp, pkg.version, pkg.revision
    base, file_pattern = read_options(options)

    if re.search(is_pattern, base):
        found = []
        for step in handle_directory_patterns(base, file_pattern):
            found += generic.scan_directory_recursive(
                cp, ver, rev, "", [step], base, options
            )
        return found

    return generic.scan_directory_recursive(
        cp, ver, rev, "", [(base, file_pattern)], base, options
    )

View File

@ -1,19 +0,0 @@
"""
Url wide handlers for scanning upstream
"""
import pkgutil
# Module-level list of handler modules; filled by the loop below and then
# rebound to a copy sorted by PRIORITY.
handlers = []
# autoimport all modules in this directory and append them to handlers list
for loader, module_name, is_pkg in pkgutil.walk_packages(__path__):
module = loader.find_module(module_name).load_module(module_name)
handlers.append(module)
# sort handlers by priority
# (reverse=True: the module with the largest PRIORITY ends up first)
handlers = sorted(
handlers,
key=lambda handler: handler.PRIORITY,
reverse=True
)

View File

@ -34,120 +34,6 @@ _v_end = r'(?:(?:-|_)(?:pre|p|beta|b|alpha|a|rc|r)\d*)'
_v = r'((?:\d+)(?:(?:\.\d+)*)(?:[a-zA-Z]*?)(?:' + _v_end + '*))' _v = r'((?:\d+)(?:(?:\.\d+)*)(?:[a-zA-Z]*?)(?:' + _v_end + '*))'
# Stolen from g-pypi
def gentoo_mangle_version(up_pv):
    """Convert an upstream version string to a Gentoo-style PV if needed

    :param up_pv: Upstream package version
    :type up_pv: string
    :returns: pv
    :rtype: string

    The upstream scheme cannot be known for sure, so do our best with
    some well-known versioning schemes:

    * 1.0a1 (1.0_alpha1)
    * 1.0-a1 (1.0_alpha1)
    * 1.0b1 (1.0_beta1)
    * 1.0-b1 (1.0_beta1)
    * 1.0beta1 (1.0_beta1)
    * 1.0rc1 (1.0_rc1)
    * 1.0dev1234 (1.0_pre1234)
    * 1.0dev-20091118 (1.0_pre20091118)
    * 1.0-r1234 (1.0_p1234)
    * 1.0dev-r1234 (1.0_p1234)
    * 1.0dev (1.0)

    Every suffix regex exposes three groups:

    * group 1 leading part of the version, kept as is
    * group 2 upstream suffix text, replaced by the portage suffix
    * group 3 numeric part of the suffix

    The order of the regexes is significant: they are tried top to bottom
    and the first match wins, so full words come before the single-letter
    abbreviations (otherwise the bare ``a`` pattern would swallow the end
    of ``beta``).

    The chronological portage release versions are:

    * _alpha
    * _beta
    * _pre
    * _rc
    * release
    * _p

    **Example:**

    >>> gentoo_mangle_version('1.0b2')
    '1.0_beta2'
    """
    bad_suffixes = re.compile(
        r'((?:[._-]*)(?:dev|devel|final|stable|snapshot)$)', re.I)
    revision_suffixes = re.compile(
        r'(.*?)([\._-]*(?:r|patch|p)[\._-]*)([0-9]*)$', re.I)
    # An explicit sequence (not a dict) so the matching order is the same on
    # every interpreter.
    suf_matches = (
        # Full-word suffixes
        ('_pre', r'(.*?)([\._-]*dev[\._-]*r?)([0-9]+)$'),
        ('_pre', r'(.*?)([\._-]*(?:pre|preview)[\._-]*)([0-9]*)$'),
        ('_alpha', r'(.*?)([\._-]*(?:alpha|test)[\._-]*)([0-9]*)$'),
        ('_beta', r'(.*?)([\._-]*beta[\._-]*)([0-9]*)$'),
        ('_rc', r'(.*?)([\._-]*rc[\._-]*)([0-9]*)$'),
        # Single-letter abbreviations
        ('_alpha', r'(.*?)([\._-]*a[\._-]*)([0-9]*)$'),
        ('_alpha', r'(.*[^a-z])(a)([0-9]*)$'),
        ('_beta', r'(.*?)([\._-]*b)([0-9]*)$'),
        ('_beta', r'(.*[^a-z])(b)([0-9]*)$'),
        ('_rc', r'(.*?)([\._-]*c[\._-]*)([0-9]*)$'),
        ('_rc', r'(.*[^a-z])(c[\._-]*)([0-9]+)$'),
    )

    pv = up_pv
    additional_version = ""

    # A trailing revision marker (r / patch / p) is split off first and
    # re-attached at the very end as a "_p" suffix.
    rev_match = revision_suffixes.search(up_pv)
    if rev_match:
        pv = up_pv = rev_match.group(1)
        additional_version = '_p' + rev_match.group(3)

    for portage_suffix, regex in suf_matches:
        rs_match = re.match(regex, up_pv, re.I)
        if rs_match:
            # e.g. 1.0.dev-20091118 -> "1.0" + "_pre" + "20091118"
            pv = rs_match.group(1) + portage_suffix + rs_match.group(3)
            break
    else:
        # Single suffixes with no numeric component are simply removed.
        match = bad_suffixes.search(up_pv)
        if match:
            pv = up_pv[:-len(match.group(1))]

    return pv + additional_version
def cast_int_components(version): def cast_int_components(version):
for i, obj in enumerate(version): for i, obj in enumerate(version):
try: try:
@ -520,7 +406,6 @@ def basedir_from_template(template):
return template[0:idx] return template[0:idx]
def generate_scan_paths(url): def generate_scan_paths(url):
prefix, chunks = url.split('://') prefix, chunks = url.split('://')
chunks = chunks.split('/') chunks = chunks.split('/')

163
pym/euscan/mangling.py Normal file
View File

@ -0,0 +1,163 @@
import re
import euscan.handlers
def apply_mangling_rule(mangle, string):
    """Apply one sed-like substitution rule to *string*.

    Two spellings are recognised: ``s/pattern/replacement/`` and
    ``s|pattern|replacement|``. Perl-style ``$N`` group references in the
    replacement are rewritten to Python's backslash form before
    substituting. A rule in any other format leaves *string* untouched.
    """
    parsed = (
        re.match(r"s/(.*[^\\])/(.*)/", mangle)
        or re.match(r"s\|(.*[^\\])\|(.*)\|", mangle)
    )
    if parsed is None:
        # Not a known regex format
        return string

    perl_pattern, perl_repl = parsed.groups()
    # convert group references from perl format ($1) to python format (\1)
    python_repl = re.sub(r"\$(\d+)", r"\\\1", perl_repl)
    return re.sub(perl_pattern, python_repl, string)
def apply_mangling_rules(kind, rules, string):
    """
    Apply multiple mangling rules (both sed-like and handlers)
    in order

    :param kind: key selecting the rule list inside *rules*
        (e.g. 'versionmangle' or 'downloadurlmangle')
    :param rules: mapping from kind to a sequence of rules
    :param string: value to mangle
    :returns: the mangled string; *string* unchanged when *rules* holds
        no entry for *kind*
    """

    if kind not in rules:
        return string

    for rule in rules[kind]:
        ret = None

        # First try handlers rules.
        # mangle_version() passes 'versionmangle' as kind, so the built-in
        # 'gentoo' rule has to be recognised under that name; 'version' is
        # still accepted for callers using the short name.
        if rule == 'gentoo' and kind in ('version', 'versionmangle'):
            ret = gentoo_mangle_version(string)
        elif kind == 'downloadurlmangle':
            ret = euscan.handlers.mangle_url(rule, string)
        elif kind == 'versionmangle':
            ret = euscan.handlers.mangle_version(rule, string)

        if ret is not None:  # Use return value as new string if not None
            string = ret
        else:  # Apply sed like rules
            string = apply_mangling_rule(rule, string)

    return string
def mangle_version(up_pv, options):
    """Run the 'versionmangle' rules found in *options* over *up_pv*."""
    mangled = apply_mangling_rules('versionmangle', options, up_pv)
    return mangled
def mangle_url(url, options):
    """Run the 'downloadurlmangle' rules found in *options* over *url*."""
    mangled = apply_mangling_rules('downloadurlmangle', options, url)
    return mangled
# Stolen from g-pypi
def gentoo_mangle_version(up_pv):
    """Convert an upstream version string to a Gentoo-style PV if needed

    :param up_pv: Upstream package version
    :type up_pv: string
    :returns: pv
    :rtype: string

    The upstream scheme cannot be known for sure, so do our best with
    some well-known versioning schemes:

    * 1.0a1 (1.0_alpha1)
    * 1.0-a1 (1.0_alpha1)
    * 1.0b1 (1.0_beta1)
    * 1.0-b1 (1.0_beta1)
    * 1.0beta1 (1.0_beta1)
    * 1.0rc1 (1.0_rc1)
    * 1.0dev1234 (1.0_pre1234)
    * 1.0dev-20091118 (1.0_pre20091118)
    * 1.0-r1234 (1.0_p1234)
    * 1.0dev-r1234 (1.0_p1234)
    * 1.0dev (1.0)

    Every suffix regex exposes three groups:

    * group 1 leading part of the version, kept as is
    * group 2 upstream suffix text, replaced by the portage suffix
    * group 3 numeric part of the suffix

    The order of the regexes is significant: they are tried top to bottom
    and the first match wins, so full words come before the single-letter
    abbreviations (otherwise the bare ``a`` pattern would swallow the end
    of ``beta``).

    The chronological portage release versions are:

    * _alpha
    * _beta
    * _pre
    * _rc
    * release
    * _p

    **Example:**

    >>> gentoo_mangle_version('1.0b2')
    '1.0_beta2'
    """
    bad_suffixes = re.compile(
        r'((?:[._-]*)(?:dev|devel|final|stable|snapshot)$)', re.I)
    revision_suffixes = re.compile(
        r'(.*?)([\._-]*(?:r|patch|p)[\._-]*)([0-9]*)$', re.I)
    # An explicit sequence (not a dict) so the matching order is the same on
    # every interpreter.
    suf_matches = (
        # Full-word suffixes
        ('_pre', r'(.*?)([\._-]*dev[\._-]*r?)([0-9]+)$'),
        ('_pre', r'(.*?)([\._-]*(?:pre|preview)[\._-]*)([0-9]*)$'),
        ('_alpha', r'(.*?)([\._-]*(?:alpha|test)[\._-]*)([0-9]*)$'),
        ('_beta', r'(.*?)([\._-]*beta[\._-]*)([0-9]*)$'),
        ('_rc', r'(.*?)([\._-]*rc[\._-]*)([0-9]*)$'),
        # Single-letter abbreviations
        ('_alpha', r'(.*?)([\._-]*a[\._-]*)([0-9]*)$'),
        ('_alpha', r'(.*[^a-z])(a)([0-9]*)$'),
        ('_beta', r'(.*?)([\._-]*b)([0-9]*)$'),
        ('_beta', r'(.*[^a-z])(b)([0-9]*)$'),
        ('_rc', r'(.*?)([\._-]*c[\._-]*)([0-9]*)$'),
        ('_rc', r'(.*[^a-z])(c[\._-]*)([0-9]+)$'),
    )

    pv = up_pv
    additional_version = ""

    # A trailing revision marker (r / patch / p) is split off first and
    # re-attached at the very end as a "_p" suffix.
    rev_match = revision_suffixes.search(up_pv)
    if rev_match:
        pv = up_pv = rev_match.group(1)
        additional_version = '_p' + rev_match.group(3)

    for portage_suffix, regex in suf_matches:
        rs_match = re.match(regex, up_pv, re.I)
        if rs_match:
            # e.g. 1.0.dev-20091118 -> "1.0" + "_pre" + "20091118"
            pv = rs_match.group(1) + portage_suffix + rs_match.group(3)
            break
    else:
        # Single suffixes with no numeric component are simply removed.
        match = bad_suffixes.search(up_pv)
        if match:
            pv = up_pv[:-len(match.group(1))]

    return pv + additional_version

View File

@ -44,24 +44,6 @@ def filter_versions(cp, versions):
] ]
# gentoolkit stores PORTDB, so even if we modify it to add an overlay
# it will still use the old dbapi
def reload_gentoolkit():
    """Point gentoolkit's module-level PORTDB references at the current
    portage porttree dbapi.

    ``gentoolkit.dbapi.PORTDB`` is always overwritten;
    ``gentoolkit.package`` and ``gentoolkit.query`` are only updated when
    they already expose a ``PORTDB`` attribute.
    """
    from gentoolkit import dbapi
    import gentoolkit.package
    import gentoolkit.query

    current_portdb = portage.db[portage.root]["porttree"].dbapi

    dbapi.PORTDB = current_portdb
    for holder in (gentoolkit.package, gentoolkit.query):
        if hasattr(holder, 'PORTDB'):
            holder.PORTDB = current_portdb
def scan_upstream(query, on_progress=None): def scan_upstream(query, on_progress=None):
""" """
Scans the upstream searching new versions for the given query Scans the upstream searching new versions for the given query