From 8062fddc2369a34bb0229018351c7f2fc4d4a2b0 Mon Sep 17 00:00:00 2001 From: Corentin Chary Date: Mon, 2 Jul 2012 18:16:59 +0200 Subject: [PATCH 1/3] euscanwww: move processing to scan and misc, enhance update_portage_tree update_portage_tree() now: - watch stderr and stdout for each command - use layman command instead of layman API for sync because layman API doesn't work when stdout or stderr is not a real file (we could probably work around that with pipes and epoll) - use egencache instead of emerge to generate cache - export PORTAGE_CONFIGROOT, ROOT, EIX_CACHEFILE etc.. so they are used everywhere Signed-off-by: Corentin Chary --- .../management/commands/regen_rrds.py | 3 +- .../management/commands/scan_metadata.py | 2 +- .../management/commands/scan_portage.py | 2 +- .../management/commands/scan_upstream.py | 2 +- .../management/commands/update_counters.py | 2 +- .../commands/update_portage_trees.py | 16 +++ euscanwww/djeuscan/processing/__init__.py | 5 +- .../djeuscan/processing/misc/__init__.py | 5 + .../processing/{ => misc}/regen_rrds.py | 1 - .../processing/{ => misc}/update_counters.py | 6 +- .../processing/misc/update_portage_trees.py | 108 ++++++++++++++++++ .../djeuscan/processing/scan/__init__.py | 7 ++ .../processing/{ => scan}/scan_metadata.py | 0 .../processing/{ => scan}/scan_portage.py | 0 .../processing/{ => scan}/scan_upstream.py | 84 +++++++------- .../processing/update_portage_trees.py | 66 ----------- euscanwww/euscanwww/settings.py | 10 +- 17 files changed, 194 insertions(+), 125 deletions(-) create mode 100644 euscanwww/djeuscan/management/commands/update_portage_trees.py create mode 100644 euscanwww/djeuscan/processing/misc/__init__.py rename euscanwww/djeuscan/processing/{ => misc}/regen_rrds.py (99%) rename euscanwww/djeuscan/processing/{ => misc}/update_counters.py (97%) create mode 100644 euscanwww/djeuscan/processing/misc/update_portage_trees.py create mode 100644 euscanwww/djeuscan/processing/scan/__init__.py rename euscanwww/djeuscan/processing/{ => scan}/scan_metadata.py (100%) rename euscanwww/djeuscan/processing/{ => scan}/scan_portage.py (100%) rename euscanwww/djeuscan/processing/{ => scan}/scan_upstream.py (68%) delete mode 100644 euscanwww/djeuscan/processing/update_portage_trees.py diff --git a/euscanwww/djeuscan/management/commands/regen_rrds.py b/euscanwww/djeuscan/management/commands/regen_rrds.py index a53f44d..4a708f5 100644 --- a/euscanwww/djeuscan/management/commands/regen_rrds.py +++ b/euscanwww/djeuscan/management/commands/regen_rrds.py @@ -2,11 +2,10 @@ import logging from django.core.management.base import BaseCommand from djeuscan.processing import set_verbosity_level -from djeuscan.processing.regen_rrds import regen_rrds +from djeuscan.processing.misc import regen_rrds logger = logging.getLogger(__name__) - class Command(BaseCommand): _overlays = {} help = 'Regenerate rrd database' diff --git a/euscanwww/djeuscan/management/commands/scan_metadata.py b/euscanwww/djeuscan/management/commands/scan_metadata.py index 3d98a36..891893b 100644 --- a/euscanwww/djeuscan/management/commands/scan_metadata.py +++ b/euscanwww/djeuscan/management/commands/scan_metadata.py @@ -5,7 +5,7 @@ from optparse import make_option from django.core.management.base import BaseCommand from djeuscan.processing import set_verbosity_level -from djeuscan.processing.scan_metadata import scan_metadata +from djeuscan.processing.scan import scan_metadata logger = logging.getLogger(__name__) diff --git a/euscanwww/djeuscan/management/commands/scan_portage.py b/euscanwww/djeuscan/management/commands/scan_portage.py index 0e3bed9..e40778c 100644 --- a/euscanwww/djeuscan/management/commands/scan_portage.py +++ b/euscanwww/djeuscan/management/commands/scan_portage.py @@ -5,7 +5,7 @@ from optparse import make_option from django.core.management.base import BaseCommand from djeuscan.processing import set_verbosity_level -from djeuscan.processing.scan_portage import scan_portage +from djeuscan.processing.scan import scan_portage logger = logging.getLogger(__name__) diff --git a/euscanwww/djeuscan/management/commands/scan_upstream.py b/euscanwww/djeuscan/management/commands/scan_upstream.py index f185c38..e941e73 100644 --- a/euscanwww/djeuscan/management/commands/scan_upstream.py +++ b/euscanwww/djeuscan/management/commands/scan_upstream.py @@ -5,7 +5,7 @@ from optparse import make_option from django.core.management.base import BaseCommand from djeuscan.processing import set_verbosity_level -from djeuscan.processing.scan_upstream import scan_upstream +from djeuscan.processing.scan import scan_upstream logger = logging.getLogger(__name__) diff --git a/euscanwww/djeuscan/management/commands/update_counters.py b/euscanwww/djeuscan/management/commands/update_counters.py index 100d5a7..35d1830 100644 --- a/euscanwww/djeuscan/management/commands/update_counters.py +++ b/euscanwww/djeuscan/management/commands/update_counters.py @@ -4,7 +4,7 @@ from optparse import make_option from django.core.management.base import BaseCommand from djeuscan.processing import set_verbosity_level -from djeuscan.processing.update_counters import update_counters +from djeuscan.processing.misc import update_counters logger = logging.getLogger(__name__) diff --git a/euscanwww/djeuscan/management/commands/update_portage_trees.py b/euscanwww/djeuscan/management/commands/update_portage_trees.py new file mode 100644 index 0000000..b114548 --- /dev/null +++ b/euscanwww/djeuscan/management/commands/update_portage_trees.py @@ -0,0 +1,16 @@ +import logging +from django.core.management.base import BaseCommand + +from djeuscan.processing import set_verbosity_level +from djeuscan.processing.misc import update_portage_trees + +logger = logging.getLogger(__name__) + + +class Command(BaseCommand): + _overlays = {} + help = 'Regenerate rrd database' + + def handle(self, *args, **options): + set_verbosity_level(logger, options.get("verbosity", 1)) + update_portage_trees(logger=logger) diff --git a/euscanwww/djeuscan/processing/__init__.py b/euscanwww/djeuscan/processing/__init__.py index 4a203b7..97bed46 100644 --- a/euscanwww/djeuscan/processing/__init__.py +++ b/euscanwww/djeuscan/processing/__init__.py @@ -1,5 +1,3 @@ -import logging - class FakeLogger(object): def __getattr__(self, key): @@ -7,6 +5,8 @@ class FakeLogger(object): def set_verbosity_level(logger, verbosity): + import logging + try: verbosity = int(verbosity) except (ValueError, TypeError): @@ -29,3 +29,4 @@ def set_verbosity_level(logger, verbosity): logger.setLevel(levels[verbosity]) return logger + diff --git a/euscanwww/djeuscan/processing/misc/__init__.py b/euscanwww/djeuscan/processing/misc/__init__.py new file mode 100644 index 0000000..b5664ad --- /dev/null +++ b/euscanwww/djeuscan/processing/misc/__init__.py @@ -0,0 +1,5 @@ +__all__ = ["regen_rrds", "update_counters", "update_portage_trees"] + +from regen_rrds import regen_rrds +from update_counters import update_counters +from update_portage_trees import update_portage_trees diff --git a/euscanwww/djeuscan/processing/regen_rrds.py b/euscanwww/djeuscan/processing/misc/regen_rrds.py similarity index 99% rename from euscanwww/djeuscan/processing/regen_rrds.py rename to euscanwww/djeuscan/processing/misc/regen_rrds.py index f578aa7..da548a1 100644 --- a/euscanwww/djeuscan/processing/regen_rrds.py +++ b/euscanwww/djeuscan/processing/misc/regen_rrds.py @@ -3,7 +3,6 @@ from djeuscan import charts from djeuscan.processing import FakeLogger - def regen_rrds(logger=None): """ Regenerates the rrd database diff --git a/euscanwww/djeuscan/processing/update_counters.py b/euscanwww/djeuscan/processing/misc/update_counters.py similarity index 97% rename from euscanwww/djeuscan/processing/update_counters.py rename to euscanwww/djeuscan/processing/misc/update_counters.py index 54cafed..09ba1db 100644 --- a/euscanwww/djeuscan/processing/update_counters.py +++ b/euscanwww/djeuscan/processing/misc/update_counters.py @@ -157,17 +157,17 @@ def update_counters(fast=False, nolog=False, logger=None): return for clog in categories.values(): - logger.info('+ [cl] %s\n' % clog) + logger.info('+ [cl] %s' % clog) charts.rrd_update('category-%s' % clog.category, now, clog) clog.save() for hlog in herds.values(): - logger.info('+ [hl] %s\n' % hlog) + logger.info('+ [hl] %s' % hlog) charts.rrd_update('herd-%d' % hlog.herd.id, now, hlog) hlog.save() for mlog in maintainers.values(): - logger.info('+ [ml] %s\n' % mlog) + logger.info('+ [ml] %s' % mlog) charts.rrd_update('maintainer-%d' % mlog.maintainer.id, now, mlog) mlog.save() diff --git a/euscanwww/djeuscan/processing/misc/update_portage_trees.py b/euscanwww/djeuscan/processing/misc/update_portage_trees.py new file mode 100644 index 0000000..e8d07c3 --- /dev/null +++ b/euscanwww/djeuscan/processing/misc/update_portage_trees.py @@ -0,0 +1,108 @@ +import os + +from django.conf import settings + +def _launch_command(cmd, logger=None): + """ + Helper for launching shell commands inside tasks + """ + import sys + import subprocess + import select + + fp = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE) + + mask = select.EPOLLIN|select.EPOLLHUP|select.EPOLLERR + + epoll = select.epoll() + epoll.register(fp.stdout.fileno(), mask) + epoll.register(fp.stderr.fileno(), mask) + + if logger: + info, error = logger.info, logger.error + else: + info = lambda x: sys.stdout.write(x + '\n') + error = lambda x: sys.stderr.write(x + '\n') + + try: + exited = False + while not exited: + events = epoll.poll(1) + for fileno, event in events: + if event & select.EPOLLIN: + if fileno == fp.stdout.fileno(): + source, out = fp.stdout, info + else: + source, out = fp.stderr, error + line = source.readline().rstrip('\n') + out("%s[%s]: %s" % (cmd[0], fp.pid, line)) + elif event & (select.EPOLLERR|select.EPOLLHUP): + exited = True + finally: + epoll.close() + + fp.wait() + +def emerge_sync(logger): + """ + Launches an emerge --sync + """ + cmd = ["emerge", "--sync", "--root", settings.PORTAGE_ROOT, + "--config-root", settings.PORTAGE_CONFIGROOT] + return _launch_command(cmd, logger) + + +def layman_sync(logger, cache=True): + """ + Syncs Layman repos + """ + from layman import Layman + import shutil + + l = Layman(config=settings.LAYMAN_CONFIG) + + installed_overlays = l.get_installed() + + for overlay in installed_overlays: + logger.info('Cleaning cache for overlay %s...' % overlay) + overlay_path = os.path.join(l.config['storage'], overlay) + shutil.rmtree(os.path.join(overlay_path, 'metadata/cache'), True) + shutil.rmtree(os.path.join(overlay_path, 'metadata/md5-cache'), True) + + # FIXME, try to find a way to log layman output... + #l.sync(installed_overlays, output_results=False) + cmd = ['layman', '-S', '--config', settings.LAYMAN_CONFIG] + _launch_command(cmd, logger) + + cmd = ['egencache', '--jobs', "%s" % settings.EGENCACHE_JOBS, + '--rsync', '--config-root', settings.PORTAGE_CONFIGROOT, + '--update', '--update-use-local-desc'] + + for overlay in installed_overlays: + logger.info('Generating cache for overlay %s...' % overlay) + overlay_path = os.path.join(l.config['storage'], overlay) + if not os.path.exists(os.path.join(overlay_path, 'profiles/repo_name')): + continue + _launch_command(cmd + ['--repo', overlay], logger) + +def eix_update(logger): + """ + Launches eix-update + """ + cmd = ["eix-update"] + return _launch_command(cmd, logger) + + +def update_portage_trees(logger=None): + from djeuscan.processing import FakeLogger + + logger = logger or FakeLogger() + logger.info("Running emerge --sync") + emerge_sync(logger) + logger.info("Running layman --sync") + layman_sync(logger, cache=True) + #logger.info("Running emerge --regen") + #emerge_regen() + logger.info("Running eix-update") + eix_update(logger) + logger.info("Done!") diff --git a/euscanwww/djeuscan/processing/scan/__init__.py b/euscanwww/djeuscan/processing/scan/__init__.py new file mode 100644 index 0000000..611ee4c --- /dev/null +++ b/euscanwww/djeuscan/processing/scan/__init__.py @@ -0,0 +1,7 @@ +__all__ = [ + "scan_metadata", "scan_portage", "scan_upstream", +] + +from scan_metadata import scan_metadata +from scan_portage import scan_portage +from scan_upstream import scan_upstream diff --git a/euscanwww/djeuscan/processing/scan_metadata.py b/euscanwww/djeuscan/processing/scan/scan_metadata.py similarity index 100% rename from euscanwww/djeuscan/processing/scan_metadata.py rename to euscanwww/djeuscan/processing/scan/scan_metadata.py diff --git a/euscanwww/djeuscan/processing/scan_portage.py b/euscanwww/djeuscan/processing/scan/scan_portage.py similarity index 100% rename from euscanwww/djeuscan/processing/scan_portage.py rename to euscanwww/djeuscan/processing/scan/scan_portage.py diff --git a/euscanwww/djeuscan/processing/scan_upstream.py b/euscanwww/djeuscan/processing/scan/scan_upstream.py similarity index 68% rename from euscanwww/djeuscan/processing/scan_upstream.py rename to euscanwww/djeuscan/processing/scan/scan_upstream.py index 981af59..d79a967 100644 --- a/euscanwww/djeuscan/processing/scan_upstream.py +++ b/euscanwww/djeuscan/processing/scan/scan_upstream.py @@ -11,8 +11,9 @@ from djeuscan.models import Package, Version, EuscanResult, VersionLog class ScanUpstream(object): - def __init__(self, logger=None): + def __init__(self, logger=None, purge_versions=False): self.logger = logger or FakeLogger() + self.purge_versions = purge_versions def scan(self, package): CONFIG["format"] = "dict" @@ -31,23 +32,20 @@ class ScanUpstream(object): except KeyError: return {} - with commit_on_success(): - obj = self.store_package(cpv) + obj = self.store_package(cpv) - for res in out[package]["result"]: - self.store_version( - obj, - res["version"], - " ".join(res["urls"]), - res["type"], - res["handler"], - res["confidence"], - ) + for res in out[package]["result"]: + self.store_version( + obj, + res["version"], + " ".join(res["urls"]), + res["type"], + res["handler"], + res["confidence"], + ) self.store_result(obj, out_json, scan_time, ebuild) - return out - def store_result(self, package, formatted_log, scan_time, ebuild): # Remove previous logs EuscanResult.objects.filter(package=package).delete() @@ -70,7 +68,8 @@ class ScanUpstream(object): # Set all versions dead, then set found versions alive and # delete old versions - Version.objects.filter(package=obj, packaged=False).update(alive=False) + if self.purge_versions: + Version.objects.filter(package=obj, packaged=False).update(alive=False) return obj @@ -111,33 +110,34 @@ class ScanUpstream(object): package.save() + def purge_old_versions(self): + if not self.purge_versions: + return + + versions = Version.objects.filter(packaged=False, alive=False) + for version in versions: + VersionLog.objects.create( + package=version.package, + action=VersionLog.VERSION_REMOVED, + slot=version.slot, + revision=version.revision, + version=version.version, + overlay=version.overlay + ) + + version.package.n_versions -= 1 + version.package.save() + + self.logger.info('- [u] %s %s' % (version, version.urls)) + + versions.delete() + @commit_on_success -def do_purge_versions(logger=None): - logger = logger or FakeLogger() - - # For each dead versions - for version in Version.objects.filter(packaged=False, alive=False): - VersionLog.objects.create( - package=version.package, - action=VersionLog.VERSION_REMOVED, - slot=version.slot, - revision=version.revision, - version=version.version, - overlay=version.overlay - ) - - version.package.n_versions -= 1 - version.package.save() - - logger.info('- [u] %s %s' % (version, version.urls)) - Version.objects.filter(packaged=False, alive=False).delete() - - def scan_upstream(packages=None, purge_versions=False, logger=None): logger = logger or FakeLogger() - scan_handler = ScanUpstream(logger=logger) + scan_handler = ScanUpstream(logger=logger, purge_versions=purge_versions) logger.info('Scanning upstream...') @@ -148,14 +148,10 @@ def scan_upstream(packages=None, purge_versions=False, for pkg in packages: if isinstance(pkg, Package): - curr = scan_handler.scan('%s/%s' % (pkg.category, pkg.name)) + scan_handler.scan('%s/%s' % (pkg.category, pkg.name)) else: - curr = scan_handler.scan(pkg) - if not curr: - result = False + scan_handler.scan(pkg) - if purge_versions: - do_purge_versions(logger=logger) + scan_handler.purge_old_versions() logger.info('Done.') - return result diff --git a/euscanwww/djeuscan/processing/update_portage_trees.py b/euscanwww/djeuscan/processing/update_portage_trees.py deleted file mode 100644 index cbf0a0b..0000000 --- a/euscanwww/djeuscan/processing/update_portage_trees.py +++ /dev/null @@ -1,66 +0,0 @@ -import subprocess -from StringIO import StringIO - -from django.conf import settings - -from djeuscan.processing import FakeLogger - - -def _launch_command(cmd): - """ - Helper for launching shell commands inside tasks - """ - fp = subprocess.Popen(cmd, stdout=subprocess.PIPE, - stderr=subprocess.PIPE) - output = StringIO(fp.communicate()[0]) - return output.getvalue() - - -def emerge_sync(): - """ - Launches an emerge --sync - """ - cmd = ["emerge", "--sync", "--root", settings.PORTAGE_ROOT, - "--config-root", settings.PORTAGE_CONFIGROOT] - return _launch_command(cmd) - - -def layman_sync(): - """ - Syncs Layman repos - """ - from layman import Layman - l = Layman(config=settings.LAYMAN_CONFIG) - return l.sync(l.get_installed(), output_results=False) - - -def emerge_regen(): - """ - Launches emerge --regen - """ - cmd = [ - "emerge", "--regen", "--jobs", settings.EMERGE_REGEN_JOBS, "--root", - settings.PORTAGE_ROOT, "--config-root", settings.PORTAGE_CONFIGROOT - ] - return _launch_command(cmd) - - -def eix_update(): - """ - Launches eix-update - """ - cmd = ["eix-update"] - return _launch_command(cmd) - - -def update_portage_trees(logger=None): - logger = logger or FakeLogger() - logger.info("Running emerge --sync") - emerge_sync() - logger.info("Running layman --sync") - layman_sync() - logger.info("Running emerge --regen") - emerge_regen() - logger.info("Running eix-update") - eix_update() - logger.info("Done!") diff --git a/euscanwww/euscanwww/settings.py b/euscanwww/euscanwww/settings.py index 9a96165..1cb50f3 100644 --- a/euscanwww/euscanwww/settings.py +++ b/euscanwww/euscanwww/settings.py @@ -222,10 +222,10 @@ ACCOUNT_ACTIVATION_DAYS = 7 EMAIL_BACKEND = 'django.core.mail.backends.console.EmailBackend' # djeuscan tasks -PORTAGE_ROOT = "/usr/portage/" -PORTAGE_CONFIGROOT = PORTAGE_ROOT +PORTAGE_ROOT = "/" +PORTAGE_CONFIGROOT = "/" LAYMAN_CONFIG = "/etc/layman/layman.cfg" -EMERGE_REGEN_JOBS = 4 +EGENCACHE_JOBS = 4 # Celery config import djcelery @@ -255,3 +255,7 @@ except ImportError, ex: "settings.py: error importing local settings file:\n" "\t%s\nDo you have a local_settings.py module?\n" % str(ex) ) + +os.environ['ROOT'] = PORTAGE_ROOT +os.environ['PORTAGE_CONFIGROOT'] = PORTAGE_CONFIGROOT +os.environ['EIX_CACHEFILE'] = os.path.join(PORTAGE_ROOT, 'var/cache/eix') From bdff6b7c6316ee42704ee7a5eeea2c5ae7237dcd Mon Sep 17 00:00:00 2001 From: Corentin Chary Date: Tue, 3 Jul 2012 10:56:14 +0200 Subject: [PATCH 2/3] euscanwww/scan_portage: try using gentoopm and fix slots gentoopm is still slower with portage backend, will try pkgcore backend later. Signed-off-by: Corentin Chary --- .../djeuscan/processing/scan/scan_portage.py | 37 ++++++++++++++++++- 1 file changed, 35 insertions(+), 2 deletions(-) diff --git a/euscanwww/djeuscan/processing/scan/scan_portage.py b/euscanwww/djeuscan/processing/scan/scan_portage.py index cbda779..ac59bc9 100644 --- a/euscanwww/djeuscan/processing/scan/scan_portage.py +++ b/euscanwww/djeuscan/processing/scan/scan_portage.py @@ -56,6 +56,38 @@ class ScanPortage(object): ) self._cache['versions'][key] = version + def scan_gentoopm(self, query, category=None): + import gentoopm + + pm = gentoopm.get_package_manager() + + if category: + packages = pm.stack.filter(key_category=category) + elif query: + packages = pm.stack.filter(query) + else: + packages = pm.stack + + package = {} + package_name = None + + for p in packages: + pkg = p.key.package + + if pkg != package_name: + if package_name: + yield package + package_name = pkg + package['package'] = p.key.package + package['category'] = p.key.category + package['homepage'] = ' '.join(p.homepages) + package['description'] = p.description + package['versions'] = [] + package['versions'].append((p._cpv, p.slot, p.repository or 'gentoo')) + + if package_name: + yield package + def scan_eix_xml(self, query, category=None): cmd = ['eix', '--xml'] if query: @@ -93,7 +125,7 @@ class ScanPortage(object): # append version data to versions cpv = "%s/%s-%s" % \ (package["category"], package["package"], elem.attrib["id"]) - slot = elem.attrib.get("slot", "") + slot = elem.attrib.get("slot", "0") overlay = elem.attrib.get("repository", "gentoo") package["versions"].append((cpv, slot, overlay)) @@ -119,7 +151,7 @@ class ScanPortage(object): self.logger.info('Killing existing versions...') qs = Version.objects.filter(packaged=True) if category: - qs.filter(package__category=category) + qs = qs.filter(package__category=category) qs.update(alive=False) self.logger.info('done') else: @@ -143,6 +175,7 @@ class ScanPortage(object): packages_alive = set() for data in self.scan_eix_xml(query, category): + #for data in self.scan_gentoopm(query, category): cat, pkg = data['category'], data['package'] package = self.store_package(cat, pkg, data['homepage'], data['description']) packages_alive.add("%s/%s" % (cat, pkg)) From 2f0080e5445c8c23723c9ede4552ce5d1e0b3125 Mon Sep 17 00:00:00 2001 From: Corentin Chary Date: Tue, 3 Jul 2012 10:57:07 +0200 Subject: [PATCH 3/3] git: update .gitignore Signed-off-by: Corentin Chary --- .gitignore | 4 +--- euscanwww/.gitignore | 1 + 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/.gitignore b/.gitignore index 8f5f161..30ec2af 100644 --- a/.gitignore +++ b/.gitignore @@ -1,6 +1,4 @@ *~ *.py[co] -euscanwww/rrd/*.rrd -euscanwww/media/charts/*.png *.egg-info -local_settings.py + diff --git a/euscanwww/.gitignore b/euscanwww/.gitignore index 484aad9..83e67ee 100644 --- a/euscanwww/.gitignore +++ b/euscanwww/.gitignore @@ -3,3 +3,4 @@ var/charts/*.png var/rrd/*.rrd var/db/euscan.db var/db/euscan.db-journal +local_settings.py \ No newline at end of file