From 9bfd728d4fcfb0a2ed6d92ef63376d5db045e4a0 Mon Sep 17 00:00:00 2001 From: volpino Date: Fri, 22 Jun 2012 09:32:39 +0200 Subject: [PATCH] euscanwww: Moved business logic to djeuscan.processing Moved stuff from management commands to djeuscan.processing Signed-off-by: volpino --- .../management/commands/regen_rrds.py | 22 +- .../management/commands/scan_metadata.py | 153 +-------- .../management/commands/scan_portage.py | 305 +---------------- .../management/commands/scan_upstream.py | 194 +---------- .../management/commands/update_counters.py | 186 +--------- euscanwww/djeuscan/processing/__init__.py | 3 + euscanwww/djeuscan/processing/regen_rrds.py | 31 ++ .../djeuscan/processing/scan_metadata.py | 148 ++++++++ euscanwww/djeuscan/processing/scan_portage.py | 320 ++++++++++++++++++ .../djeuscan/processing/scan_upstream.py | 139 ++++++++ .../djeuscan/processing/update_counters.py | 185 ++++++++++ .../processing/update_portage_trees.py | 66 ++++ euscanwww/djeuscan/tasks.py | 298 +++++++++------- 13 files changed, 1110 insertions(+), 940 deletions(-) create mode 100644 euscanwww/djeuscan/processing/__init__.py create mode 100644 euscanwww/djeuscan/processing/regen_rrds.py create mode 100644 euscanwww/djeuscan/processing/scan_metadata.py create mode 100644 euscanwww/djeuscan/processing/scan_portage.py create mode 100644 euscanwww/djeuscan/processing/scan_upstream.py create mode 100644 euscanwww/djeuscan/processing/update_counters.py create mode 100644 euscanwww/djeuscan/processing/update_portage_trees.py diff --git a/euscanwww/djeuscan/management/commands/regen_rrds.py b/euscanwww/djeuscan/management/commands/regen_rrds.py index 1c041fa..f6b2ba0 100644 --- a/euscanwww/djeuscan/management/commands/regen_rrds.py +++ b/euscanwww/djeuscan/management/commands/regen_rrds.py @@ -1,25 +1,5 @@ from django.core.management.base import BaseCommand -from djeuscan.models import HerdLog, MaintainerLog, CategoryLog, WorldLog -from djeuscan import charts - - -def regen_rrds(): - """ - Regenerates the rrd database - """ - for wlog in WorldLog.objects.all(): - charts.rrd_update('world', wlog.datetime, wlog) - - for clog in CategoryLog.objects.all(): - charts.rrd_update('category-%s' % clog.category, - clog.datetime, clog) - - for hlog in HerdLog.objects.all(): - charts.rrd_update('herd-%d' % hlog.herd.id, hlog.datetime, hlog) - - for mlog in MaintainerLog.objects.all(): - charts.rrd_update('maintainer-%d' % mlog.maintainer.id, - mlog.datetime, mlog) +from djeuscan.processing.regen_rrds import regen_rrds class Command(BaseCommand): diff --git a/euscanwww/djeuscan/management/commands/scan_metadata.py b/euscanwww/djeuscan/management/commands/scan_metadata.py index 1e2cfdc..5a19315 100644 --- a/euscanwww/djeuscan/management/commands/scan_metadata.py +++ b/euscanwww/djeuscan/management/commands/scan_metadata.py @@ -1,142 +1,9 @@ import sys - from optparse import make_option -from gentoolkit.query import Query -from gentoolkit.errors import GentoolkitFatalError - -from django.db.transaction import commit_on_success from django.core.management.base import BaseCommand -from django.core.management.color import color_style -from django.core.exceptions import ValidationError -from djeuscan.models import Package, Herd, Maintainer - - -class ScanMetadata(object): - def __init__(self, quiet=False): - self.quiet = quiet - self.style = color_style() - - @commit_on_success - def scan(self, query=None, obj=None): - matches = Query(query).find( - include_masked=True, - in_installed=False, - ) - - if not matches: - sys.stderr.write( - self.style.ERROR("Unknown package '%s'\n" % query) - ) - return - - matches = sorted(matches) - pkg = matches.pop() - if '9999' in pkg.version and len(matches): - pkg = matches.pop() - - if not obj: - obj, created = Package.objects.get_or_create( - category=pkg.category, name=pkg.name - ) - else: - created = False - - try: - obj.homepage = pkg.environment("HOMEPAGE") - obj.description = pkg.environment("DESCRIPTION") - except GentoolkitFatalError, err: - sys.stderr.write( - self.style.ERROR( - "Gentoolkit fatal error: '%s'\n" % str(err) - ) - ) - - if created and not self.quiet: - sys.stdout.write('+ [p] %s/%s\n' % (pkg.category, pkg.name)) - - if pkg.metadata: - herds = dict( - [(herd[0], herd) for herd in pkg.metadata.herds(True)] - ) - maintainers = dict( - [(m.email, m) for m in pkg.metadata.maintainers()] - ) - - existing_herds = [h.herd for h in obj.herds.all()] - new_herds = set(herds.keys()).difference(existing_herds) - old_herds = set(existing_herds).difference(herds.keys()) - - existing_maintainers = [m.email for m in obj.maintainers.all()] - new_maintainers = set( - maintainers.keys()).difference(existing_maintainers - ) - old_maintainers = set( - existing_maintainers).difference(maintainers.keys() - ) - - for herd in obj.herds.all(): - if herd.herd in old_herds: - obj.herds.remove(herd) - - for herd in new_herds: - herd = self.store_herd(*herds[herd]) - obj.herds.add(herd) - - for maintainer in obj.maintainers.all(): - if maintainer.email in old_maintainers: - obj.maintainers.remove(maintainer) - - for maintainer in new_maintainers: - maintainer = maintainers[maintainer] - try: - maintainer = self.store_maintainer( - maintainer.name, maintainer.email - ) - obj.maintainers.add(maintainer) - except ValidationError: - sys.stderr.write( - self.style.ERROR("Bad maintainer: '%s' '%s'\n" % (maintainer.name, maintainer.email)) - ) - - obj.save() - - def store_herd(self, name, email): - if not name: - name = '{nil}' - name = name.strip("\r").strip("\n").strip("\t").strip() - - herd, created = Herd.objects.get_or_create( - herd=name, - defaults={"email": email} - ) - - if created and not self.quiet: - sys.stdout.write('+ [h] %s <%s>\n' % (name, email)) - - herd.email = email - herd.save() - - return herd - - def store_maintainer(self, name, email): - if not name: - name = email - if not name: - name = '{nil}' - - maintainer, created = Maintainer.objects.get_or_create( - email=email, - defaults={"name": name} - ) - - if created: - if not self.quiet: - sys.stdout.write( - '+ [m] %s <%s>\n' % (name.encode('utf-8'), email) - ) - return maintainer +from djeuscan.processing.scan_metadata import scan_metadata class Command(BaseCommand): @@ -158,16 +25,12 @@ class Command(BaseCommand): help = 'Scans metadata and fills database' def handle(self, *args, **options): - self.options = options - - scan_metadata = ScanMetadata(quiet=options["quiet"]) - if options['all']: - for pkg in Package.objects.all(): - scan_metadata.scan('%s/%s' % (pkg.category, pkg.name), pkg) - elif len(args) > 0: - for package in args: - scan_metadata.scan(package) + packages = None + + elif len(args): + packages = [pkg for pkg in args] else: - for package in sys.stdin.readlines(): - scan_metadata.scan(package[:-1]) + packages = [pkg[:-1] for pkg in sys.stdin.readlines()] + + scan_metadata(packages=packages, quiet=options["quiet"]) diff --git a/euscanwww/djeuscan/management/commands/scan_portage.py b/euscanwww/djeuscan/management/commands/scan_portage.py index 039ed78..2bfede8 100644 --- a/euscanwww/djeuscan/management/commands/scan_portage.py +++ b/euscanwww/djeuscan/management/commands/scan_portage.py @@ -1,272 +1,8 @@ -import subprocess -import portage import sys -import os -import re from optparse import make_option -from collections import defaultdict -from django.db.transaction import commit_on_success from django.core.management.base import BaseCommand -from django.core.management.color import color_style - -from djeuscan.models import Package, Version, VersionLog - - -class ScanPortage(object): - def __init__(self, stdout=None, options=None): - self.stdout = sys.stdout if stdout is None else stdout - self.options = defaultdict(lambda: None) \ - if options is None else options - - self.style = color_style() - self._cache = {'packages': {}, 'versions': {}} - self._overlays = None - - def cache_hash_package(self, category, name): - return '%s/%s' % (category, name) - - def cache_store_package(self, package): - key = self.cache_hash_package(package.category, package.name) - self._cache['packages'][key] = package - - def cache_get_package(self, category, name): - return self._cache['packages'].get( - self.cache_hash_package(category, name) - ) - - def cache_hash_version(self, category, name, version, revision, slot, - overlay): - key = '%s/%s-%s-r%s %s %s' % (category, name, - version, revision, - slot, overlay) - return key - - def cache_get_version(self, category, name, version, revision, slot, - overlay): - key = self.cache_hash_version(category, name, version, revision, slot, - overlay) - return self._cache['versions'].get(key) - - def cache_store_version(self, version): - key = self.cache_hash_version( - version.package.category, version.package.name, version.version, - version.revision, version.slot, version.overlay - ) - self._cache['versions'][key] = version - - def overlays(self): - if self._overlays: - return self._overlays - - env = os.environ - env['OVERLAYS_LIST'] = 'all' - env['PRINT_COUNT_ALWAYS'] = 'never' - - cmd = ['eix', '-!'] - - output = subprocess.Popen(cmd, stdout=subprocess.PIPE, env=env).\ - communicate()[0] - output = output.strip().strip('\n').split('\n') - - overlay_re = re.compile(r'^\[(?P\d+)] "(?P.*?)"') - - self._overlays = {} - - for line in output: - match = overlay_re.match(line) - if not match: - continue - self._overlays[match.group('key')] = match.group('name') - - return self._overlays - - @commit_on_success - def scan(self, query=None): - env = os.environ - env['MY'] = "/-: []\n" - - cmd = ['eix', '--format', '', '--pure-packages', - '-x'] - if query: - cmd.extend(['--exact', query]) - - if self.options['all']: - if not self.options['quiet']: - self.stdout.write('Killing existing versions...') - self.stdout.flush() - Version.objects.filter(packaged=True).update(alive=False) - if not self.options['quiet']: - self.stdout.write('done\n') - - output = subprocess.Popen(cmd, stdout=subprocess.PIPE, env=env).\ - communicate()[0] - output = output.strip().strip('\n') - - if len(output) == 0: - if not query: - return - if self.options['purge-packages']: - if not self.options['quiet']: - sys.stdout.write('- [p] %s\n' % (query)) - if '/' in query: - cat, pkg = portage.catsplit(query) - Package.objects.filter(category=cat, name=pkg).delete() - else: - Package.objects.filter(name=query).delete() - else: - sys.stderr.write( - self.style.ERROR( - "Unknown package '%s'\n" % query - ) - ) - return - - output = output.split('\n') - packages = {} - - line_re = re.compile( - r'^(?P.*?):(?P.*?) \[(?P.*?)\]$' - ) - - package = None - - for line in output: - match = line_re.match(line) - - if not match: - continue - - cpv = match.group('cpv') - slot = match.group('slot') - overlay = match.group('overlay') - - cat, pkg, ver, rev = portage.catpkgsplit(cpv) - - packages['%s/%s' % (cat, pkg)] = True - - if not package or not \ - (cat == package.category and pkg == package.name): - package = self.store_package(cat, pkg) - - self.store_version(package, cpv, slot, overlay) - - if self.options['purge-packages'] and not query: - for package in Package.objects.all(): - cp = "%s/%s" % (package.category, package.name) - if cp not in packages: - if not self.options['quiet']: - sys.stdout.write('- [p] %s\n' % (package)) - package.delete() - - def store_package(self, cat, pkg): - created = False - obj = self.cache_get_package(cat, pkg) - - if not obj: - obj, created = Package.objects.get_or_create( - category=cat, - name=pkg - ) - self.cache_store_package(obj) - - if created: - if not self.options['quiet']: - sys.stdout.write('+ [p] %s/%s\n' % (cat, pkg)) - - # Set all versions dead, then set found versions alive and - # delete old versions - if not self.options['all']: - Version.objects.filter( - package=obj, - packaged=True - ).update(alive=False) - - return obj - - def store_version(self, package, cpv, slot, overlay): - cat, pkg, ver, rev = portage.catpkgsplit(cpv) - - overlays = self.overlays() - - if overlay in overlays: - overlay = overlays[overlay] - else: - overlay = 'gentoo' - - created = False - obj = self.cache_get_version( - package.category, package.name, ver, rev, slot, overlay - ) - if not obj: - obj, created = Version.objects.get_or_create( - package=package, slot=slot, - revision=rev, version=ver, - overlay=overlay, - defaults={"alive": True, "packaged": True} - ) - if not created: # Created objects have defaults values - obj.alive = True - obj.packaged = True - obj.save() - - if created: - self.cache_store_version(obj) - - # nothing to do (note: it can't be an upstream version because - # overlay can't be empty here) - if not created: - return - - if not self.options['quiet']: - sys.stdout.write('+ [v] %s \n' % (obj)) - - if overlay == 'gentoo': - package.n_packaged += 1 - else: - package.n_overlay += 1 - package.n_versions += 1 - package.save() - - if self.options['no-log']: - return - - VersionLog.objects.create( - package=obj.package, - action=VersionLog.VERSION_ADDED, - slot=obj.slot, - revision=obj.revision, - version=obj.version, - overlay=obj.overlay - ) - -@commit_on_success -def purge_versions(quiet=False, nolog=False): - # For each dead versions - for version in Version.objects.filter(packaged=True, alive=False): - if version.overlay == 'gentoo': - version.package.n_packaged -= 1 - else: - version.package.n_overlay -= 1 - version.package.n_versions -= 1 - version.package.save() - - if not quiet: - sys.stdout.write('- [v] %s\n' % (version)) - - if nolog: - continue - - VersionLog.objects.create( - package=version.package, - action=VersionLog.VERSION_REMOVED, - slot=version.slot, - revision=version.revision, - version=version.version, - overlay=version.overlay - ) - - Version.objects.filter(packaged=True, alive=False).delete() +from djeuscan.processing.scan_portage import scan_portage class Command(BaseCommand): @@ -309,33 +45,20 @@ class Command(BaseCommand): help = 'Scans portage tree and fills database' def handle(self, *args, **options): - scan_portage = ScanPortage(stdout=self.stdout, options=options) - - if not options['quiet']: - self.stdout.write('Scanning portage tree...\n') - - if options['prefetch']: - if not options['quiet']: - self.stdout.write('Prefetching objects...') - self.stdout.flush() - for package in Package.objects.all(): - scan_portage.cache_store_package(package) - for version in Version.objects.select_related('package').all(): - scan_portage.cache_store_version(version) - if not options['quiet']: - self.stdout.write('done\n') - if options['all']: - scan_portage.scan() + packages = None + elif len(args): - for package in args: - scan_portage.scan(package) + packages = [pkg for pkg in args] else: - for package in sys.stdin.readlines(): - scan_portage.scan(package[:-1]) + packages = [pkg[:-1] for pkg in sys.stdin.readlines()] - if options['purge-versions']: - purge_versions(options["quiet"], options["no-log"]) - - if not options['quiet']: - self.stdout.write('Done.\n') + scan_portage( + packages=packages, + no_log=options["no-log"], + purge_packages=options["purge-packages"], + purge_versions=options["purge-versions"], + prefetch=options["prefetch"], + quiet=options["quiet"], + stdout=self.stdout, + ) diff --git a/euscanwww/djeuscan/management/commands/scan_upstream.py b/euscanwww/djeuscan/management/commands/scan_upstream.py index f850e52..043fb83 100644 --- a/euscanwww/djeuscan/management/commands/scan_upstream.py +++ b/euscanwww/djeuscan/management/commands/scan_upstream.py @@ -1,162 +1,9 @@ -import portage import sys -import re - from optparse import make_option -from django.utils import timezone -from django.db.transaction import commit_on_success from django.core.management.base import BaseCommand -from euscan import CONFIG, output -from euscan.scan import scan_upstream - -from djeuscan.models import Package, Version, EuscanResult, VersionLog - - -class ScanUpstream(object): - def __init__(self, quiet=False): - self.quiet = quiet - - def scan(self, package): - CONFIG["format"] = "dict" - output.set_query(package) - - scan_upstream(package) - - out = output.get_formatted_output() - out_json = output.get_formatted_output("json") - - try: - cpv = out[package]["metadata"]["cpv"] - except KeyError: - return {} - - obj = self.store_package(cpv) - - for res in out[package]["result"]: - self.store_version(obj, res["version"], " ".join(res["urls"])) - - self.store_result(obj, out_json) - - return out - - def store_result(self, package, log): - # Remove previous logs - EuscanResult.objects.filter(package=package).delete() - - obj = EuscanResult() - obj.package = package - obj.result = log - obj.datetime = timezone.now() - obj.save() - - def store_package(self, cpv): - cat, pkg, ver, rev = portage.catpkgsplit(cpv) - - obj, created = Package.objects.get_or_create(category=cat, name=pkg) - - if created and not self.quiet: - sys.stdout.write('+ [p] %s/%s\n' % (cat, pkg)) - - # Set all versions dead, then set found versions alive and - # delete old versions - Version.objects.filter(package=obj, packaged=False).update(alive=False) - - return obj - - def store_version(self, package, ver, url): - obj, created = Version.objects.get_or_create( - package=package, slot='', revision='r0', version=ver, overlay='', - defaults={"alive": True, "urls": url, "packaged": False} - ) - if not created: - obj.alive = True - obj.urls = url - obj.packaged = False - obj.save() - - # If it's not a new version, just update the object and continue - if not created: - return - - if not self.quiet: - sys.stdout.write('+ [u] %s %s\n' % (obj, url)) - - VersionLog.objects.create( - package=package, - action=VersionLog.VERSION_ADDED, - slot='', - revision='r0', - version=ver, - overlay='' - ) - - package.n_versions += 1 - package.save() - - @commit_on_success - def parse_output(self, output): - from portage.versions import _cp - - if type(_cp) == dict: - _cp = _cp["dots_allowed_in_PN"] - - package_re = re.compile( - r'^ \* (?P' + _cp + ') \[(?P.*?)\]$' - ) - version_re = re.compile( - r'^Upstream Version: (?P.*?) (?P.*?)$' - ) - - package = None - log = "" - - while True: - line = output.readline() - if line == '': - break - match = package_re.match(line) - if match: - if package: - self.store_result(package, log) - - cpv = match.group('cpv') - package = self.store_package(cpv) - log = line - continue - - log += line - - match = version_re.match(line) - if match: - ver = match.group('ver') - url = match.group('url') - self.store_version(package, ver, url) - - if package: - self.store_result(package, log) - - -@commit_on_success -def purge_versions(quiet=False): - # For each dead versions - for version in Version.objects.filter(packaged=False, alive=False): - VersionLog.objects.create( - package=version.package, - action=VersionLog.VERSION_REMOVED, - slot=version.slot, - revision=version.revision, - version=version.version, - overlay=version.overlay - ) - - version.package.n_versions -= 1 - version.package.save() - - if not quiet: - sys.stdout.write('- [u] %s %s\n' % (version, version.urls)) - Version.objects.filter(packaged=False, alive=False).delete() +from djeuscan.processing.scan_upstream import scan_upstream class Command(BaseCommand): @@ -168,11 +15,6 @@ class Command(BaseCommand): dest='all', default=False, help='Scan all packages'), - make_option('--feed', - action='store_true', - dest='feed', - default=False, - help='Read euscan output from stdin'), make_option('--purge-versions', action='store_true', dest='purge-versions', @@ -188,29 +30,17 @@ class Command(BaseCommand): help = 'Scans metadata and fills database' def handle(self, *args, **options): - scan_upstream = ScanUpstream(options["quiet"]) - - if options['feed']: - scan_upstream.parse_output(sys.stdin) - if options['purge-versions']: - purge_versions(options["quiet"]) - return - - if not options['quiet']: - self.stdout.write('Scanning upstream...\n') - if options['all']: - for pkg in Package.objects.all(): - scan_upstream.scan('%s/%s' % (pkg.category, pkg.name)) - elif args: - for arg in args: - scan_upstream.scan(arg) + packages = None + + elif len(args): + packages = [pkg for pkg in args] else: - for package in sys.stdin.readlines(): - scan_upstream.scan(package[:-1]) + packages = [pkg[:-1] for pkg in sys.stdin.readlines()] - if options['purge-versions']: - purge_versions(options["quiet"]) - - if not options['quiet']: - self.stdout.write('Done.\n') + scan_upstream( + packages=packages, + purge_versions=options["purge-versions"], + quiet=options["quiet"], + stdout=self.stdout, + ) diff --git a/euscanwww/djeuscan/management/commands/update_counters.py b/euscanwww/djeuscan/management/commands/update_counters.py index 0b67a39..7dc407e 100644 --- a/euscanwww/djeuscan/management/commands/update_counters.py +++ b/euscanwww/djeuscan/management/commands/update_counters.py @@ -1,185 +1,8 @@ -import sys from optparse import make_option -from django.db.transaction import commit_on_success from django.core.management.base import BaseCommand -from django.utils import timezone -from djeuscan.models import Package, Herd, Maintainer, Version -from djeuscan.models import HerdLog, MaintainerLog, CategoryLog, WorldLog -from djeuscan import charts - -from distutils.version import StrictVersion, LooseVersion - - -def compare_versions(version1, version2): - try: - return cmp(StrictVersion(version1), StrictVersion(version2)) - # in case of abnormal version number, fall back to LooseVersion - except ValueError: - return cmp(LooseVersion(version1), LooseVersion(version2)) - - -def add_safe(storage, key): - if key not in storage: - storage[key] = 1 - else: - storage[key] += 1 - - -def add_last_ver(storage, version): - key = version['package_id'] - if key not in storage: - storage[key] = version - return - if version['version'].startswith('9999'): - return - if compare_versions(storage[key]['version'], - version['version']) < 0: - storage[key] = version - - -@commit_on_success -def update_counters(stdout=None, **options): - if stdout is None: - stdout = sys.stdout - - now = timezone.now() - - categories = {} - herds = {} - maintainers = {} - - wlog = None - - if not options['nolog']: - wlog = WorldLog() - wlog.datetime = now - - for cat in Package.objects.values('category').distinct(): - clog = CategoryLog() - clog.datetime = now - clog.category = cat['category'] - categories[clog.category] = clog - - for herd in Herd.objects.all(): - hlog = HerdLog() - hlog.datetime = now - hlog.herd = herd - herds[herd.id] = hlog - - for maintainer in Maintainer.objects.all(): - mlog = MaintainerLog() - mlog.datetime = now - mlog.maintainer = maintainer - maintainers[maintainer.id] = mlog - - package_queryset = Package.objects.all() - - n_versions = {} - n_packaged = {} - n_overlay = {} - - last_versions_gentoo = {} - last_versions_overlay = {} - last_versions_upstream = {} - - if not options['fast']: - attrs = ['id', 'version', 'overlay', 'packaged', 'package_id'] - for version in Version.objects.all().values(*attrs): - overlay, packaged = version['overlay'], version['packaged'] - package_id = version['package_id'] - - add_safe(n_versions, package_id) - - if not packaged: - add_last_ver(last_versions_upstream, version) - continue - if overlay == 'gentoo': - add_safe(n_packaged, package_id) - add_last_ver(last_versions_gentoo, version) - else: - add_safe(n_overlay, package_id) - add_last_ver(last_versions_overlay, version) - - for package in package_queryset.select_related('herds', 'maintainers'): - if not options['fast']: - package.n_versions = n_versions.get(package.id, 0) - package.n_packaged = n_packaged.get(package.id, 0) - package.n_overlay = n_overlay.get(package.id, 0) - - default = {'id': None} - package.last_version_gentoo_id = last_versions_gentoo.get( - package.id, default - )['id'] - package.last_version_overlay_id = last_versions_overlay.get( - package.id, default - )['id'] - package.last_version_upstream_id = last_versions_upstream.get( - package.id, default - )['id'] - - package.save() - - n_packages_gentoo = int(package.n_packaged == package.n_versions) - n_packages_overlay = int(package.n_overlay and package.n_packaged \ - + package.n_overlay == package.n_versions) - n_packages_outdated = int(package.n_packaged + package.n_overlay \ - < package.n_versions) - - def update_row(storage, key): - storage[key].n_packages_gentoo += n_packages_gentoo - storage[key].n_packages_overlay += n_packages_overlay - storage[key].n_packages_outdated += n_packages_outdated - - storage[key].n_versions_gentoo += package.n_packaged - storage[key].n_versions_overlay += package.n_overlay - storage[key].n_versions_upstream += package.n_versions - \ - package.n_packaged - \ - package.n_overlay - - def update_log(storage, qs): - for row in qs: - update_row(storage, row['id']) - - if not options['nolog']: - update_log(herds, package.herds.all().values('id')) - update_log(maintainers, package.maintainers.all().values('id')) - update_row(categories, package.category) - - wlog.n_packages_gentoo += n_packages_gentoo - wlog.n_packages_overlay += n_packages_overlay - wlog.n_packages_outdated += n_packages_outdated - - wlog.n_versions_gentoo += package.n_packaged - wlog.n_versions_overlay += package.n_overlay - wlog.n_versions_upstream += package.n_versions - \ - package.n_packaged - \ - package.n_overlay - - if options['nolog']: - return - - for clog in categories.values(): - if not options['quiet']: - stdout.write('+ [cl] %s\n' % clog) - charts.rrd_update('category-%s' % clog.category, now, clog) - clog.save() - - for hlog in herds.values(): - if not options['quiet']: - stdout.write('+ [hl] %s\n' % hlog) - charts.rrd_update('herd-%d' % hlog.herd.id, now, hlog) - hlog.save() - - for mlog in maintainers.values(): - if not options['quiet']: - stdout.write('+ [ml] %s\n' % mlog) - charts.rrd_update('maintainer-%d' % mlog.maintainer.id, now, mlog) - mlog.save() - - charts.rrd_update('world', now, wlog) - wlog.save() +from djeuscan.processing.update_counters import update_counters class Command(BaseCommand): @@ -205,4 +28,9 @@ class Command(BaseCommand): ) def handle(self, *args, **options): - update_counters(stdout=self.stdout, **options) + update_counters( + stdout=self.stdout, + fast=options["fast"], + quiet=options["quiet"], + nolog=options["nolog"], + ) diff --git a/euscanwww/djeuscan/processing/__init__.py b/euscanwww/djeuscan/processing/__init__.py new file mode 100644 index 0000000..c4ea508 --- /dev/null +++ b/euscanwww/djeuscan/processing/__init__.py @@ -0,0 +1,3 @@ +class FakeLogger(object): + def __getattr__(self, key): + return lambda *x, **y: None diff --git a/euscanwww/djeuscan/processing/regen_rrds.py b/euscanwww/djeuscan/processing/regen_rrds.py new file mode 100644 index 0000000..f578aa7 --- /dev/null +++ b/euscanwww/djeuscan/processing/regen_rrds.py @@ -0,0 +1,31 @@ +from djeuscan.models import HerdLog, MaintainerLog, CategoryLog, WorldLog +from djeuscan import charts + +from djeuscan.processing import FakeLogger + + +def regen_rrds(logger=None): + """ + Regenerates the rrd database + """ + + if logger is None: + logger = FakeLogger() + + logger.info("Regenering RRDs for world") + for wlog in WorldLog.objects.all(): + charts.rrd_update('world', wlog.datetime, wlog) + + logger.info("Regenering RRDs for categories") + for clog in CategoryLog.objects.all(): + charts.rrd_update('category-%s' % clog.category, + clog.datetime, clog) + + logger.info("Regenering RRDs for herds") + for hlog in HerdLog.objects.all(): + charts.rrd_update('herd-%d' % hlog.herd.id, hlog.datetime, hlog) + + logger.info("Regenering RRDs for maintainers") + for mlog in MaintainerLog.objects.all(): + charts.rrd_update('maintainer-%d' % mlog.maintainer.id, + mlog.datetime, mlog) diff --git a/euscanwww/djeuscan/processing/scan_metadata.py b/euscanwww/djeuscan/processing/scan_metadata.py new file mode 100644 index 0000000..15ab108 --- /dev/null +++ b/euscanwww/djeuscan/processing/scan_metadata.py @@ -0,0 +1,148 @@ +import sys +from gentoolkit.query import Query +from gentoolkit.errors import GentoolkitFatalError + +from django.db.transaction import commit_on_success +from django.core.management.color import color_style +from django.core.exceptions import ValidationError + +from djeuscan.models import Package, Herd, Maintainer + + +class ScanMetadata(object): + def __init__(self, quiet=False): + self.quiet = quiet + self.style = color_style() + + @commit_on_success + def scan(self, query=None, obj=None): + matches = Query(query).find( + include_masked=True, + in_installed=False, + ) + + if not matches: + sys.stderr.write( + self.style.ERROR("Unknown package '%s'\n" % query) + ) + return + + matches = sorted(matches) + pkg = matches.pop() + if '9999' in pkg.version and len(matches): + pkg = matches.pop() + + if not obj: + obj, created = Package.objects.get_or_create( + category=pkg.category, name=pkg.name + ) + else: + created = False + + try: + obj.homepage = pkg.environment("HOMEPAGE") + obj.description = pkg.environment("DESCRIPTION") + except GentoolkitFatalError, err: + sys.stderr.write( + self.style.ERROR( + "Gentoolkit fatal error: '%s'\n" % str(err) + ) + ) + + if created and not self.quiet: + sys.stdout.write('+ [p] %s/%s\n' % (pkg.category, pkg.name)) + + if pkg.metadata: + herds = dict( + [(herd[0], herd) for herd in pkg.metadata.herds(True)] + ) + maintainers = dict( + [(m.email, m) for m in pkg.metadata.maintainers()] + ) + + existing_herds = [h.herd for h in obj.herds.all()] + new_herds = set(herds.keys()).difference(existing_herds) + old_herds = set(existing_herds).difference(herds.keys()) + + existing_maintainers = [m.email for m in obj.maintainers.all()] + new_maintainers = set( + maintainers.keys()).difference(existing_maintainers + ) + old_maintainers = set( + existing_maintainers).difference(maintainers.keys() + ) + + for herd in obj.herds.all(): + if herd.herd in old_herds: + obj.herds.remove(herd) + + for herd in new_herds: + herd = self.store_herd(*herds[herd]) + obj.herds.add(herd) + + for maintainer in obj.maintainers.all(): + if maintainer.email in old_maintainers: + obj.maintainers.remove(maintainer) + + for maintainer in new_maintainers: + maintainer = maintainers[maintainer] + try: + maintainer = self.store_maintainer( + maintainer.name, maintainer.email + ) + obj.maintainers.add(maintainer) + except ValidationError: + sys.stderr.write( + self.style.ERROR("Bad maintainer: '%s' '%s'\n" % \ + (maintainer.name, maintainer.email)) + ) + + obj.save() + + def store_herd(self, name, email): + if not name: + name = '{nil}' + name = name.strip("\r").strip("\n").strip("\t").strip() + + herd, created = Herd.objects.get_or_create( + herd=name, + defaults={"email": email} + ) + + if created and not self.quiet: + sys.stdout.write('+ [h] %s <%s>\n' % (name, email)) + + herd.email = email + herd.save() + + return herd + + def store_maintainer(self, name, email): + if not name: + name = email + if not name: + name = '{nil}' + + maintainer, created = Maintainer.objects.get_or_create( + email=email, + defaults={"name": name} + ) + + if created: + if not self.quiet: + sys.stdout.write( + '+ [m] %s <%s>\n' % (name.encode('utf-8'), email) + ) + return maintainer + + +def scan_metadata(packages=None, quiet=False, logger=None): + scan_handler = ScanMetadata(quiet=quiet) + if packages is None: + packages = Package.objects.all() + + for pkg in packages: + if isinstance(pkg, Package): + scan_handler.scan('%s/%s' % (pkg.category, pkg.name), pkg) + else: + scan_handler.scan(pkg) diff --git a/euscanwww/djeuscan/processing/scan_portage.py b/euscanwww/djeuscan/processing/scan_portage.py new file mode 100644 index 0000000..43e91f4 --- /dev/null +++ b/euscanwww/djeuscan/processing/scan_portage.py @@ -0,0 +1,320 @@ +import subprocess +import portage +import sys +import os +import re + +from django.db.transaction import commit_on_success +from django.core.management.color import color_style + +from djeuscan.models import Package, Version, VersionLog + + +class ScanPortage(object): + def __init__(self, stdout=None, no_log=False, purge_packages=False, + purge_versions=False, kill_versions=False, quiet=False): + self.stdout = sys.stdout if stdout is None else stdout + self.no_log = no_log + self.purge_packages = purge_packages + self.purge_versions = purge_versions + self.kill_versions = kill_versions + self.quiet = quiet + + self.style = color_style() + self._cache = {'packages': {}, 'versions': {}} + self._overlays = None + + def cache_hash_package(self, category, name): + return '%s/%s' % (category, name) + + def cache_store_package(self, package): + key = self.cache_hash_package(package.category, package.name) + self._cache['packages'][key] = package + + def cache_get_package(self, category, name): + return self._cache['packages'].get( + self.cache_hash_package(category, name) + ) + + def cache_hash_version(self, category, name, version, revision, slot, + overlay): + key = '%s/%s-%s-r%s %s %s' % (category, name, + version, revision, + slot, overlay) + return key + + def cache_get_version(self, category, name, version, revision, slot, + overlay): + key = self.cache_hash_version(category, name, version, revision, slot, + overlay) + return self._cache['versions'].get(key) + + def cache_store_version(self, version): + key = self.cache_hash_version( + version.package.category, version.package.name, version.version, + version.revision, version.slot, version.overlay + ) + self._cache['versions'][key] = version + + def overlays(self): + if self._overlays: + return self._overlays + + env = os.environ + env['OVERLAYS_LIST'] = 'all' + env['PRINT_COUNT_ALWAYS'] = 'never' + + cmd = ['eix', '-!'] + + output = subprocess.Popen(cmd, stdout=subprocess.PIPE, env=env).\ + communicate()[0] + output = output.strip().strip('\n').split('\n') + + overlay_re = re.compile(r'^\[(?P\d+)] "(?P.*?)"') + + self._overlays = {} + + for line in output: + match = overlay_re.match(line) + if not match: + continue + self._overlays[match.group('key')] = match.group('name') + + return self._overlays + + @commit_on_success + def scan(self, query=None): + env = os.environ + env['MY'] = "/-: []\n" + + cmd = ['eix', '--format', '', '--pure-packages', + '-x'] + if query: + cmd.extend(['--exact', query]) + + if self.kill_versions: + if not self.quiet: + self.stdout.write('Killing existing versions...') + self.stdout.flush() + Version.objects.filter(packaged=True).update(alive=False) + if not self.quiet: + self.stdout.write('done\n') + + output = subprocess.Popen(cmd, stdout=subprocess.PIPE, env=env).\ + communicate()[0] + output = output.strip().strip('\n') + + if len(output) == 0: + if not query: + return + if self.purge_packages: + if not self.quiet: + sys.stdout.write('- [p] %s\n' % (query)) + if '/' in query: + cat, pkg = portage.catsplit(query) + Package.objects.filter(category=cat, name=pkg).delete() + else: + Package.objects.filter(name=query).delete() + else: + sys.stderr.write( + self.style.ERROR( + "Unknown package '%s'\n" % query + ) + ) + return + + output = output.split('\n') + packages = {} + + line_re = re.compile( + r'^(?P.*?):(?P.*?) \[(?P.*?)\]$' + ) + + package = None + + for line in output: + match = line_re.match(line) + + if not match: + continue + + cpv = match.group('cpv') + slot = match.group('slot') + overlay = match.group('overlay') + + cat, pkg, ver, rev = portage.catpkgsplit(cpv) + + packages['%s/%s' % (cat, pkg)] = True + + if not package or not \ + (cat == package.category and pkg == package.name): + package = self.store_package(cat, pkg) + + self.store_version(package, cpv, slot, overlay) + + if self.purge_packages and not query: + for package in Package.objects.all(): + cp = "%s/%s" % (package.category, package.name) + if cp not in packages: + if not self.quiet: + sys.stdout.write('- [p] %s\n' % (package)) + package.delete() + + def store_package(self, cat, pkg): + created = False + obj = self.cache_get_package(cat, pkg) + + if not obj: + obj, created = Package.objects.get_or_create( + category=cat, + name=pkg + ) + self.cache_store_package(obj) + + if created: + if not self.quiet: + sys.stdout.write('+ [p] %s/%s\n' % (cat, pkg)) + + # Set all versions dead, then set found versions alive and + # delete old versions + if not self.kill_versions: + Version.objects.filter( + package=obj, + packaged=True + ).update(alive=False) + + return obj + + def store_version(self, package, cpv, slot, overlay): + cat, pkg, ver, rev = portage.catpkgsplit(cpv) + + overlays = self.overlays() + + if overlay in overlays: + overlay = overlays[overlay] + else: + overlay = 'gentoo' + + created = False + obj = self.cache_get_version( + package.category, package.name, ver, rev, slot, overlay + ) + if not obj: + obj, created = Version.objects.get_or_create( + package=package, slot=slot, + revision=rev, version=ver, + overlay=overlay, + defaults={"alive": True, "packaged": True} + ) + if not created: # Created objects have defaults values + obj.alive = True + obj.packaged = True + obj.save() + + if created: + self.cache_store_version(obj) + + # nothing to do (note: it can't be an upstream version because + # overlay can't be empty here) + if not created: + return + + if not self.quiet: + sys.stdout.write('+ [v] %s \n' % (obj)) + + if overlay == 'gentoo': + package.n_packaged += 1 + else: + package.n_overlay += 1 + package.n_versions += 1 + package.save() + + if self.no_log: + return + + VersionLog.objects.create( + package=obj.package, + action=VersionLog.VERSION_ADDED, + slot=obj.slot, + revision=obj.revision, + version=obj.version, + overlay=obj.overlay + ) + + +@commit_on_success +def purge_versions(quiet=False, nolog=False): + # For each dead versions + for version in Version.objects.filter(packaged=True, alive=False): + if version.overlay == 'gentoo': + version.package.n_packaged -= 1 + else: + version.package.n_overlay -= 1 + version.package.n_versions -= 1 + version.package.save() + + if not quiet: + sys.stdout.write('- [v] %s\n' % (version)) + + if nolog: + continue + + VersionLog.objects.create( + package=version.package, + action=VersionLog.VERSION_REMOVED, + slot=version.slot, + revision=version.revision, + version=version.version, + overlay=version.overlay + ) + + Version.objects.filter(packaged=True, alive=False).delete() + + +def scan_portage(packages=None, no_log=False, purge_packages=False, + purge_versions=False, prefetch=False, logger=None, + quiet=False, stdout=None): + stdout = sys.stdout if stdout is None else stdout + + kill_versions = False + if packages is None: + prefetch = True + kill_versions = True + + scan_handler = ScanPortage( + stdout=stdout, + no_log=no_log, + purge_packages=purge_packages, + purge_versions=purge_versions, + kill_versions=kill_versions, + quiet=quiet, + ) + + if not quiet: + stdout.write('Scanning portage tree...\n') + + if prefetch: + if quiet: + stdout.write('Prefetching objects...') + stdout.flush() + for package in Package.objects.all(): + scan_handler.cache_store_package(package) + for version in Version.objects.select_related('package').all(): + scan_handler.cache_store_version(version) + if quiet: + stdout.write('done\n') + + if packages is None: + scan_handler.scan() + else: + for pkg in packages: + if isinstance(pkg, Package): + scan_handler.scan('%s/%s' % (pkg.category, pkg.name), pkg) + else: + scan_handler.scan(pkg) + + if purge_versions: + purge_versions(quiet, no_log) + + if not quiet: + stdout.write('Done.\n') diff --git a/euscanwww/djeuscan/processing/scan_upstream.py b/euscanwww/djeuscan/processing/scan_upstream.py new file mode 100644 index 0000000..9eee48b --- /dev/null +++ b/euscanwww/djeuscan/processing/scan_upstream.py @@ -0,0 +1,139 @@ +import portage +import sys + +from django.utils import timezone +from django.db.transaction import commit_on_success + +from euscan import CONFIG, output +from euscan.scan import scan_upstream as euscan_scan_upstream + +from djeuscan.models import Package, Version, EuscanResult, VersionLog + + +class ScanUpstream(object): + def __init__(self, quiet=False): + self.quiet = quiet + + def scan(self, package): + CONFIG["format"] = "dict" + output.set_query(package) + + euscan_scan_upstream(package) + + out = output.get_formatted_output() + out_json = output.get_formatted_output("json") + + try: + cpv = out[package]["metadata"]["cpv"] + except KeyError: + return {} + + obj = self.store_package(cpv) + + for res in out[package]["result"]: + self.store_version(obj, res["version"], " ".join(res["urls"])) + + self.store_result(obj, out_json) + + return out + + def store_result(self, package, log): + # Remove previous logs + EuscanResult.objects.filter(package=package).delete() + + obj = EuscanResult() + obj.package = package + obj.result = log + obj.datetime = timezone.now() + obj.save() + + def store_package(self, cpv): + cat, pkg, ver, rev = portage.catpkgsplit(cpv) + + obj, created = Package.objects.get_or_create(category=cat, name=pkg) + + if created and not self.quiet: + sys.stdout.write('+ [p] %s/%s\n' % (cat, pkg)) + + # Set all versions dead, then set found versions alive and + # delete old versions + Version.objects.filter(package=obj, packaged=False).update(alive=False) + + return obj + + def store_version(self, package, ver, url): + obj, created = Version.objects.get_or_create( + package=package, slot='', revision='r0', version=ver, overlay='', + defaults={"alive": True, "urls": url, "packaged": False} + ) + if not created: + obj.alive = True + obj.urls = url + obj.packaged = False + obj.save() + + # If it's not a new version, just update the object and continue + if not created: + return + + if not self.quiet: + sys.stdout.write('+ [u] %s %s\n' % (obj, url)) + + VersionLog.objects.create( + package=package, + action=VersionLog.VERSION_ADDED, + slot='', + revision='r0', + version=ver, + overlay='' + ) + + package.n_versions += 1 + package.save() + + +@commit_on_success +def purge_versions(quiet=False): + # For each dead versions + for version in Version.objects.filter(packaged=False, alive=False): + VersionLog.objects.create( + package=version.package, + action=VersionLog.VERSION_REMOVED, + slot=version.slot, + revision=version.revision, + version=version.version, + overlay=version.overlay + ) + + version.package.n_versions -= 1 + version.package.save() + + if not quiet: + sys.stdout.write('- [u] %s %s\n' % (version, version.urls)) + Version.objects.filter(packaged=False, alive=False).delete() + + +def scan_upstream(packages=None, purge_versions=False, quiet=False, + logger=None, stdout=None): + + stdout = sys.stdout if stdout is None else stdout + + scan_handler = ScanUpstream(quiet) + + if not quiet: + stdout.write('Scanning upstream...\n') + + if packages is None: + packages = Package.objects.all() + + for pkg in packages: + if isinstance(pkg, Package): + scan_handler.scan('%s/%s' % (pkg.category, pkg.name)) + else: + scan_handler.scan(pkg) + + if purge_versions: + purge_versions(quiet) + + if not quiet: + stdout.write('Done.\n') diff --git a/euscanwww/djeuscan/processing/update_counters.py b/euscanwww/djeuscan/processing/update_counters.py new file mode 100644 index 0000000..6d1c832 --- /dev/null +++ b/euscanwww/djeuscan/processing/update_counters.py @@ -0,0 +1,185 @@ +import sys + +from django.db.transaction import commit_on_success +from django.utils import timezone + +from djeuscan.models import Package, Herd, Maintainer, Version +from djeuscan.models import HerdLog, MaintainerLog, CategoryLog, WorldLog +from djeuscan import charts +from djeuscan.processing import FakeLogger + +from distutils.version import StrictVersion, LooseVersion + + +def _compare_versions(version1, version2): + try: + return cmp(StrictVersion(version1), StrictVersion(version2)) + # in case of abnormal version number, fall back to LooseVersion + except ValueError: + return cmp(LooseVersion(version1), LooseVersion(version2)) + + +def _add_safe(storage, key): + if key not in storage: + storage[key] = 1 + else: + storage[key] += 1 + + +def _add_last_ver(storage, version): + key = version['package_id'] + if key not in storage: + storage[key] = version + return + if version['version'].startswith('9999'): + return + if _compare_versions(storage[key]['version'], + version['version']) < 0: + storage[key] = version + + +@commit_on_success +def update_counters(stdout=None, fast=False, quiet=False, nolog=False, + logger=None): + if logger is None: + logger = FakeLogger() # TODO: write log messages + + if stdout is None: + stdout = sys.stdout + + now = timezone.now() + + categories = {} + herds = {} + maintainers = {} + + wlog = None + + if not nolog: + wlog = WorldLog() + wlog.datetime = now + + for cat in Package.objects.values('category').distinct(): + clog = CategoryLog() + clog.datetime = now + clog.category = cat['category'] + categories[clog.category] = clog + + for herd in Herd.objects.all(): + hlog = HerdLog() + hlog.datetime = now + hlog.herd = herd + herds[herd.id] = hlog + + for maintainer in Maintainer.objects.all(): + mlog = MaintainerLog() + mlog.datetime = now + mlog.maintainer = maintainer + maintainers[maintainer.id] = mlog + + package_queryset = Package.objects.all() + + n_versions = {} + n_packaged = {} + n_overlay = {} + + last_versions_gentoo = {} + last_versions_overlay = {} + last_versions_upstream = {} + + if not fast: + attrs = ['id', 'version', 'overlay', 'packaged', 'package_id'] + for version in Version.objects.all().values(*attrs): + overlay, packaged = version['overlay'], version['packaged'] + package_id = version['package_id'] + + _add_safe(n_versions, package_id) + + if not packaged: + _add_last_ver(last_versions_upstream, version) + continue + if overlay == 'gentoo': + _add_safe(n_packaged, package_id) + _add_last_ver(last_versions_gentoo, version) + else: + _add_safe(n_overlay, package_id) + _add_last_ver(last_versions_overlay, version) + + for package in package_queryset.select_related('herds', 'maintainers'): + if not fast: + package.n_versions = n_versions.get(package.id, 0) + package.n_packaged = n_packaged.get(package.id, 0) + package.n_overlay = n_overlay.get(package.id, 0) + + default = {'id': None} + package.last_version_gentoo_id = last_versions_gentoo.get( + package.id, default + )['id'] + package.last_version_overlay_id = last_versions_overlay.get( + package.id, default + )['id'] + package.last_version_upstream_id = last_versions_upstream.get( + package.id, default + )['id'] + + package.save() + + n_packages_gentoo = int(package.n_packaged == package.n_versions) + n_packages_overlay = int(package.n_overlay and package.n_packaged \ + + package.n_overlay == package.n_versions) + n_packages_outdated = int(package.n_packaged + package.n_overlay \ + < package.n_versions) + + def update_row(storage, key): + storage[key].n_packages_gentoo += n_packages_gentoo + storage[key].n_packages_overlay += n_packages_overlay + storage[key].n_packages_outdated += n_packages_outdated + + storage[key].n_versions_gentoo += package.n_packaged + storage[key].n_versions_overlay += package.n_overlay + storage[key].n_versions_upstream += package.n_versions - \ + package.n_packaged - \ + package.n_overlay + + def update_log(storage, qs): + for row in qs: + update_row(storage, row['id']) + + if not nolog: + update_log(herds, package.herds.all().values('id')) + update_log(maintainers, package.maintainers.all().values('id')) + update_row(categories, package.category) + + wlog.n_packages_gentoo += n_packages_gentoo + wlog.n_packages_overlay += n_packages_overlay + wlog.n_packages_outdated += n_packages_outdated + + wlog.n_versions_gentoo += package.n_packaged + wlog.n_versions_overlay += package.n_overlay + wlog.n_versions_upstream += package.n_versions - \ + package.n_packaged - \ + package.n_overlay + + if nolog: + return + + for clog in categories.values(): + if not quiet: + stdout.write('+ [cl] %s\n' % clog) + charts.rrd_update('category-%s' % clog.category, now, clog) + clog.save() + + for hlog in herds.values(): + if not quiet: + stdout.write('+ [hl] %s\n' % hlog) + charts.rrd_update('herd-%d' % hlog.herd.id, now, hlog) + hlog.save() + + for mlog in maintainers.values(): + if not quiet: + stdout.write('+ [ml] %s\n' % mlog) + charts.rrd_update('maintainer-%d' % mlog.maintainer.id, now, mlog) + mlog.save() + + charts.rrd_update('world', now, wlog) + wlog.save() diff --git a/euscanwww/djeuscan/processing/update_portage_trees.py b/euscanwww/djeuscan/processing/update_portage_trees.py new file mode 100644 index 0000000..cbf0a0b --- /dev/null +++ b/euscanwww/djeuscan/processing/update_portage_trees.py @@ -0,0 +1,66 @@ +import subprocess +from StringIO import StringIO + +from django.conf import settings + +from djeuscan.processing import FakeLogger + + +def _launch_command(cmd): + """ + Helper for launching shell commands inside tasks + """ + fp = subprocess.Popen(cmd, stdout=subprocess.PIPE, + stderr=subprocess.PIPE) + output = StringIO(fp.communicate()[0]) + return output.getvalue() + + +def emerge_sync(): + """ + Launches an emerge --sync + """ + cmd = ["emerge", "--sync", "--root", settings.PORTAGE_ROOT, + "--config-root", settings.PORTAGE_CONFIGROOT] + return _launch_command(cmd) + + +def layman_sync(): + """ + Syncs Layman repos + """ + from layman import Layman + l = Layman(config=settings.LAYMAN_CONFIG) + return l.sync(l.get_installed(), output_results=False) + + +def emerge_regen(): + """ + Launches emerge --regen + """ + cmd = [ + "emerge", "--regen", "--jobs", settings.EMERGE_REGEN_JOBS, "--root", + settings.PORTAGE_ROOT, "--config-root", settings.PORTAGE_CONFIGROOT + ] + return _launch_command(cmd) + + +def eix_update(): + """ + Launches eix-update + """ + cmd = ["eix-update"] + return _launch_command(cmd) + + +def update_portage_trees(logger=None): + logger = logger or FakeLogger() + logger.info("Running emerge --sync") + emerge_sync() + logger.info("Running layman --sync") + layman_sync() + logger.info("Running emerge --regen") + emerge_regen() + logger.info("Running eix-update") + eix_update() + logger.info("Done!") diff --git a/euscanwww/djeuscan/tasks.py b/euscanwww/djeuscan/tasks.py index 290df65..2047ffe 100644 --- a/euscanwww/djeuscan/tasks.py +++ b/euscanwww/djeuscan/tasks.py @@ -1,23 +1,20 @@ -import subprocess -from StringIO import StringIO +""" +Celery tasks for djeuscan +""" + from itertools import islice from celery.task import task, periodic_task from celery.task.schedules import crontab from celery.task.sets import TaskSet -from django.conf import settings - -from euscan import output as euscan_output - from djeuscan.models import Package, RefreshPackageQuery -from djeuscan.management.commands.regen_rrds import regen_rrds -from djeuscan.management.commands.update_counters import update_counters -from djeuscan.management.commands.scan_metadata import ScanMetadata -from djeuscan.management.commands.scan_portage import ScanPortage, \ - purge_versions as scan_portage_purge -from djeuscan.management.commands.scan_upstream import ScanUpstream, \ - purge_versions as scan_upstream_purge +from djeuscan.processing.regen_rrds import regen_rrds +from djeuscan.processing.update_counters import update_counters +from djeuscan.processing.scan_metadata import scan_metadata +from djeuscan.processing.scan_portage import scan_portage +from djeuscan.processing.scan_upstream import scan_upstream +from djeuscan.processing.update_portage_trees import update_portage_trees class TaskFailedException(Exception): @@ -27,16 +24,6 @@ class TaskFailedException(Exception): pass -def _launch_command(cmd): - """ - Helper for launching shell commands inside tasks - """ - fp = subprocess.Popen(cmd, stdout=subprocess.PIPE, - stderr=subprocess.PIPE) - output = StringIO(fp.communicate()[0]) - return output.getvalue() - - def _chunks(it, n): """ Chunk generator, takes an iterator and the desired size of the chunk @@ -45,162 +32,231 @@ def _chunks(it, n): yield [first] + list(islice(it, n - 1)) -def _run_in_chunks(task, iterable, n=32): +def _run_in_chunks(task, packages, kwargs=None, concurrently=8, n=32): """ - Runs the given task with the given iterable of args in chunks of - n subtasks + Launches a TaskSet at a time with subtasks. + Each subtask has packages to handle """ output = [] - for chunk in _chunks(iter(iterable), n): - job = TaskSet(tasks=[ - task.subtask(args) - for args in chunk - ]) + + chunk_generator = _chunks(iter(packages), n) + done = False + + while not done: + tasks = [] + for _ in range(concurrently): + try: + chunk = chunk_generator.next() + except StopIteration: + done = True + else: + tasks.append(task.subtask((chunk, ), kwargs)) + job = TaskSet(tasks=tasks) result = job.apply_async() # TODO: understand why this causes timeout - #output.extend(list(result.join(timeout=3600))) + output.extend(list(result.join(timeout=3600))) return output @task def regen_rrds_task(): + """ + Regenerate RRDs + """ return regen_rrds() @task -def update_counters_task(): - return update_counters() +def update_counters_task(fast=True): + """ + Updates counters + """ + return update_counters(fast=fast) @task -def scan_metadata_task(query, obj=None): - logger = scan_metadata_task.get_logger() - logger.info("Starting metadata scanning for package %s ...", query) +def _scan_metadata_task(packages): + """ + Scans metadata for the given set of packages + """ + logger = _scan_metadata_task.get_logger() + logger.info("Starting metadata scanning subtask for %d packages...", + len(packages)) - scan_metadata = ScanMetadata() - result = scan_metadata.scan(query, obj) + result = scan_metadata( + packages=packages, + logger=logger, + ) if not result: - raise TaskFailedException("Couldn't scan metadata") + raise TaskFailedException return result @task def scan_metadata_list_task(query): - return _run_in_chunks(scan_metadata_task, [(p, ) for p in query.split()]) + """ + Runs a parallel metadata scan for packages in the query list (space + separated string). Task used only from the web interface. + """ + _run_in_chunks(_scan_metadata_task, [p for p in query.split()]) @task def scan_metadata_all_task(): - return _run_in_chunks( - scan_metadata_task, - [('%s/%s' % (pkg.category, pkg.name), pkg) - for pkg in Package.objects.all()] + """ + Runs a parallel metadata scan for all packages + """ + _run_in_chunks(_scan_metadata_task, Package.objects.all()) + + +@task +def _scan_portage_task(packages, no_logs=False, purge_packages=False, + purge_versions=False, prefetch=False): + """ + Scans portage for the given set of packages + """ + logger = _scan_portage_task.get_logger() + logger.info("Starting portage scanning subtask for %d packages...", + len(packages)) + + result = scan_portage( + packages=packages, + no_logs=no_logs, + purge_packages=purge_packages, + purge_versions=purge_versions, + prefetch=prefetch, + logger=logger, ) - - -@task -def scan_portage_list_task(query, purge=False): - scan_portage = ScanPortage() - logger = scan_portage_list_task.get_logger() - - for pkg in query.split(): - logger.info("Starting Portage package scanning: %s ...", pkg) - - scan_portage.scan(pkg) - - if purge: - logger.info("Purging") - scan_portage_purge() - - -@task -def scan_portage_all_task(purge=False): - logger = scan_portage_all_task.get_logger() - logger.info("Starting Portage scanning...") - - scan_portage = ScanPortage() - scan_portage.scan() - - if purge: - logger.info("Purging") - scan_portage_purge() - - -@task -def scan_upstream_task(query): - logger = scan_upstream_task.get_logger() - logger.info("Starting upstream scanning for package %s ...", query) - - euscan_output.clean() - scan_upstream = ScanUpstream() - result = scan_upstream.scan(query) - euscan_output.clean() - if not result or result == {}: - raise TaskFailedException("Couldn't scan upstream") + if not result: + raise TaskFailedException return result @task -def scan_upstream_list_task(query): - return _run_in_chunks(scan_upstream_task, [(p, ) for p in query.split()]) +def scan_portage_list_task(query, no_logs=False, purge_packages=False, + purge_versions=False, prefetch=False): + """ + Runs a parallel portage scan for packages in the query list (space + separated string). Task used only from the web interface. + """ + kwargs = {"no_logs": no_logs, "purge_packages": purge_packages, + "purge_versions": purge_versions, "prefetch": prefetch} + _run_in_chunks(_scan_portage_task, [p for p in query.split()], kwargs) @task -def scan_upstream_all_task(purge=False): - output = _run_in_chunks( - scan_upstream_task, - [('%s/%s' % (pkg.category, pkg.name), ) - for pkg in Package.objects.all()], - n=16 +def scan_portage_all_task(no_logs=False, purge_packages=False, + purge_versions=False, prefetch=False): + """ + Runs a parallel portage scan for all packages + """ + kwargs = {"no_logs": no_logs, "purge_packages": purge_packages, + "purge_versions": purge_versions, "prefetch": prefetch} + _run_in_chunks(_scan_metadata_task, Package.objects.all(), kwargs) + + +@task +def _scan_upstream_task(packages, purge_versions=False): + """ + Scans upstream for the given set of packages + """ + logger = _scan_upstream_task.get_logger() + + logger.info("Starting upstream scanning subtask for %d packages...", + len(packages)) + + result = scan_upstream( + packages=packages, + purge_versions=purge_versions, + logger=logger, ) - - if purge: - output += [scan_upstream_purge()] - - return output + if not result: + raise TaskFailedException + return result @task -def emerge_sync(): - cmd = ["emerge", "--sync", "--root", settings.PORTAGE_ROOT, - "--config-root", settings.PORTAGE_CONFIGROOT] - return _launch_command(cmd) +def scan_upstream_list_task(query, purge_versions=False): + """ + Runs a parallel upstream scan for packages in the query list (space + separated string). Task used only from the web interface. + """ + + kwargs = {"purge_versions": purge_versions} + _run_in_chunks(_scan_upstream_task, [p for p in query.split()], kwargs) @task -def layman_sync(): - from layman import Layman - l = Layman(config=settings.LAYMAN_CONFIG) - return l.sync(l.get_installed(), output_results=False) +def scan_upstream_all_task(purge_versions=False): + """ + Runs a parallel portage scan for all packages + """ + kwargs = {"purge_versions": purge_versions} + _run_in_chunks(_scan_upstream_task, Package.objects.all(), kwargs) @task -def emerge_regen(): - cmd = [ - "emerge", "--regen", "--jobs", settings.EMERGE_REGEN_JOBS, "--root", - settings.PORTAGE_ROOT, "--config-root", settings.PORTAGE_CONFIGROOT - ] - return _launch_command(cmd) +def update_portage_trees_task(): + """ + Update portage tree + """ + logger = update_portage_trees_task.get_logger() + update_portage_trees(logger=logger) @task -def eix_update(): - cmd = ["eix-update"] - return _launch_command(cmd) +def update_task(update_portage_trees=True, scan_portage=True, + scan_metadata=True, scan_upstream=True, update_counter=True): + """ + Update the whole euscan system + """ + if update_portage_trees: + update_portage_trees_task() + if scan_portage: + scan_portage_all_task(prefetch=True, purge_packages=True, + purge_versions=True) + + # metadata and upstream scan can run concurrently, launch them + # asynchronously and wait for them to finish + metadata_job = None + if scan_metadata: + metadata_job = scan_metadata_all_task().delay() + + upstream_job = None + if scan_upstream: + upstream_job = scan_upstream_all_task().delay() + + if metadata_job: + metadata_job.wait() + if upstream_job: + upstream_job.wait() + + update_counters(fast=False) @periodic_task(run_every=crontab(minute="*/1")) -def refresh_package_consume(): +def consume_refresh_package_request(): + """ + Satisfies user requests for package refreshing, runs every minute + """ try: obj = RefreshPackageQuery.objects.latest() except RefreshPackageQuery.DoesNotExist: return {} else: - result = scan_upstream_task(obj.query) + result = _scan_upstream_task([obj.query]) obj.delete() return result +@periodic_task(run_every=crontab(hour=03, minute=00, day_of_week=1)) +def update_periodic_task(): + """ + Runs a whole update once a week + """ + update_task() + + admin_tasks = [ regen_rrds_task, update_counters_task, @@ -210,8 +266,6 @@ admin_tasks = [ scan_portage_list_task, scan_upstream_all_task, scan_upstream_list_task, - emerge_sync, - layman_sync, - emerge_regen, - eix_update, + update_portage_trees, + update_task ]