diff --git a/euscanwww/djeuscan/management/commands/scan_metadata.py b/euscanwww/djeuscan/management/commands/scan_metadata.py index 891893b..48f2c3c 100644 --- a/euscanwww/djeuscan/management/commands/scan_metadata.py +++ b/euscanwww/djeuscan/management/commands/scan_metadata.py @@ -24,6 +24,11 @@ class Command(BaseCommand): dest='category', default=None, help='Scan only this category'), + make_option('--populate', + action='store', + dest='populate', + default=None, + help='Populate herds and maintainers from herds.xml'), ) args = '' help = 'Scans metadata and fills database' @@ -42,5 +47,6 @@ class Command(BaseCommand): scan_metadata( packages=packages, category=options['category'], - logger=logger + logger=logger, + populate=options['populate'], ) diff --git a/euscanwww/djeuscan/processing/scan/scan_metadata.py b/euscanwww/djeuscan/processing/scan/scan_metadata.py index 07e9ee0..ed7e4f5 100644 --- a/euscanwww/djeuscan/processing/scan/scan_metadata.py +++ b/euscanwww/djeuscan/processing/scan/scan_metadata.py @@ -164,7 +164,7 @@ class ScanMetadata(object): @commit_on_success -def scan_metadata(packages=None, category=None, logger=None, populate=True): +def scan_metadata(packages=None, category=None, logger=None, populate=False): scan_handler = ScanMetadata(logger=logger) if category: diff --git a/euscanwww/djeuscan/processing/scan/scan_portage.py b/euscanwww/djeuscan/processing/scan/scan_portage.py index b00c416..a30254e 100644 --- a/euscanwww/djeuscan/processing/scan/scan_portage.py +++ b/euscanwww/djeuscan/processing/scan/scan_portage.py @@ -15,6 +15,7 @@ from djeuscan.models import Package, Version, VersionLog PORTDB = None + class ScanPortage(object): def __init__(self, logger=None, no_log=False, purge_packages=False, purge_versions=False): @@ -23,8 +24,8 @@ class ScanPortage(object): self.purge_packages = purge_packages self.purge_versions = purge_versions - if not PORTDB: # Lazy loading for portdb - global PORTDB + global PORTDB + if not PORTDB: # Lazy loading for portdb PORTDB = portage.db[portage.root]["porttree"].dbapi self.style = color_style() diff --git a/euscanwww/djeuscan/tasks.py b/euscanwww/djeuscan/tasks.py index 512ad91..04d1b80 100644 --- a/euscanwww/djeuscan/tasks.py +++ b/euscanwww/djeuscan/tasks.py @@ -2,15 +2,15 @@ Celery tasks for djeuscan """ -from itertools import islice - -from celery.task import task, group, chord +from celery.task import task, group from django.conf import settings +import portage + from djeuscan.models import Package, RefreshPackageQuery from djeuscan.processing import scan, misc -from djeuscan.utils import queryset_iterator + class TaskFailedException(Exception): """ @@ -18,24 +18,35 @@ class TaskFailedException(Exception): """ pass + def group_one(task, seq, *args, **kwargs): """ Create a group of tasks, each task handle one element of seq """ tasks = [] - for i in seq: - tasks.append(task.subtask(args=[seq[i]] + list(args), kwargs=kwargs)) + + for elem in seq: + if "attr_name" in kwargs: + kwargs[kwargs["attr_name"]] = elem + del kwargs["attr_name"] + tasks.append(task.subtask(args=args, kwargs=kwargs)) + else: + tasks.append(task.subtask(args=[elem] + list(args), kwargs=kwargs)) return group(tasks) + def group_chunks(task, seq, n, *args, **kwargs): """ Creates a group of tasks, each subtask has elements to handle """ tasks = [] for i in xrange(0, len(seq), n): - tasks.append(task.subtask(args=[seq[i:i+n]] + list(args), kwargs=kwargs)) + tasks.append( + task.subtask(args=[seq[i:i + n]] + list(args), kwargs=kwargs) + ) return group(tasks) + @task def regen_rrds(): """ @@ -44,6 +55,7 @@ def regen_rrds(): misc.regen_rrds() return True + @task def update_counters(fast=False): """ @@ -55,8 +67,9 @@ def update_counters(fast=False): logger.info("Done") return True + @task -def scan_metadata(packages=[], category=None): +def scan_metadata(packages=[], category=None, populate=False): """ Scans metadata for the given set of packages """ @@ -75,9 +88,11 @@ def scan_metadata(packages=[], category=None): packages=packages, category=category, logger=logger, + populate=populate, ) return True + @task def scan_portage(packages=[], category=None, no_log=False, purge_packages=False, @@ -107,6 +122,7 @@ def scan_portage(packages=[], category=None, ) return True + @task def scan_upstream(packages=[], purge_versions=False): """ @@ -128,6 +144,7 @@ def scan_upstream(packages=[], purge_versions=False): ) return True + @task def update_portage_trees(): """ @@ -137,17 +154,19 @@ def update_portage_trees(): misc.update_portage_trees(logger=logger) return True + @task def update_portage(packages=None): + update_portage_trees() + scan_portage(purge_packages=True, purge_versions=True, prefetch=True) ( - update_portage_trees.s() | - scan_portage.si(purge_packages=True, purge_versions=True, prefetch=True) | - #scan_metadata.si() | - group_one(scan_metadata, portage.settings.categories) | + group_one(scan_metadata, portage.settings.categories, + attr_name="category") | update_counters.si(fast=False) )() return True + @task def update_upstream(): if settings.TASKS_UPSTREAM_GROUPS >= 1: @@ -165,6 +184,7 @@ def update_upstream(): )() return True + @task def scan_package(package): scan_portage([package], purge_packages=True, purge_versions=True) @@ -172,11 +192,13 @@ def scan_package(package): scan_upstream([package]) return True + @task(rate_limit="1/m") def scan_package_user(package): scan_package(package) return True + @task def consume_refresh_package_request(): """ @@ -191,6 +213,7 @@ def consume_refresh_package_request(): query.delete() scan_package_user.delay(pkg) + admin_tasks = [ regen_rrds, update_counters, @@ -203,7 +226,8 @@ admin_tasks = [ scan_package, ] -""" Chunk helpers (chunks can't use keyword arguments) """ + +# Chunk helpers (chunks can't use keyword arguments) @task def scan_metadata_category(category): """ @@ -212,6 +236,7 @@ def scan_metadata_category(category): scan_metadata(category=category) return True + @task def scan_upstream_purge(*packages): """ diff --git a/euscanwww/djeuscan/utils.py b/euscanwww/djeuscan/utils.py deleted file mode 100644 index 19fa00d..0000000 --- a/euscanwww/djeuscan/utils.py +++ /dev/null @@ -1,22 +0,0 @@ - -def queryset_iterator(queryset, chunksize=1000): - ''''' - Iterate over a Django Queryset ordered by the primary key - - This method loads a maximum of chunksize (default: 1000) rows in it's - memory at the same time while django normally would load all rows in it's - memory. Using the iterator() method only causes it to not preload all the - classes. - - Note that the implementation of the iterator does not support ordered query sets. - ''' - import gc - - pk = 0 - last_pk = queryset.order_by('-pk')[0].pk - queryset = queryset.order_by('pk') - while pk < last_pk: - for row in queryset.filter(pk__gt=pk)[:chunksize]: - pk = row.pk - yield row - gc.collect()