euscanwww: PEP8, Cleaning unused imports and removing useless stuff

* Removed useless use of chords
* populate in scan_metadata is off by default
* fixed group_one and scan_metadata launch
* removed useless utils.py

Signed-off-by: volpino <fox91@anche.no>
volpino 2012-07-20 13:25:03 +02:00
parent 3d8ea27ecc
commit 7dc322348d
5 changed files with 49 additions and 39 deletions

View File

@@ -24,6 +24,11 @@ class Command(BaseCommand):
                     dest='category',
                     default=None,
                     help='Scan only this category'),
+        make_option('--populate',
+                    action='store',
+                    dest='populate',
+                    default=None,
+                    help='Populate herds and maintainers from herds.xml'),
     )
     args = '<package package ...>'
     help = 'Scans metadata and fills database'
@@ -42,5 +47,6 @@ class Command(BaseCommand):
         scan_metadata(
             packages=packages,
             category=options['category'],
-            logger=logger
+            logger=logger,
+            populate=options['populate'],
         )
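A hedged usage sketch for the new flag, driving the command from Python instead of the shell; the command name "scan_metadata" and the category value are assumptions, since the file path is not shown in this view:

    # Hypothetical invocation (command name assumed); --populate is a plain
    # optparse store, so its value is forwarded as-is to scan_metadata(populate=...).
    from django.core.management import call_command

    call_command("scan_metadata", category="dev-python", populate="1")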

View File

@@ -164,7 +164,7 @@ class ScanMetadata(object):
 @commit_on_success
-def scan_metadata(packages=None, category=None, logger=None, populate=True):
+def scan_metadata(packages=None, category=None, logger=None, populate=False):
     scan_handler = ScanMetadata(logger=logger)
 
     if category:

View File

@@ -15,6 +15,7 @@ from djeuscan.models import Package, Version, VersionLog
 PORTDB = None
 
+
 class ScanPortage(object):
     def __init__(self, logger=None, no_log=False, purge_packages=False,
                  purge_versions=False):
@@ -23,8 +24,8 @@ class ScanPortage(object):
         self.purge_packages = purge_packages
         self.purge_versions = purge_versions
 
-        if not PORTDB:  # Lazy loading for portdb
-            global PORTDB
+        global PORTDB
+        if not PORTDB:  # Lazy loading for portdb
             PORTDB = portage.db[portage.root]["porttree"].dbapi
 
         self.style = color_style()
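The reordering above matters because Python 2 warns, and Python 3 rejects, reading a name before its global declaration in the same function body. A minimal self-contained sketch of the same lazy-initialization pattern, with hypothetical names:

    _CACHE = None

    def get_cache():
        global _CACHE          # declare before the name is read
        if _CACHE is None:     # initialize only on first use
            _CACHE = {"expensive": "resource"}
        return _CACHE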

View File

@@ -2,15 +2,15 @@
 Celery tasks for djeuscan
 """
-from itertools import islice
-from celery.task import task, group, chord
+from celery.task import task, group
 
 from django.conf import settings
 
+import portage
+
 from djeuscan.models import Package, RefreshPackageQuery
 from djeuscan.processing import scan, misc
-from djeuscan.utils import queryset_iterator
 
 
 class TaskFailedException(Exception):
     """
@@ -18,24 +18,35 @@ class TaskFailedException(Exception):
     """
     pass
 
+
 def group_one(task, seq, *args, **kwargs):
     """
     Create a group of tasks, each task handle one element of seq
     """
     tasks = []
-    for i in seq:
-        tasks.append(task.subtask(args=[seq[i]] + list(args), kwargs=kwargs))
+
+    for elem in seq:
+        if "attr_name" in kwargs:
+            kwargs[kwargs["attr_name"]] = elem
+            del kwargs["attr_name"]
+            tasks.append(task.subtask(args=args, kwargs=kwargs))
+        else:
+            tasks.append(task.subtask(args=[elem] + list(args), kwargs=kwargs))
     return group(tasks)
 
+
 def group_chunks(task, seq, n, *args, **kwargs):
     """
     Creates a group of tasks, each subtask has <n> elements to handle
     """
     tasks = []
     for i in xrange(0, len(seq), n):
-        tasks.append(task.subtask(args=[seq[i:i+n]] + list(args), kwargs=kwargs))
+        tasks.append(
+            task.subtask(args=[seq[i:i + n]] + list(args), kwargs=kwargs)
+        )
     return group(tasks)
 
+
 @task
 def regen_rrds():
     """
@@ -44,6 +55,7 @@ def regen_rrds():
     misc.regen_rrds()
     return True
 
+
 @task
 def update_counters(fast=False):
     """
@@ -55,8 +67,9 @@ def update_counters(fast=False):
     logger.info("Done")
     return True
 
+
 @task
-def scan_metadata(packages=[], category=None):
+def scan_metadata(packages=[], category=None, populate=False):
     """
     Scans metadata for the given set of packages
     """
@@ -75,9 +88,11 @@ def scan_metadata(packages=[], category=None):
         packages=packages,
         category=category,
         logger=logger,
+        populate=populate,
     )
     return True
 
+
 @task
 def scan_portage(packages=[], category=None,
                  no_log=False, purge_packages=False,
@@ -107,6 +122,7 @@ def scan_portage(packages=[], category=None,
     )
     return True
 
+
 @task
 def scan_upstream(packages=[], purge_versions=False):
     """
@@ -128,6 +144,7 @@ def scan_upstream(packages=[], purge_versions=False):
     )
     return True
 
+
 @task
 def update_portage_trees():
     """
@@ -137,17 +154,19 @@ def update_portage_trees():
     misc.update_portage_trees(logger=logger)
     return True
 
+
 @task
 def update_portage(packages=None):
+    update_portage_trees()
+    scan_portage(purge_packages=True, purge_versions=True, prefetch=True)
     (
-        update_portage_trees.s() |
-        scan_portage.si(purge_packages=True, purge_versions=True, prefetch=True) |
-        #scan_metadata.si() |
-        group_one(scan_metadata, portage.settings.categories) |
+        group_one(scan_metadata, portage.settings.categories,
+                  attr_name="category") |
         update_counters.si(fast=False)
     )()
     return True
 
+
 @task
 def update_upstream():
     if settings.TASKS_UPSTREAM_GROUPS >= 1:
@@ -165,6 +184,7 @@ def update_upstream():
     )()
     return True
 
+
 @task
 def scan_package(package):
     scan_portage([package], purge_packages=True, purge_versions=True)
@@ -172,11 +192,13 @@ def scan_package(package):
     scan_upstream([package])
     return True
 
+
 @task(rate_limit="1/m")
 def scan_package_user(package):
     scan_package(package)
     return True
 
+
 @task
 def consume_refresh_package_request():
     """
@@ -191,6 +213,7 @@ def consume_refresh_package_request():
         query.delete()
         scan_package_user.delay(pkg)
 
+
 admin_tasks = [
     regen_rrds,
     update_counters,
@@ -203,7 +226,8 @@ admin_tasks = [
     scan_package,
 ]
 
+
-""" Chunk helpers (chunks can't use keyword arguments) """
+# Chunk helpers (chunks can't use keyword arguments)
 @task
 def scan_metadata_category(category):
     """
@@ -212,6 +236,7 @@ def scan_metadata_category(category):
     scan_metadata(category=category)
     return True
 
+
 @task
 def scan_upstream_purge(*packages):
     """

View File

@@ -1,22 +0,0 @@
-def queryset_iterator(queryset, chunksize=1000):
-    '''
-    Iterate over a Django Queryset ordered by the primary key
-
-    This method loads a maximum of chunksize (default: 1000) rows in it's
-    memory at the same time while django normally would load all rows in it's
-    memory. Using the iterator() method only causes it to not preload all the
-    classes.
-
-    Note that the implementation of the iterator does not support ordered query sets.
-    '''
-    import gc
-
-    pk = 0
-    last_pk = queryset.order_by('-pk')[0].pk
-    queryset = queryset.order_by('pk')
-    while pk < last_pk:
-        for row in queryset.filter(pk__gt=pk)[:chunksize]:
-            pk = row.pk
-            yield row
-        gc.collect()