This commit is contained in:
volpino 2012-07-20 11:58:07 +02:00
commit 3d8ea27ecc
33 changed files with 257 additions and 258 deletions

View File

@ -128,7 +128,10 @@ def print_usage(_error=None, help=None):
print(yellow(" -I, --ignore-pre-release-if-stable") +
" - Ignore non-stable versions only if current\n" +
" " * 38 + "version is stable", file=out)
print(yellow(" --mirror") +
" - use mirror:// URIs", file=out)
print(yellow(" --ebuild-uri") +
" - use ebuild variables in URIs", file=out)
print(file=out)
if _error in ('packages',) or help:
@ -182,12 +185,14 @@ def parse_args():
pp.output.nocolor()
elif o in ("-p", "--progress"):
CONFIG['progress'] = isatty
elif o in ("-m", "--mirror"):
elif o in ("--mirror"):
CONFIG['mirror'] = True
elif o in ("-i", "--ignore-pre-release"):
CONFIG['ignore-pre-release'] = True
elif o in ("-I", "--ignore-pre-release-if-stable"):
CONFIG['ignore-pre-release-if-stable'] = True
elif o in ("--ebuild-uri"):
CONFIG['ebuild-uri'] = True
else:
return_code = False
@ -195,11 +200,11 @@ def parse_args():
# here are the different allowed command line options (getopt args)
getopt_options = {'short': {}, 'long': {}}
getopt_options['short']['global'] = "hVCqv1bf:pmiI"
getopt_options['short']['global'] = "hVCqv1bf:piI"
getopt_options['long']['global'] = [
"help", "version", "nocolor", "quiet", "verbose", "oneshot",
"brute-force=", "format=", "progress", "mirror", "ignore-pre-release",
"ignore-pre-release-if-stable",
"ignore-pre-release-if-stable", "ebuild-uri"
]
short_opts = getopt_options['short']['global']

View File

@ -9,7 +9,7 @@ or::
python setup.py install # to install euscan and requirements
If you prefer to use portage just install dev-python/django-celery-2.5.5
If you prefer to use portage just install dev-python/django-celery
A message broker is required for running tasks. The default and recommended
broker is RabbitMQ.

View File

@ -8,14 +8,12 @@ from xml.etree.ElementTree import iterparse, ParseError
from django.db.transaction import commit_on_success
from django.core.management.color import color_style
from euscan.helpers import get_version_type
from euscan.version import get_version_type
from djeuscan.processing import FakeLogger
from djeuscan.models import Package, Version, VersionLog
PORTDB = portage.db[portage.root]["porttree"].dbapi
PORTDB = None
class ScanPortage(object):
def __init__(self, logger=None, no_log=False, purge_packages=False,
@ -25,6 +23,10 @@ class ScanPortage(object):
self.purge_packages = purge_packages
self.purge_versions = purge_versions
if not PORTDB: # Lazy loading for portdb
global PORTDB
PORTDB = portage.db[portage.root]["porttree"].dbapi
self.style = color_style()
self._cache = {'packages': {}, 'versions': {}}
@ -342,9 +344,14 @@ def scan_portage(packages=None, category=None, no_log=False,
if prefetch:
logger.info('Prefetching objects...')
for package in Package.objects.all():
ppackages = Package.objects.all()
pversions = Version.objects.select_related('package').all()
if category:
ppackages = ppackages.filter(category=category)
pversions = pversions.filter(package__category=category)
for package in ppackages:
scan_handler.cache_store_package(package)
for version in Version.objects.select_related('package').all():
for version in pversions:
scan_handler.cache_store_version(version)
logger.info('done')

View File

@ -153,9 +153,12 @@ def scan_upstream(packages=None, purge_versions=False,
for pkg in packages:
try:
scan_handler.scan('%s/%s' % (pkg.category, pkg.name))
package = '%s/%s' % (pkg.category, pkg.name)
except AttributeError:
scan_handler.scan(pkg)
package = pkg
logger.info('Scanning %s' % package)
scan_handler.scan(package)
scan_handler.purge_old_versions()

View File

@ -9,10 +9,8 @@ from celery.task import task, group, chord
from django.conf import settings
from djeuscan.models import Package, RefreshPackageQuery
from djeuscan.processing.misc import regen_rrds, update_counters, \
update_portage_trees
from djeuscan.processing.scan import scan_metadata, scan_portage, scan_upstream
from djeuscan.processing import scan, misc
from djeuscan.utils import queryset_iterator
class TaskFailedException(Exception):
"""
@ -20,103 +18,85 @@ class TaskFailedException(Exception):
"""
pass
def _chunks(it, n):
def group_one(task, seq, *args, **kwargs):
"""
Chunk generator, takes an iterator and the desired size of the chunk
Create a group of tasks, each task handle one element of seq
"""
for first in it:
yield [first] + list(islice(it, n - 1))
tasks = []
for i in seq:
tasks.append(task.subtask(args=[seq[i]] + list(args), kwargs=kwargs))
return group(tasks)
def _run_in_chunks(task, packages, kwargs=None,
concurrently=settings.TASKS_CONCURRENTLY,
n=settings.TASKS_SUBTASK_PACKAGES):
def group_chunks(task, seq, n, *args, **kwargs):
"""
Launches a group at a time with <concurrently> subtasks.
Each subtask has <n> packages to handle
Creates a group of tasks, each subtask has <n> elements to handle
"""
output = []
chunk_generator = _chunks(iter(packages), n)
done = False
while not done:
tasks = []
for _ in range(concurrently):
try:
chunk = chunk_generator.next()
except StopIteration:
done = True
else:
tasks.append(task.subtask((chunk, ), kwargs))
output.extend(group(tasks)())
return output
tasks = []
for i in xrange(0, len(seq), n):
tasks.append(task.subtask(args=[seq[i:i+n]] + list(args), kwargs=kwargs))
return group(tasks)
@task
def regen_rrds_task():
def regen_rrds():
"""
Regenerate RRDs
"""
return regen_rrds()
misc.regen_rrds()
return True
@task
def update_counters_task(fast=True):
def update_counters(fast=False):
"""
Updates counters
"""
return update_counters(fast=fast)
logger = update_counters.get_logger()
logger.info("Updating counters (fast=%s)...", fast)
misc.update_counters(fast=fast)
logger.info("Done")
return True
@task
def _scan_metadata_task(packages):
def scan_metadata(packages=[], category=None):
"""
Scans metadata for the given set of packages
"""
logger = _scan_metadata_task.get_logger()
logger.info("Starting metadata scanning subtask for %d packages...",
len(packages))
logger = scan_metadata.get_logger()
scan_metadata(
if packages:
logger.info("Starting metadata scan for %d packages...",
len(packages))
elif category:
logger.info("Starting metadata scan for %s...",
category)
else:
logger.info("Starting metadata scan...")
scan.scan_metadata(
packages=packages,
category=category,
logger=logger,
)
return True
@task
def scan_metadata_list_task(query):
"""
Runs a parallel metadata scan for packages in the query list (space
separated string). Task used only from the web interface.
"""
return _run_in_chunks(_scan_metadata_task, [p for p in query.split()])
@task
def scan_metadata_all_task():
"""
Runs a parallel metadata scan for all packages
"""
return _run_in_chunks(_scan_metadata_task, Package.objects.all())
@task
def _scan_portage_task(packages, category=None, no_log=False,
purge_packages=False, purge_versions=False,
prefetch=False):
def scan_portage(packages=[], category=None,
no_log=False, purge_packages=False,
purge_versions=False, prefetch=False):
"""
Scans portage for the given set of packages
"""
logger = _scan_portage_task.get_logger()
if packages:
logger.info("Starting portage scanning subtask for %d packages...",
len(packages))
else:
logger.info("Starting portage scanning for all packages...")
logger = scan_portage.get_logger()
scan_portage(
if packages:
logger.info("Starting portage scan for %d packages...",
len(packages))
elif category:
logger.info("Starting portage scan for %s...",
category)
else:
logger.info("Starting portage scan...")
scan.scan_portage(
packages=packages,
category=category,
no_log=no_log,
@ -125,131 +105,77 @@ def _scan_portage_task(packages, category=None, no_log=False,
prefetch=prefetch,
logger=logger,
)
return True
@task
def scan_portage_list_task(query, no_log=False, purge_packages=False,
purge_versions=False, prefetch=False):
"""
Runs a parallel portage scan for packages in the query list (space
separated string). Task used only from the web interface.
"""
kwargs = {"no_log": no_log, "purge_packages": purge_packages,
"purge_versions": purge_versions, "prefetch": prefetch}
return _run_in_chunks(
_scan_portage_task, [p for p in query.split()], kwargs
)
@task
def scan_portage_all_task(no_log=False, purge_packages=False,
purge_versions=False, prefetch=False):
"""
Runs a syncronous portage scan for all packages
"""
_scan_portage_task(
packages=None,
category=None,
no_log=no_log,
purge_packages=purge_packages,
purge_versions=purge_versions,
prefetch=prefetch,
)
@task
def _scan_upstream_task(packages, purge_versions=False):
def scan_upstream(packages=[], purge_versions=False):
"""
Scans upstream for the given set of packages
"""
logger = _scan_upstream_task.get_logger()
logger = scan_upstream.get_logger()
logger.info("Starting upstream scanning subtask for %d packages...",
len(packages))
if len(packages):
logger.info("Starting upstream scan subtask for %d packages...",
len(packages))
else:
logger.info("Starting upstream scan...",
len(packages))
result = scan_upstream(
scan.scan_upstream(
packages=packages,
purge_versions=purge_versions,
logger=logger,
)
# TODO: implement some kind of error raising in case of failure
#if not result:
# raise TaskFailedException
return result
return True
@task
def scan_upstream_list_task(query, purge_versions=False):
"""
Runs a parallel upstream scan for packages in the query list (space
separated string). Task used only from the web interface.
"""
kwargs = {"purge_versions": purge_versions}
return _run_in_chunks(_scan_upstream_task, [p for p in query.split()],
kwargs)
@task
def scan_upstream_all_task(purge_versions=False):
"""
Runs a parallel portage scan for all packages
"""
kwargs = {"purge_versions": purge_versions}
return _run_in_chunks(
_scan_upstream_task,
Package.objects.all().order_by('?'),
kwargs
)
@task
def update_portage_trees_task():
def update_portage_trees():
"""
Update portage tree
"""
logger = update_portage_trees_task.get_logger()
update_portage_trees(logger=logger)
logger = update_portage_trees.get_logger()
misc.update_portage_trees(logger=logger)
return True
@task
def update_task(update_portage_trees=True, scan_portage=True,
scan_metadata=True, scan_upstream=True, update_counters=True):
"""
Update the whole euscan system
"""
if update_portage_trees:
update_portage_trees_task()
if scan_portage:
scan_portage_all_task(prefetch=True, purge_packages=True,
purge_versions=True)
def update_portage(packages=None):
(
update_portage_trees.s() |
scan_portage.si(purge_packages=True, purge_versions=True, prefetch=True) |
#scan_metadata.si() |
group_one(scan_metadata, portage.settings.categories) |
update_counters.si(fast=False)
)()
return True
# metadata and upstream scan can run concurrently, launch them
# in a group and wait for them to finish
tasks = []
if scan_metadata:
tasks.append(scan_metadata_all_task.subtask())
@task
def update_upstream():
if settings.TASKS_UPSTREAM_GROUPS >= 1:
packages = Package.objects.all()
if scan_upstream:
tasks.append(scan_upstream_all_task.subtask())
if update_counters:
chord(tasks)(
# immutable means that the result of previous tasks is not passed
update_counters_task.subtask((), {"fast": False}, immutable=True)
)
scan_upstream_sub = group_chunks(scan_upstream, packages,
settings.TASKS_UPSTREAM_GROUPS,
purge_versions=True)
else:
group(tasks)()
scan_upstream_sub = scan_upstream.si(purge_versions=True)
(
scan_upstream_sub |
update_counters.si(fast=False)
)()
return True
@task
def scan_package_task(package):
_scan_portage_task([package], purge_packages=True, purge_versions=True)
_scan_metadata_task([package])
_scan_upstream_task([package])
def scan_package(package):
scan_portage([package], purge_packages=True, purge_versions=True)
scan_metadata([package])
scan_upstream([package])
return True
# Periodic tasks
@task(rate_limit="1/m")
def scan_package_user(package):
scan_package(package)
return True
@task
def consume_refresh_package_request():
@ -257,25 +183,39 @@ def consume_refresh_package_request():
Satisfies user requests for package refreshing, runs every minute
"""
try:
obj = RefreshPackageQuery.objects.all().order_by('-priority')[0]
query = RefreshPackageQuery.objects.all().order_by('-priority')[0]
except IndexError:
return {}
else:
result = scan_package_task(obj.package)
obj.delete()
return result
return
pkg = query.package
query.delete()
scan_package_user.delay(pkg)
admin_tasks = [
regen_rrds_task,
update_counters_task,
scan_metadata_list_task,
scan_metadata_all_task,
scan_portage_all_task,
scan_portage_list_task,
scan_upstream_all_task,
scan_upstream_list_task,
update_portage_trees_task,
update_task,
scan_package_task,
regen_rrds,
update_counters,
scan_metadata,
scan_portage,
scan_upstream,
update_portage_trees,
update_portage,
update_upstream,
scan_package,
]
""" Chunk helpers (chunks can't use keyword arguments) """
@task
def scan_metadata_category(category):
    """
    Chunk helper: run scan_metadata for a whole category.

    Exists because chunks can't use keyword arguments.
    """
    scan_metadata(category=category)
    return True
@task
def scan_upstream_purge(*packages):
    """
    Chunk helper: run scan_upstream with purge_versions enabled.

    Exists because chunks can't use keyword arguments.
    """
    scan_upstream(packages, purge_versions=True)
    return True

View File

@ -1,5 +1,5 @@
{% load url from future %}
{% load euscan_tags %}
{% load djeuscan_helpers %}
<table id="table" class="display">
<thead>

View File

@ -1,5 +1,5 @@
{% load url from future %}
{% load euscan_tags %}
{% load djeuscan_helpers %}
<table id="table" class="display">
<thead>

View File

@ -1,5 +1,5 @@
{% load url from future %}
{% load euscan_tags %}
{% load djeuscan_helpers %}
<table id="table" class="display">
<thead>

View File

@ -1,4 +1,4 @@
{% load euscan_tags %}
{% load djeuscan_helpers %}
{% load mul %}
{% load sub %}
{% load div %}

View File

@ -1,8 +1,8 @@
{% load euscan_tags %}
{% load djeuscan_helpers %}
{% load sub %}
{% load div %}
{% load mul %}
{% load euscan_tags %}
{% load djeuscan_helpers %}
{% load url from future %}

View File

@ -1,6 +1,6 @@
{% extends "euscan/_datatable.html" %}
{% load url from future %}
{% load euscan_tags %}
{% load djeuscan_helpers %}
{% block title %}
{{ block.super }} - Watched categories

View File

@ -1,6 +1,6 @@
{% extends "euscan/_datatable.html" %}
{% load url from future %}
{% load euscan_tags %}
{% load djeuscan_helpers %}
{% block title %}
{{ block.super }} - Watched herds

View File

@ -1,6 +1,6 @@
{% extends "euscan/_datatable.html" %}
{% load url from future %}
{% load euscan_tags %}
{% load djeuscan_helpers %}
{% block title %}
{{ block.super }} - Watched maintainers

View File

@ -1,6 +1,6 @@
{% extends "euscan/_datatable.html" %}
{% load euscan_tags %}
{% load djeuscan_helpers %}
{% load url from future %}
{% block title %}

View File

@ -1,7 +1,7 @@
{% extends "euscan/_datatable.html" %}
{% load url from future %}
{% load euscan_tags %}
{% load djeuscan_helpers %}
{% block title %}
{{ block.super }} - Watched packages

View File

@ -1,6 +1,6 @@
{% extends "euscan/_datatable.html" %}
{% load euscan_tags %}
{% load djeuscan_helpers %}
{% load url from future %}
{% block title %}

View File

@ -1,6 +1,6 @@
{% extends "euscan/_datatable.html" %}
{% load euscan_tags %}
{% load djeuscan_helpers %}
{% load url from future %}
{% block title %}

View File

@ -1,6 +1,6 @@
{% extends "euscan/_datatable.html" %}
{% load euscan_tags %}
{% load djeuscan_helpers %}
{% load url from future %}
{% block title %}

View File

@ -1,6 +1,6 @@
{% extends "euscan/_datatable.html" %}
{% load euscan_tags %}
{% load djeuscan_helpers %}
{% load url from future %}
{% block title %}

View File

@ -1,6 +1,6 @@
{% extends "euscan/_datatable.html" %}
{% load euscan_tags %}
{% load djeuscan_helpers %}
{% load url from future %}
{% block title %}

View File

@ -1,6 +1,6 @@
{% extends "euscan/_datatable.html" %}
{% load euscan_tags %}
{% load djeuscan_helpers %}
{% load url from future %}
{% block title %}

View File

@ -1,7 +1,7 @@
{% extends "euscan/_datatable.html" %}
{% load url from future %}
{% load euscan_tags %}
{% load djeuscan_helpers %}
{% block title %}
{{ block.super }} - Overlay: {{ overlay }}

View File

@ -1,6 +1,6 @@
{% extends "euscan/_datatable.html" %}
{% load euscan_tags %}
{% load djeuscan_helpers %}
{% load url from future %}
{% block title %}

View File

@ -1,7 +1,7 @@
{% extends "euscan/_datatable.html" %}
{% load sub %}
{% load euscan_tags %}
{% load djeuscan_helpers %}
{% load url from future %}
{% block meta %}

View File

@ -1,6 +1,6 @@
{% extends "euscan/_datatable.html" %}
{% load euscan_tags %}
{% load djeuscan_helpers %}
{% block title %}
{{ block.super }} - World Scan

View File

@ -1,11 +1,10 @@
from django import template
from django.conf import settings
from euscan import helpers
from euscan.version import is_version_type_stable, get_version_type
register = template.Library()
@register.inclusion_tag('euscan/_packages.html', takes_context=True)
def packages(context, pkgs):
context['packages'] = pkgs
@ -61,9 +60,9 @@ def overlays_table(overlays):
@register.filter
def is_stable(version_type):
return helpers.is_version_type_stable(version_type)
return is_version_type_stable(version_type)
@register.filter
def version_type(version):
return helpers.get_version_type(version)
return get_version_type(version)

View File

@ -0,0 +1,22 @@
def queryset_iterator(queryset, chunksize=1000):
    '''
    Iterate over a Django QuerySet ordered by the primary key.

    Loads at most ``chunksize`` (default: 1000) rows into memory at a
    time, whereas iterating the queryset directly would let Django load
    every row at once. Using the plain ``iterator()`` method only avoids
    preloading the model instances.

    Note that the implementation does not support ordered query sets:
    any prior ordering is discarded and replaced by pk ordering.
    '''
    import gc
    try:
        # Highest pk defines the stopping point; an empty queryset has
        # none, so simply yield nothing instead of raising IndexError.
        last_pk = queryset.order_by('-pk')[0].pk
    except IndexError:
        return
    pk = 0
    queryset = queryset.order_by('pk')
    while pk < last_pk:
        for row in queryset.filter(pk__gt=pk)[:chunksize]:
            pk = row.pk
            yield row
        # Release references accumulated while materializing the chunk.
        gc.collect()

View File

@ -159,7 +159,6 @@ TEMPLATE_CONTEXT_PROCESSORS = (
INSTALLED_APPS = (
'euscanwww',
'djeuscan',
'django.contrib.auth',
'django.contrib.contenttypes',
'django.contrib.sessions',
@ -237,10 +236,10 @@ CELERY_RESULT_BACKEND = "amqp"
BROKER_CONNECTION_TIMEOUT = 3600
CELERYD_CONCURRENCY = 4
TASKS_CONCURRENTLY = 8
TASKS_SUBTASK_PACKAGES = 32
TASKS_UPSTREAM_GROUPS = 32
CELERYBEAT_SCHEDULER = "djcelery.schedulers.DatabaseScheduler"
CELERYBEAT_SCHEDULE = {}
# LDAP authentication
# TODO: Test data - change me!

View File

@ -24,6 +24,7 @@ CONFIG = {
'mirror': False,
'ignore-pre-release': False,
'ignore-pre-release-if-stable': False,
'ebuild-uri': False,
}
BLACKLIST_VERSIONS = [

View File

@ -33,27 +33,6 @@ VERSION_CMP_PACKAGE_QUIRKS = {
_v_end = '((-|_)(pre|p|beta|b|alpha|a|rc|r)\d*)'
_v = r'((\d+)((\.\d+)*)([a-zA-Z]*?)(' + _v_end + '*))'
def is_version_stable(version):
return is_version_type_stable(get_version_type(version))
def is_version_type_stable(version_type):
return version_type not in ("alpha", "beta", "pre", "rc")
def get_version_type(version):
types = []
gentoo_types = ("alpha", "beta", "pre", "rc", "p")
for token in re.findall("[\._-]([a-zA-Z]+)", version):
if token in gentoo_types:
types.append(token)
if types:
return types[0] # TODO: consider returning all types
return "release"
# Stolen from g-pypi
def gentoo_mangle_version(up_pv):
"""Convert PV to MY_PV if needed
@ -416,7 +395,7 @@ def urlallowed(url):
setdefaulttimeout(timeout)
return rp.can_fetch(CONFIG['user-agent'], url) if rp else False
return rp.can_fetch(CONFIG['user-agent'], url) if rp else True
def urlopen(url, timeout=None, verb="GET"):

View File

@ -5,10 +5,9 @@ import signal
import time
import re
from gentoolkit import pprinter as pp
import portage
from portage.output import EOutput, TermProgressBar
from gentoolkit import pprinter as pp
class ProgressHandler(object):
def __init__(self, progress_bar):
@ -66,6 +65,31 @@ def clean_colors(string):
return string
def transform_url(config, cpv, url):
    """Apply the URL transformations enabled in *config*.

    Rewrites *url* through the mirror:// form when 'mirror' is set,
    then substitutes ebuild variables when 'ebuild-uri' is set.
    """
    result = url
    if config['mirror']:
        result = to_mirror(result)
    if config['ebuild-uri']:
        result = to_ebuild_uri(cpv, result)
    return result
def to_ebuild_uri(cpv, url):
    # Rewrite *url*, replacing the concrete package strings derived from
    # *cpv* with the ebuild variables (${P}, ${PN}, ...) that expand to
    # them.
    # NOTE(review): plain substring replacement — the order of evars
    # matters (most specific first, e.g. P before PN/PV) so a longer
    # string is not clobbered by one of its components.
    cat, pkg, ver, rev = portage.catpkgsplit(cpv)
    p = '%s-%s' % (pkg, ver)  # ${P}: name-version
    # ${PVR}: version with "-rev" appended unless the revision is r0
    pvr = '%s%s' % (ver, '-%s' % rev if rev != 'r0' else '')
    pf = '%s-%s' % (pkg, pvr)  # ${PF}: name-version[-rev]
    evars = (
        (p , 'P'),
        (pkg, 'PN'),
        (ver, 'PV'),
        (rev, 'PR'),
        (pvr, 'PVR'),
        (pf , 'PF'),
        (cat, 'CATEGORY')
    )
    for src, dst in evars:
        url = url.replace(src, '${%s}' % dst)
    return url
def to_mirror(url):
mirrors = portage.settings.thirdpartymirrors()
for mirror_name in mirrors:
@ -146,15 +170,17 @@ class EuscanOutput(object):
raise TypeError("Invalid output format")
def result(self, cp, version, urls, handler, confidence):
from euscan.helpers import get_version_type
from euscan.version import get_version_type
if self.config['format']:
cpv = '%s-%s' % (cp, version)
urls = ' '.join(transform_url(self.config, cpv, url) for url in urls.split())
if self.config['format'] in ['json']:
_curr = self.queries[self.current_query]
_curr["result"].append(
{
"version": version,
"urls": [to_mirror(url) if self.config['mirror'] else url
for url in urls.split()],
"urls": urls.split(),
"handler": handler,
"confidence": confidence,
"type": get_version_type(version)

View File

@ -13,7 +13,8 @@ from gentoolkit.package import Package
from euscan import CONFIG, BLACKLIST_PACKAGES
from euscan import handlers, output
from euscan.helpers import version_blacklisted, is_version_stable
from euscan.helpers import version_blacklisted
from euscan.version import is_version_stable
from euscan.ebuild import package_from_ebuild

View File

@ -1,5 +1,22 @@
import re
def is_version_type_stable(version_type):
    """Return True unless *version_type* is a pre-release marker."""
    unstable = ("alpha", "beta", "pre", "rc")
    return version_type not in unstable
def is_version_stable(version):
    """Return True when the detected type of *version* is stable."""
    version_type = get_version_type(version)
    return is_version_type_stable(version_type)
def get_version_type(version):
    """Classify *version* by its first Gentoo-style suffix token.

    Returns one of "alpha", "beta", "pre", "rc" or "p" when such a
    suffix is present in the version string, "release" otherwise.
    """
    known_suffixes = ("alpha", "beta", "pre", "rc", "p")
    # TODO: consider returning all types, not just the first match
    for token in re.findall(r"[\._-]([a-zA-Z]+)", version):
        if token in known_suffixes:
            return token
    return "release"
# Stolen from pkg_resources, but importing it is not a good idea
component_re = re.compile(r'(\d+ | [a-z]+ | \.| -)', re.VERBOSE)