From 28b913295a9d6489c230e9cafdd5944b7ae4a1ad Mon Sep 17 00:00:00 2001 From: Corentin Chary Date: Mon, 16 Jul 2012 07:12:09 +0200 Subject: [PATCH 1/6] euscan: ignore robots.txt for fedorahosted.org Signed-off-by: Corentin Chary --- pym/euscan/__init__.py | 1 + 1 file changed, 1 insertion(+) diff --git a/pym/euscan/__init__.py b/pym/euscan/__init__.py index 8efdce4..dad6742 100644 --- a/pym/euscan/__init__.py +++ b/pym/euscan/__init__.py @@ -70,6 +70,7 @@ ROBOTS_TXT_BLACKLIST_DOMAINS = [ '(.*)chromium.org(.*)', '(.*)nodejs.org(.*)', '(.*)download.mono-project.com(.*)', + '(.*)fedorahosted.org(.*)', ] from out import EuscanOutput From f670645747782bc44e73660eab4df33b40e4ce4e Mon Sep 17 00:00:00 2001 From: Corentin Chary Date: Wed, 25 Jul 2012 17:08:19 +0200 Subject: [PATCH 2/6] Revert "djeuscan: helpers.py is actually unused" This reverts commit 8146dc1f44946c4d894652b92f0848e97f28b7d7. --- euscanwww/djeuscan/helpers.py | 108 ++++++++++++++++++++++++++++++++++ 1 file changed, 108 insertions(+) create mode 100644 euscanwww/djeuscan/helpers.py diff --git a/euscanwww/djeuscan/helpers.py b/euscanwww/djeuscan/helpers.py new file mode 100644 index 0000000..f7a6b82 --- /dev/null +++ b/euscanwww/djeuscan/helpers.py @@ -0,0 +1,108 @@ +""" +djeuscan.helpers +""" + +from distutils.version import StrictVersion, LooseVersion + + +def xint(i): + """ + Tries to cast to int, fallbacks to 0 + """ + try: + return int(i) + except Exception: + return 0 + + +def select_related_last_versions(queryset): + queryset = queryset.select_related( + 'last_version_gentoo', + 'last_version_overlay', + 'last_version_upstream' + ) + + +def version_key(version): + version = version.version + try: + return StrictVersion(version) + # in case of abnormal version number, fall back to LooseVersion + except ValueError: + return LooseVersion(version) + + +def packages_from_names(data): + """ + Returns a list of Package objects from a string of names + """ + + from djeuscan.models import Package + + packages = [] + data = data.replace("\r", "") + + for pkg in data.split('\n'): + if '/' in pkg: + cat, pkg = pkg.split('/') + packages.extend(Package.objects.filter(category=cat, name=pkg)) + else: + packages.extend(Package.objects.filter(name=pkg)) + return packages + + +def rename_fields(vqs, fields): + ret = [] + for n in vqs: + for tr in fields: + if tr[0] in n: + n[tr[1]] = n[tr[0]] + del n[tr[0]] + ret.append(n) + return ret + + +class catch_and_return(object): + def __init__(self, err, response): + self.err = err + self.response = response + + def __call__(self, fn): + def wrapper(*args, **kwargs): + try: + return fn(*args, **kwargs) + except self.err: + return self.response + return wrapper + + +def get_account_categories(user): + from djeuscan.models import Package, CategoryAssociation + + # TODO: This is quite ugly + category_names = [obj.category for obj in + CategoryAssociation.objects.filter(user=user)] + return [c for c in Package.objects.categories() + if c["category"] in category_names] + + +def get_account_herds(user): + from djeuscan.models import Package, HerdAssociation + + ids = [obj.herd.pk for obj in + HerdAssociation.objects.filter(user=user)] + return Package.objects.herds(ids=ids) + + +def get_account_maintainers(user): + from djeuscan.models import Package, MaintainerAssociation + + ids = [obj.maintainer.pk for obj in + MaintainerAssociation.objects.filter(user=user)] + return Package.objects.maintainers(ids=ids) + + +def get_account_packages(user): + from djeuscan.models import PackageAssociation + return [obj.package for obj in + PackageAssociation.objects.filter(user=user)] From 3c30cae6ebda7c66fb84658d6baa8ea52d3559e9 Mon Sep 17 00:00:00 2001 From: Corentin Chary Date: Thu, 26 Jul 2012 21:17:45 +0200 Subject: [PATCH 3/6] djeuscan: use store_true for populqte in scan_metadata Signed-off-by: Corentin Chary --- euscanwww/djeuscan/management/commands/scan_metadata.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/euscanwww/djeuscan/management/commands/scan_metadata.py b/euscanwww/djeuscan/management/commands/scan_metadata.py index 48f2c3c..7fb5b52 100644 --- a/euscanwww/djeuscan/management/commands/scan_metadata.py +++ b/euscanwww/djeuscan/management/commands/scan_metadata.py @@ -25,9 +25,9 @@ class Command(BaseCommand): default=None, help='Scan only this category'), make_option('--populate', - action='store', + action='store_true', dest='populate', - default=None, + default=False, help='Populate herds and maintainers from herds.xml'), ) args = '' From 89408dde2b87671d85fa2b21e2546116438dc736 Mon Sep 17 00:00:00 2001 From: Corentin Chary Date: Thu, 26 Jul 2012 21:18:31 +0200 Subject: [PATCH 4/6] djeuscan: pkg.metadata can fail Signed-off-by: Corentin Chary --- euscanwww/djeuscan/processing/scan/scan_metadata.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/euscanwww/djeuscan/processing/scan/scan_metadata.py b/euscanwww/djeuscan/processing/scan/scan_metadata.py index 54dd9c4..329c226 100644 --- a/euscanwww/djeuscan/processing/scan/scan_metadata.py +++ b/euscanwww/djeuscan/processing/scan/scan_metadata.py @@ -49,7 +49,13 @@ class ScanMetadata(object): if created: self.logger.info('+ [p] %s/%s' % (pkg.category, pkg.name)) - if not pkg.metadata: + try: + if not pkg.metadata: + return + except Exception as e: + self.logger.error( + self.style.ERROR('%s/%s: %s' % (pkg.category, pkg.name, str(e))) + ) return herds = dict( From 8d65eaea1fa5eeb0b4f105cef8b21e8eee1a2960 Mon Sep 17 00:00:00 2001 From: Corentin Chary Date: Thu, 26 Jul 2012 21:18:43 +0200 Subject: [PATCH 5/6] djeuscan: use crontabs and depends on djcelery migrations Signed-off-by: Corentin Chary --- .../0015_initial_celery_periodictasks.py | 34 +++++++++++-------- 1 file changed, 19 insertions(+), 15 deletions(-) diff --git a/euscanwww/djeuscan/migrations/0015_initial_celery_periodictasks.py b/euscanwww/djeuscan/migrations/0015_initial_celery_periodictasks.py index a5b3bed..d86eca9 100644 --- a/euscanwww/djeuscan/migrations/0015_initial_celery_periodictasks.py +++ b/euscanwww/djeuscan/migrations/0015_initial_celery_periodictasks.py @@ -4,34 +4,38 @@ from south.v2 import DataMigration class Migration(DataMigration): + depends_on = ( + ("djcelery", "0001_initial"), + ) + def forwards(self, orm): - every_minute = orm["djcelery.IntervalSchedule"].objects.create( - every=1, period="minutes" + every_day = orm["djcelery.CrontabSchedule"].objects.create( + minute = "00", + hour = "01", + day_of_week = "*", + day_of_month = "*", + month_of_year = "*" ) - every_day = orm["djcelery.IntervalSchedule"].objects.create( - every=1, period="days" - ) - every_week = orm["djcelery.IntervalSchedule"].objects.create( - every=7, period="days" - ) - orm["djcelery.PeriodicTask"].objects.create( - name="Refresh package queries", - task="djeuscan.tasks.consume_refresh_package_request", - interval=every_minute + every_week = orm["djcelery.CrontabSchedule"].objects.create( + minute = "00", + hour = "03", + day_of_week = "1", + day_of_month = "*", + month_of_year = "*" ) orm["djcelery.PeriodicTask"].objects.create( name="Daily portage update", task="djeuscan.tasks.update_portage", - interval=every_day + crontab=every_day ) orm["djcelery.PeriodicTask"].objects.create( name="Weekly upstream update", task="djeuscan.tasks.update_upstream", - interval=every_week + crontab=every_week ) def backwards(self, orm): - orm["djcelery.IntervalSchedule"].objects.all().delete() + orm["djcelery.CrontabSchedule"].objects.all().delete() orm["djcelery.PeriodicTask"].objects.all().delete() From 6dce7707c8a144f149b15740fc81b38514682b63 Mon Sep 17 00:00:00 2001 From: Corentin Chary Date: Thu, 26 Jul 2012 21:31:10 +0200 Subject: [PATCH 6/6] euscan: fix some \n issues Signed-off-by: Corentin Chary --- bin/euscan | 3 +++ pym/euscan/out.py | 6 ++++-- pym/euscan/scan.py | 5 +++-- 3 files changed, 10 insertions(+), 4 deletions(-) diff --git a/bin/euscan b/bin/euscan index 441d479..4e2e10f 100755 --- a/bin/euscan +++ b/bin/euscan @@ -308,6 +308,9 @@ def main(): "for more informations" ) + if not (CONFIG['format'] or CONFIG['quiet']) and len(queries) > 1: + print("") + if CONFIG['progress']: on_progress_gen.next() print("\n", file=sys.stderr) diff --git a/pym/euscan/out.py b/pym/euscan/out.py index 8a505fc..65947ee 100644 --- a/pym/euscan/out.py +++ b/pym/euscan/out.py @@ -37,8 +37,10 @@ class ProgressHandler(object): def progress_bar(): on_progress = None - progress_bar = TermProgressBar(title="euscan") - progress_bar.file = sys.stderr + try: + progress_bar = TermProgressBar(fd=sys.stderr, title="euscan") + except TypeError: + progress_bar = TermProgressBar(title="euscan") progress_handler = ProgressHandler(progress_bar) on_progress = progress_handler.on_progress diff --git a/pym/euscan/scan.py b/pym/euscan/scan.py index 5d9fd06..86574ae 100644 --- a/pym/euscan/scan.py +++ b/pym/euscan/scan.py @@ -169,8 +169,7 @@ def scan_upstream(query, on_progress=None): is_current_version_stable = is_version_stable(ver) if len(result) > 0: if not (CONFIG['format'] or CONFIG['quiet']): - print("\n", file=sys.stderr) - + print("") for cp, url, version, handler, confidence in result: if CONFIG["ignore-pre-release"]: if not is_version_stable(version): @@ -179,6 +178,8 @@ def scan_upstream(query, on_progress=None): if is_current_version_stable and \ not is_version_stable(version): continue + if CONFIG['progress']: + print ("", file=sys.stderr) output.result(cp, version, url, handler, confidence) return result