From ad5e399096651915b940a0374945ed43071cb082 Mon Sep 17 00:00:00 2001 From: Corentin Chary Date: Mon, 2 Jul 2012 08:09:06 +0200 Subject: [PATCH 1/6] euscanwww: move start_dev_services.sh to scripts Signed-off-by: Corentin Chary --- euscanwww/{ => scripts}/start_dev_services.sh | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename euscanwww/{ => scripts}/start_dev_services.sh (100%) diff --git a/euscanwww/start_dev_services.sh b/euscanwww/scripts/start_dev_services.sh similarity index 100% rename from euscanwww/start_dev_services.sh rename to euscanwww/scripts/start_dev_services.sh From 093cb4da478007df2c0f26d815646a44155a1b21 Mon Sep 17 00:00:00 2001 From: Corentin Chary Date: Mon, 2 Jul 2012 09:43:31 +0200 Subject: [PATCH 2/6] euscanwww/scan_portage: fixes and add --category Signed-off-by: Corentin Chary --- .../management/commands/scan_portage.py | 8 +- euscanwww/djeuscan/processing/scan_portage.py | 188 +++++++++--------- 2 files changed, 104 insertions(+), 92 deletions(-) diff --git a/euscanwww/djeuscan/management/commands/scan_portage.py b/euscanwww/djeuscan/management/commands/scan_portage.py index 537c367..0e3bed9 100644 --- a/euscanwww/djeuscan/management/commands/scan_portage.py +++ b/euscanwww/djeuscan/management/commands/scan_portage.py @@ -19,6 +19,11 @@ class Command(BaseCommand): dest='all', default=False, help='Scan all packages'), + make_option('--category', + action='store', + dest='category', + default=None, + help='Scan only this category'), make_option('--purge-packages', action='store_true', dest='purge-packages', @@ -47,7 +52,7 @@ class Command(BaseCommand): def handle(self, *args, **options): set_verbosity_level(logger, options.get("verbosity", 1)) - if options['all']: + if options['all'] or options['category']: packages = None elif len(args): packages = [pkg for pkg in args] @@ -56,6 +61,7 @@ class Command(BaseCommand): scan_portage( packages=packages, + category=options['category'], no_log=options["no-log"], 
purge_packages=options["purge-packages"], purge_versions=options["purge-versions"], diff --git a/euscanwww/djeuscan/processing/scan_portage.py b/euscanwww/djeuscan/processing/scan_portage.py index e7c8748..b8c9576 100644 --- a/euscanwww/djeuscan/processing/scan_portage.py +++ b/euscanwww/djeuscan/processing/scan_portage.py @@ -56,11 +56,78 @@ class ScanPortage(object): ) self._cache['versions'][key] = version - def scan(self, query=None): + def scan_eix_xml(self, query, category=None): cmd = ['eix', '--xml'] if query: cmd.extend(['--exact', query]) + if category: + cmd.extend(['-C', category]) + sub = subprocess.Popen(cmd, stdout=subprocess.PIPE) + output = sub.stdout + + try: + parser = iterparse(output, ["start", "end"]) + parser.next() # read root tag just for testing output + except ParseError: + if query: + msg = "Unknown package '%s'" % query + else: + msg = "No packages." + self.logger.error(self.style.ERROR(msg)) + return + + package = {'versions' : []} + category = "" + + for event, elem in parser: + if event == "start": # on tag opening + if elem.tag == "category": + category = elem.attrib["name"] + elif elem.tag == "package": + package["package"] = elem.attrib["name"] + package["category"] = category + elif elem.tag in ["description", "homepage"]: + package[elem.tag] = elem.text or "" + elif elem.tag == "version": + # append version data to versions + cpv = "%s/%s-%s" % \ + (package["category"], package["package"], elem.attrib["id"]) + slot = elem.attrib.get("slot", "") + overlay = elem.attrib.get("repository", "gentoo") + package["versions"].append((cpv, slot, overlay)) + + elif event == "end": # on tag closing + if elem.tag == "package": + # clean old data + yield package + package = {"versions" : []} + + if elem.tag == "category": + # clean old data + category = "" + elem.clear() + + def prepare_purge_versions(self, packages, query=None, category=None): + if not self.purge_versions: + return + + # Set all versions dead, then set found versions alive 
and + # delete old versions + if not query: + # Optimisation for --all or --category + self.logger.info('Killing existing versions...') + qs = Version.objects.filter(packaged=True) + if category: + qs = qs.filter(package__category=category) + qs.update(alive=False) + self.logger.info('done') + else: + for package in packages: + Version.objects.filter(package=package, packaged=True).\ + update(alive=False) + + def scan(self, query=None, category=None): if not query: current_packages = Package.objects.all() elif '/' in query: @@ -68,78 +135,22 @@ class ScanPortage(object): current_packages = Package.objects.filter(category=cat, name=pkg) else: current_packages = Package.objects.filter(name=query) + if category: + current_packages = current_packages.filter(category=category) - if self.purge_versions: - if not query: - self.logger.info('Killing existing versions...') - Version.objects.filter(packaged=True).update(alive=False) - self.logger.info('done') - else: - for package in current_packages: - Version.objects.filter(package=package, packaged=True).\ - update(alive=False) + self.prepare_purge_versions(current_packages, query, category) - sub = subprocess.Popen(cmd, stdout=subprocess.PIPE) + packages_alive = set() - output = sub.stdout + for data in self.scan_eix_xml(query, category): + cat, pkg = data['category'], data['package'] + package = self.store_package(cat, pkg, data['homepage'], data['description']) + packages_alive.add("%s/%s" % (cat, pkg)) + for cpv, slot, overlay in data['versions']: + self.store_version(package, cpv, slot, overlay) - try: - parser = iterparse(output, ["start", "end"]) - parser.next() # read root tag just for testing output - except ParseError: - self.logger.error( - self.style.ERROR( - "Unknown package '%s'" % query - ) - ) - else: - cat, pkg, homepage, desc = ("", "", "", "") - versions = [] - packages_alive = set() - - for event, elem in parser: - if event == "start": # on tag opening - if elem.tag == "category": - cat = elem.attrib["name"] 
- if elem.tag == "package": - pkg = elem.attrib["name"] - if elem.tag == "description": - desc = elem.text or "" - if elem.tag == "homepage": - homepage = elem.text or "" - if elem.tag == "version": - # append version data to versions - cpv = "%s/%s-%s" % (cat, pkg, elem.attrib["id"]) - slot = elem.attrib.get("slot", "") - overlay = elem.attrib.get("overlay", "") - versions.append((cpv, slot, overlay)) - - elif event == "end": # on tag closing - if elem.tag == "package": - # package tag has been closed, saving everything! - package = self.store_package(cat, pkg, homepage, - desc) - packages_alive.add('%s/%s' % (cat, pkg)) - for cpv, slot, overlay in versions: - self.store_version(package, cpv, slot, overlay) - - # clean old data - pkg, homepage, desc = ("", "", "") - versions = [] - - if elem.tag == "category": - # clean old data - cat = "" - elem.clear() - - if self.purge_packages: - for package in current_packages: - cp = "%s/%s" % (package.category, package.name) - if cp not in packages_alive: - self.logger.info('- [p] %s' % (package)) - package.delete() - if self.purge_versions: - self.purge_old_versions(current_packages) + self.purge_old_packages(current_packages, packages_alive) + self.purge_old_versions() def store_package(self, cat, pkg, homepage, description): created = False @@ -157,14 +168,6 @@ class ScanPortage(object): if created: self.logger.info('+ [p] %s/%s' % (cat, pkg)) - # Set all versions dead, then set found versions alive and - # delete old versions - if not self.purge_versions: - Version.objects.filter( - package=obj, - packaged=True - ).update(alive=False) - return obj def store_version(self, package, cpv, slot, overlay): @@ -223,18 +226,21 @@ class ScanPortage(object): overlay=obj.overlay ) - def purge_old_versions(self, packages): - # For each dead versions - if packages: - versions = [] - for package in packages: - qs = Version.objects.filter(package=package, packaged=True, - alive=False) - for version in qs: - versions.append(version) - 
else: - versions = Version.objects.filter(packaged=True, alive=False) + def purge_old_packages(self, packages, alive): + if not self.purge_packages: + return + for package in packages: + cp = "%s/%s" % (package.category, package.name) + if cp not in alive: + self.logger.info('- [p] %s' % (package)) + package.delete() + + def purge_old_versions(self): + if not self.purge_versions: + return + + versions = Version.objects.filter(packaged=True, alive=False) for version in versions: if version.overlay == 'gentoo': version.package.n_packaged -= 1 @@ -257,11 +263,11 @@ class ScanPortage(object): overlay=version.overlay ) - Version.objects.filter(packaged=True, alive=False).delete() + versions.delete() @commit_on_success -def scan_portage(packages=None, no_log=False, purge_packages=False, +def scan_portage(packages=None, category=None, no_log=False, purge_packages=False, purge_versions=False, prefetch=False, logger=None): logger = logger or FakeLogger() @@ -287,7 +293,7 @@ def scan_portage(packages=None, no_log=False, purge_packages=False, logger.info('done') if not packages: - scan_handler.scan() + scan_handler.scan(category=category) else: for pkg in packages: if isinstance(pkg, Package): From 0e32ccf46edc0b97d01feaeec7f040f3e96d88ee Mon Sep 17 00:00:00 2001 From: Corentin Chary Date: Mon, 2 Jul 2012 09:44:00 +0200 Subject: [PATCH 3/6] euscanwww: make it work with MySQL unique indexes length must be <= 255 Signed-off-by: Corentin Chary --- ...packagequery__chg_field_package_last_version_gentoo__.py | 6 +++--- euscanwww/djeuscan/models.py | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/euscanwww/djeuscan/migrations/0003_auto__add_refreshpackagequery__chg_field_package_last_version_gentoo__.py b/euscanwww/djeuscan/migrations/0003_auto__add_refreshpackagequery__chg_field_package_last_version_gentoo__.py index 2724c09..9dfa51b 100644 --- a/euscanwww/djeuscan/migrations/0003_auto__add_refreshpackagequery__chg_field_package_last_version_gentoo__.py +++ 
b/euscanwww/djeuscan/migrations/0003_auto__add_refreshpackagequery__chg_field_package_last_version_gentoo__.py @@ -11,7 +11,7 @@ class Migration(SchemaMigration): # Adding model 'RefreshPackageQuery' db.create_table('djeuscan_refreshpackagequery', ( ('id', self.gf('django.db.models.fields.AutoField')(primary_key=True)), - ('query', self.gf('django.db.models.fields.CharField')(unique=True, max_length=256)), + ('query', self.gf('django.db.models.fields.CharField')(unique=True, max_length=255)), ('priority', self.gf('django.db.models.fields.IntegerField')(default=0)), )) db.send_create_signal('djeuscan', ['RefreshPackageQuery']) @@ -104,7 +104,7 @@ class Migration(SchemaMigration): 'Meta': {'object_name': 'RefreshPackageQuery'}, 'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}), 'priority': ('django.db.models.fields.IntegerField', [], {'default': '0'}), - 'query': ('django.db.models.fields.CharField', [], {'unique': 'True', 'max_length': '256'}) + 'query': ('django.db.models.fields.CharField', [], {'unique': 'True', 'max_length': '255'}) }, 'djeuscan.version': { 'Meta': {'unique_together': "(['package', 'slot', 'revision', 'version', 'overlay'],)", 'object_name': 'Version'}, @@ -136,4 +136,4 @@ class Migration(SchemaMigration): } } - complete_apps = ['djeuscan'] \ No newline at end of file + complete_apps = ['djeuscan'] diff --git a/euscanwww/djeuscan/models.py b/euscanwww/djeuscan/models.py index e5abd81..39c488b 100644 --- a/euscanwww/djeuscan/models.py +++ b/euscanwww/djeuscan/models.py @@ -269,7 +269,7 @@ class MaintainerLog(Log): class RefreshPackageQuery(models.Model): - query = models.CharField(max_length=256, unique=True) + query = models.CharField(max_length=255, unique=True) priority = models.IntegerField(default=0) def __unicode__(self): From ba805536ad66aa196fbce88f20a1146fb2c0d685 Mon Sep 17 00:00:00 2001 From: Corentin Chary Date: Mon, 2 Jul 2012 11:19:49 +0200 Subject: [PATCH 4/6] euscanwww: add --category to scan_metadata, remove 
homepage and description Signed-off-by: Corentin Chary --- .../management/commands/scan_metadata.py | 13 ++- .../djeuscan/processing/scan_metadata.py | 108 +++++++++--------- euscanwww/djeuscan/processing/scan_portage.py | 2 +- 3 files changed, 63 insertions(+), 60 deletions(-) diff --git a/euscanwww/djeuscan/management/commands/scan_metadata.py b/euscanwww/djeuscan/management/commands/scan_metadata.py index 5decb70..3d98a36 100644 --- a/euscanwww/djeuscan/management/commands/scan_metadata.py +++ b/euscanwww/djeuscan/management/commands/scan_metadata.py @@ -19,6 +19,11 @@ class Command(BaseCommand): dest='all', default=False, help='Scan all packages'), + make_option('--category', + action='store', + dest='category', + default=None, + help='Scan only this category'), ) args = '' help = 'Scans metadata and fills database' @@ -26,7 +31,7 @@ class Command(BaseCommand): def handle(self, *args, **options): set_verbosity_level(logger, options.get("verbosity", 1)) - if options['all']: + if options['all'] or options['category']: packages = None elif len(args): @@ -34,4 +39,8 @@ class Command(BaseCommand): else: packages = [pkg[:-1] for pkg in sys.stdin.readlines()] - scan_metadata(packages=packages, logger=logger) + scan_metadata( + packages=packages, + category=options['category'], + logger=logger + ) diff --git a/euscanwww/djeuscan/processing/scan_metadata.py b/euscanwww/djeuscan/processing/scan_metadata.py index c834327..263f4c0 100644 --- a/euscanwww/djeuscan/processing/scan_metadata.py +++ b/euscanwww/djeuscan/processing/scan_metadata.py @@ -1,3 +1,5 @@ +import os.path + from gentoolkit.query import Query from gentoolkit.errors import GentoolkitFatalError @@ -8,17 +10,19 @@ from django.core.exceptions import ValidationError from djeuscan.models import Package, Herd, Maintainer from djeuscan.processing import FakeLogger - class ScanMetadata(object): def __init__(self, logger=None): self.style = color_style() self.logger = logger or FakeLogger() - @commit_on_success def 
scan(self, query=None, obj=None): - matches = Query(query).find( - include_masked=True, - in_installed=False, + matches = Query(query).smart_find( + in_installed=True, + in_porttree=True, + in_overlay=True, + include_masked=True, + show_progress=False, + no_matches_fatal=False, ) if not matches: @@ -39,65 +43,52 @@ class ScanMetadata(object): else: created = False - try: - obj.homepage = pkg.environment("HOMEPAGE") - obj.description = pkg.environment("DESCRIPTION") - except GentoolkitFatalError, err: - self.logger.error( - self.style.ERROR( - "Gentoolkit fatal error: '%s'" % str(err) - ) - ) - if created: self.logger.info('+ [p] %s/%s' % (pkg.category, pkg.name)) - if pkg.metadata: - herds = dict( - [(herd[0], herd) for herd in pkg.metadata.herds(True)] - ) - maintainers = dict( - [(m.email, m) for m in pkg.metadata.maintainers()] - ) + if not pkg.metadata: + return - existing_herds = [h.herd for h in obj.herds.all()] - new_herds = set(herds.keys()).difference(existing_herds) - old_herds = set(existing_herds).difference(herds.keys()) + herds = dict( + [(herd[0], herd) for herd in pkg.metadata.herds(True)] + ) + maintainers = dict( + [(m.email, m) for m in pkg.metadata.maintainers()] + ) - existing_maintainers = [m.email for m in obj.maintainers.all()] - new_maintainers = set( - maintainers.keys()).difference(existing_maintainers - ) - old_maintainers = set( - existing_maintainers).difference(maintainers.keys() - ) + existing_herds = [h.herd for h in obj.herds.all()] + new_herds = set(herds.keys()).difference(existing_herds) + old_herds = set(existing_herds).difference(herds.keys()) - for herd in obj.herds.all(): - if herd.herd in old_herds: - obj.herds.remove(herd) + existing_maintainers = [m.email for m in obj.maintainers.all()] + new_maintainers = set(maintainers.keys()).difference(existing_maintainers) + old_maintainers = set(existing_maintainers).difference(maintainers.keys()) - for herd in new_herds: - herd = self.store_herd(*herds[herd]) - obj.herds.add(herd) 
+ for herd in obj.herds.all(): + if herd.herd in old_herds: + obj.herds.remove(herd) - for maintainer in obj.maintainers.all(): - if maintainer.email in old_maintainers: - obj.maintainers.remove(maintainer) + for herd in new_herds: + herd = self.store_herd(*herds[herd]) + obj.herds.add(herd) - for maintainer in new_maintainers: - maintainer = maintainers[maintainer] - try: - maintainer = self.store_maintainer( - maintainer.name, maintainer.email + for maintainer in obj.maintainers.all(): + if maintainer.email in old_maintainers: + obj.maintainers.remove(maintainer) + + for maintainer in new_maintainers: + maintainer = maintainers[maintainer] + try: + maintainer = self.store_maintainer( + maintainer.name, maintainer.email ) - obj.maintainers.add(maintainer) - except ValidationError: - self.logger.error( - self.style.ERROR("Bad maintainer: '%s' '%s'" % \ + obj.maintainers.add(maintainer) + except ValidationError: + self.logger.error( + self.style.ERROR("Bad maintainer: '%s' '%s'" % \ (maintainer.name, maintainer.email)) ) obj.save() - return True def store_herd(self, name, email): if not name: @@ -134,15 +125,18 @@ class ScanMetadata(object): ) return maintainer - -def scan_metadata(packages=None, logger=None): +@commit_on_success +def scan_metadata(packages=None, category=None, logger=None): scan_handler = ScanMetadata(logger=logger) - if not packages: + + if category: + packages = Package.objects.filter(category=category) + elif not packages: packages = Package.objects.all() for pkg in packages: if isinstance(pkg, Package): - result = scan_handler.scan('%s/%s' % (pkg.category, pkg.name), pkg) + scan_handler.scan('%s/%s' % (pkg.category, pkg.name), pkg) else: - result = scan_handler.scan(pkg) - return result + scan_handler.scan(pkg) + diff --git a/euscanwww/djeuscan/processing/scan_portage.py b/euscanwww/djeuscan/processing/scan_portage.py index b8c9576..cbda779 100644 --- a/euscanwww/djeuscan/processing/scan_portage.py +++ 
b/euscanwww/djeuscan/processing/scan_portage.py @@ -302,4 +302,4 @@ def scan_portage(packages=None, category=None, no_log=False, purge_packages=Fals scan_handler.scan(pkg) logger.info('Done.') - return True + From 84a764dad5a1a16ee48f2eb61e532ba311ffb4cf Mon Sep 17 00:00:00 2001 From: Corentin Chary Date: Mon, 2 Jul 2012 11:20:18 +0200 Subject: [PATCH 5/6] euscan: allow --progress with --quiet Signed-off-by: Corentin Chary --- bin/euscan | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/bin/euscan b/bin/euscan index 963193f..7e6b593 100755 --- a/bin/euscan +++ b/bin/euscan @@ -188,7 +188,7 @@ def parse_args(): getopt_options['short']['global'] = "hVCqv1bf:p" getopt_options['long']['global'] = [ "help", "version", "nocolor", "quiet", "verbose", "oneshot", - "brute-force=", "format=" + "brute-force=", "format=", "progress" ] short_opts = getopt_options['short']['global'] @@ -241,7 +241,7 @@ def main(): if CONFIG['verbose'] > 2: HTTPConnection.debuglevel = 1 - if not CONFIG["format"]: + if not CONFIG["format"] and not CONFIG['quiet']: CONFIG["progress"] = False on_progress = None From 90702ddee82dd9f5e5582a37012d57d422a59bb5 Mon Sep 17 00:00:00 2001 From: Corentin Chary Date: Mon, 2 Jul 2012 11:20:52 +0200 Subject: [PATCH 6/6] euscan: don't import pkg_resources, we don't want all these warnings Signed-off-by: Corentin Chary --- pym/euscan/helpers.py | 15 +++++------ pym/euscan/version.py | 60 +++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 66 insertions(+), 9 deletions(-) create mode 100644 pym/euscan/version.py diff --git a/pym/euscan/helpers.py b/pym/euscan/helpers.py index 5629b7b..e50267a 100644 --- a/pym/euscan/helpers.py +++ b/pym/euscan/helpers.py @@ -1,8 +1,6 @@ import os import re -import pkg_resources import errno - import urllib2 import portage @@ -15,10 +13,9 @@ except ImportError: import robotparser import urlparse - -from euscan import CONFIG, BLACKLIST_VERSIONS, ROBOTS_TXT_BLACKLIST_DOMAINS import euscan - +from euscan 
import CONFIG, BLACKLIST_VERSIONS, ROBOTS_TXT_BLACKLIST_DOMAINS +from euscan.version import parse_version def htop_vercmp(a, b): def fixver(v): @@ -182,8 +179,8 @@ def simple_vercmp(a, b): return r # Fallback - a = pkg_resources.parse_version(a) - b = pkg_resources.parse_version(b) + a = parse_version(a) + b = parse_version(b) if a < b: return -1 @@ -198,8 +195,8 @@ def vercmp(package, a, b): def version_is_nightly(a, b): - a = pkg_resources.parse_version(a) - b = pkg_resources.parse_version(b) + a = parse_version(a) + b = parse_version(b) ''' Try to skip nightly builds when not wanted (www-apps/moodle) ''' if len(a) != len(b) and len(b) == 2 and len(b[0]) == len('yyyymmdd'): diff --git a/pym/euscan/version.py b/pym/euscan/version.py new file mode 100644 index 0000000..d836c7e --- /dev/null +++ b/pym/euscan/version.py @@ -0,0 +1,60 @@ +import re + +# Stolen from pkg_resources, but importing it is not a good idea + +component_re = re.compile(r'(\d+ | [a-z]+ | \.| -)', re.VERBOSE) +replace = {'pre':'c', 'preview':'c','-':'final-','rc':'c','dev':'@'}.get + +def _parse_version_parts(s): + for part in component_re.split(s): + part = replace(part,part) + if not part or part=='.': + continue + if part[:1] in '0123456789': + yield part.zfill(8) # pad for numeric comparison + else: + yield '*'+part + + yield '*final' # ensure that alpha/beta/candidate are before final + +def parse_version(s): + """Convert a version string to a chronologically-sortable key + + This is a rough cross between distutils' StrictVersion and LooseVersion; + if you give it versions that would work with StrictVersion, then it behaves + the same; otherwise it acts like a slightly-smarter LooseVersion. It is + *possible* to create pathological version coding schemes that will fool + this parser, but they should be very rare in practice. + + The returned value will be a tuple of strings. 
Numeric portions of the + version are padded to 8 digits so they will compare numerically, but + without relying on how numbers compare relative to strings. Dots are + dropped, but dashes are retained. Trailing zeros between alpha segments + or dashes are suppressed, so that e.g. "2.4.0" is considered the same as + "2.4". Alphanumeric parts are lower-cased. + + The algorithm assumes that strings like "-" and any alpha string that + alphabetically follows "final" represents a "patch level". So, "2.4-1" + is assumed to be a branch or patch of "2.4", and therefore "2.4.1" is + considered newer than "2.4-1", which in turn is newer than "2.4". + + Strings like "a", "b", "c", "alpha", "beta", "candidate" and so on (that + come before "final" alphabetically) are assumed to be pre-release versions, + so that the version "2.4" is considered newer than "2.4a1". + + Finally, to handle miscellaneous cases, the strings "pre", "preview", and + "rc" are treated as if they were "c", i.e. as though they were release + candidates, and therefore are not as new as a version string that does not + contain them, and "dev" is replaced with an '@' so that it sorts lower than + any other pre-release tag. + """ + parts = [] + for part in _parse_version_parts(s.lower()): + if part.startswith('*'): + if part<'*final': # remove '-' before a prerelease tag + while parts and parts[-1]=='*final-': parts.pop() + # remove trailing zeros from each series of numeric parts + while parts and parts[-1]=='00000000': + parts.pop() + parts.append(part) + return tuple(parts)