euscanwww: move processing to scan and misc, enhance update_portage_tree
update_portage_tree() now:
- watches stderr and stdout for each command
- uses the layman command instead of the layman API for sync, because the layman API doesn't work when stdout or stderr is not a real file (we could probably work around that with pipes and epoll)
- uses egencache instead of emerge to generate the cache
- exports PORTAGE_CONFIGROOT, ROOT, EIX_CACHEFILE, etc. so they are used everywhere

Signed-off-by: Corentin Chary <corentin.chary@gmail.com>
This commit is contained in:
7
euscanwww/djeuscan/processing/scan/__init__.py
Normal file
7
euscanwww/djeuscan/processing/scan/__init__.py
Normal file
@ -0,0 +1,7 @@
|
||||
__all__ = [
|
||||
"scan_metadata", "scan_portage", "scan_upstream",
|
||||
]
|
||||
|
||||
from scan_metadata import scan_metadata
|
||||
from scan_portage import scan_portage
|
||||
from scan_upstream import scan_upstream
|
142
euscanwww/djeuscan/processing/scan/scan_metadata.py
Normal file
142
euscanwww/djeuscan/processing/scan/scan_metadata.py
Normal file
@ -0,0 +1,142 @@
|
||||
import os.path
|
||||
|
||||
from gentoolkit.query import Query
|
||||
from gentoolkit.errors import GentoolkitFatalError
|
||||
|
||||
from django.db.transaction import commit_on_success
|
||||
from django.core.management.color import color_style
|
||||
from django.core.exceptions import ValidationError
|
||||
|
||||
from djeuscan.models import Package, Herd, Maintainer
|
||||
from djeuscan.processing import FakeLogger
|
||||
|
||||
class ScanMetadata(object):
    """Mirror herd and maintainer metadata of portage packages into the
    djeuscan ``Package``, ``Herd`` and ``Maintainer`` models.
    """

    def __init__(self, logger=None):
        """Remember the logger (a ``FakeLogger`` when none is given) and
        the Django color style used to decorate error messages.
        """
        self.style = color_style()
        self.logger = logger or FakeLogger()

    def scan(self, query=None, obj=None):
        """Refresh the herds and maintainers linked to one package.

        ``query`` is handed to gentoolkit's ``Query``.  ``obj`` is an
        optional, already loaded ``Package``; when it is missing the
        package row is looked up (or created) from the match found.
        Returns ``None`` in every case.
        """
        found = Query(query).smart_find(
            in_installed=True,
            in_porttree=True,
            in_overlay=True,
            include_masked=True,
            show_progress=False,
            no_matches_fatal=False,
        )
        if not found:
            message = "Unknown package '%s'" % query
            self.logger.error(self.style.ERROR(message))
            return

        # Take the last match in sort order; when its version contains
        # '9999' (presumably a live ebuild) prefer the one before it,
        # provided there is one.
        candidates = sorted(found)
        pkg = candidates.pop()
        if '9999' in pkg.version and candidates:
            pkg = candidates.pop()

        created = False
        if not obj:
            obj, created = Package.objects.get_or_create(
                category=pkg.category, name=pkg.name
            )
        if created:
            self.logger.info('+ [p] %s/%s' % (pkg.category, pkg.name))

        if not pkg.metadata:
            return

        # Index what the metadata declares: herds by their first item,
        # maintainers by e-mail address.
        herds = {}
        for herd_entry in pkg.metadata.herds(True):
            herds[herd_entry[0]] = herd_entry
        maintainers = {}
        for entry in pkg.metadata.maintainers():
            maintainers[entry.email] = entry

        # Compare against what is currently linked to the package.
        existing_herds = [linked.herd for linked in obj.herds.all()]
        new_herds = set(herds).difference(existing_herds)
        old_herds = set(existing_herds).difference(herds)

        existing_maintainers = [
            linked.email for linked in obj.maintainers.all()
        ]
        new_maintainers = set(maintainers).difference(existing_maintainers)
        old_maintainers = set(existing_maintainers).difference(maintainers)

        for linked in obj.herds.all():
            if herd_is_stale(linked, old_herds):
                obj.herds.remove(linked)
        for herd_name in new_herds:
            obj.herds.add(self.store_herd(*herds[herd_name]))

        for linked in obj.maintainers.all():
            if linked.email in old_maintainers:
                obj.maintainers.remove(linked)

        for email in new_maintainers:
            # ``person`` is deliberately rebound from the metadata entry
            # to the stored row, so the error message reports whichever
            # of the two was current when validation failed.
            person = maintainers[email]
            try:
                person = self.store_maintainer(person.name, person.email)
                obj.maintainers.add(person)
            except ValidationError:
                message = "Bad maintainer: '%s' '%s'" % (
                    person.name, person.email
                )
                self.logger.error(self.style.ERROR(message))

        obj.save()

    def store_herd(self, name, email):
        """Return the ``Herd`` row called ``name``, creating it if needed,
        and store ``email`` on it.  An empty name becomes ``'{nil}'``.
        """
        # A bare strip() removes every leading/trailing whitespace
        # character, CR, LF and TAB included.
        name = (name or '{nil}').strip()

        herd, created = Herd.objects.get_or_create(
            herd=name,
            defaults={"email": email}
        )
        if created:
            self.logger.info('+ [h] %s <%s>' % (name, email))

        # The address is rewritten even for rows that already existed.
        herd.email = email
        herd.save()
        return herd

    def store_maintainer(self, name, email):
        """Return the ``Maintainer`` row for ``email``, creating it if
        needed.  A missing name falls back to the e-mail, then ``'{nil}'``.
        """
        name = name or email or '{nil}'

        maintainer, created = Maintainer.objects.get_or_create(
            email=email,
            defaults={"name": name}
        )
        if created:
            self.logger.info(
                '+ [m] %s <%s>' % (name.encode('utf-8'), email)
            )
        return maintainer


def herd_is_stale(linked, old_herds):
    """Tell whether the linked herd's name is among the names to unlink."""
    return linked.herd in old_herds
|
||||
|
||||
@commit_on_success
def scan_metadata(packages=None, category=None, logger=None):
    """Run ``ScanMetadata.scan`` over a set of packages.

    When ``category`` is given, every ``Package`` of that category is
    scanned and ``packages`` is ignored.  Otherwise ``packages`` is used,
    falling back to all ``Package`` rows when it is empty.  Items may be
    ``Package`` instances or query strings.
    """
    handler = ScanMetadata(logger=logger)

    if category:
        packages = Package.objects.filter(category=category)
    elif not packages:
        packages = Package.objects.all()

    for item in packages:
        if not isinstance(item, Package):
            # Plain query string: the handler resolves the row itself.
            handler.scan(item)
            continue
        handler.scan('%s/%s' % (item.category, item.name), item)
|
||||
|
305
euscanwww/djeuscan/processing/scan/scan_portage.py
Normal file
305
euscanwww/djeuscan/processing/scan/scan_portage.py
Normal file
@ -0,0 +1,305 @@
|
||||
import subprocess
|
||||
import portage
|
||||
|
||||
from xml.etree.ElementTree import iterparse, ParseError
|
||||
|
||||
from django.db.transaction import commit_on_success
|
||||
from django.core.management.color import color_style
|
||||
|
||||
from euscan.helpers import get_version_type
|
||||
|
||||
from djeuscan.processing import FakeLogger
|
||||
from djeuscan.models import Package, Version, VersionLog
|
||||
|
||||
|
||||
class ScanPortage(object):
    """Synchronise ``Package`` and ``Version`` rows with the packages and
    versions reported by ``eix --xml``.

    Rows are memoised in ``self._cache`` so repeated lookups for the same
    package or version do not hit the database again.
    """

    def __init__(self, logger=None, no_log=False, purge_packages=False,
                 purge_versions=False):
        """Store the scan options.

        logger: object with ``info``/``error`` methods (``FakeLogger`` when
            omitted).
        no_log: when true, no ``VersionLog`` rows are written.
        purge_packages: delete packages that the scan did not see.
        purge_versions: delete packaged versions that the scan did not see.
        """
        self.logger = logger or FakeLogger()
        self.no_log = no_log
        self.purge_packages = purge_packages
        self.purge_versions = purge_versions

        self.style = color_style()
        self._cache = {'packages': {}, 'versions': {}}
        self._overlays = None

    def cache_hash_package(self, category, name):
        """Return the cache key of a package: ``category/name``."""
        return '%s/%s' % (category, name)

    def cache_store_package(self, package):
        """Remember ``package`` under its ``category/name`` key."""
        key = self.cache_hash_package(package.category, package.name)
        self._cache['packages'][key] = package

    def cache_get_package(self, category, name):
        """Return the cached package, or ``None`` when it is not cached."""
        return self._cache['packages'].get(
            self.cache_hash_package(category, name)
        )

    def cache_hash_version(self, category, name, version, revision, slot,
                           overlay):
        """Return the cache key of a version."""
        return '%s/%s-%s-r%s %s %s' % (category, name,
                                       version, revision,
                                       slot, overlay)

    def cache_get_version(self, category, name, version, revision, slot,
                          overlay):
        """Return the cached version, or ``None`` when it is not cached."""
        key = self.cache_hash_version(category, name, version, revision, slot,
                                      overlay)
        return self._cache['versions'].get(key)

    def cache_store_version(self, version):
        """Remember ``version`` under the key built from its own fields."""
        key = self.cache_hash_version(
            version.package.category, version.package.name, version.version,
            version.revision, version.slot, version.overlay
        )
        self._cache['versions'][key] = version

    def scan_eix_xml(self, query, category=None):
        """Run ``eix --xml`` and yield one dict per ``<package>`` element.

        Each dict has the keys ``package``, ``category``, ``homepage``,
        ``description`` and ``versions`` (a list of ``(cpv, slot, overlay)``
        tuples).  ``homepage`` and ``description`` default to ``""`` when
        the element is absent.  When the output cannot be parsed at all an
        error is logged and nothing is yielded.
        """
        cmd = ['eix', '--xml']
        if query:
            cmd.extend(['--exact', query])
        if category:
            cmd.extend(['-C', category])

        sub = subprocess.Popen(cmd, stdout=subprocess.PIPE)
        try:
            try:
                parser = iterparse(sub.stdout, ["start", "end"])
                next(parser)  # read root tag just for testing output
            except ParseError:
                if query:
                    msg = "Unknown package '%s'" % query
                else:
                    msg = "No packages."
                self.logger.error(self.style.ERROR(msg))
                return

            package = {"versions": [], "homepage": "", "description": ""}
            current_category = ""

            for event, elem in parser:
                if event == "start":  # on tag opening
                    # Only attributes are guaranteed to be available here.
                    if elem.tag == "category":
                        current_category = elem.attrib["name"]
                    elif elem.tag == "package":
                        package["package"] = elem.attrib["name"]
                        package["category"] = current_category
                    elif elem.tag == "version":
                        cpv = "%s/%s-%s" % (
                            package["category"],
                            package["package"],
                            elem.attrib["id"],
                        )
                        slot = elem.attrib.get("slot", "")
                        overlay = elem.attrib.get("repository", "gentoo")
                        package["versions"].append((cpv, slot, overlay))

                elif event == "end":  # on tag closing
                    if elem.tag in ("description", "homepage"):
                        # Text content is only complete once the element
                        # is closed, so it is read here and not on "start".
                        package[elem.tag] = elem.text or ""
                    elif elem.tag == "package":
                        yield package
                        package = {
                            "versions": [],
                            "homepage": "",
                            "description": "",
                        }
                    elif elem.tag == "category":
                        current_category = ""
                    # Release the subtree that has been fully handled.
                    elem.clear()
        finally:
            # Close the pipe and reap the child, also when the consumer
            # stops iterating early or parsing fails.
            sub.stdout.close()
            sub.wait()

    def prepare_purge_versions(self, packages, query=None, category=None):
        """Mark packaged versions dead before a scan.

        Versions seen by the scan are set alive again by ``store_version``;
        whatever is still dead afterwards is removed by
        ``purge_old_versions``.  Does nothing unless ``purge_versions`` is
        enabled.
        """
        if not self.purge_versions:
            return

        if not query:
            # Optimisation for --all or --category
            self.logger.info('Killing existing versions...')
            qs = Version.objects.filter(packaged=True)
            if category:
                # filter() returns a new queryset; it must be re-bound or
                # the category restriction is silently dropped.
                qs = qs.filter(package__category=category)
            qs.update(alive=False)
            self.logger.info('done')
        else:
            for package in packages:
                Version.objects.filter(package=package, packaged=True).\
                    update(alive=False)

    def scan(self, query=None, category=None):
        """Scan portage for ``query`` (everything when empty), optionally
        restricted to ``category``, and store what was found.
        """
        if not query:
            current_packages = Package.objects.all()
        elif '/' in query:
            cat, pkg = portage.catsplit(query)
            current_packages = Package.objects.filter(category=cat, name=pkg)
        else:
            current_packages = Package.objects.filter(name=query)
        if category:
            current_packages = current_packages.filter(category=category)

        self.prepare_purge_versions(current_packages, query, category)

        packages_alive = set()

        for data in self.scan_eix_xml(query, category):
            cat, pkg = data['category'], data['package']
            package = self.store_package(
                cat, pkg, data['homepage'], data['description']
            )
            packages_alive.add("%s/%s" % (cat, pkg))
            for cpv, slot, overlay in data['versions']:
                self.store_version(package, cpv, slot, overlay)

        self.purge_old_packages(current_packages, packages_alive)
        self.purge_old_versions()

    def store_package(self, cat, pkg, homepage, description):
        """Return the ``Package`` row for ``cat/pkg``, creating it if needed
        and keeping its homepage and description up to date.
        """
        created = False
        obj = self.cache_get_package(cat, pkg)

        if not obj:
            # Look the row up by identity only.  Homepage and description
            # are mutable, so using them as lookup keys would try to
            # create a second row as soon as one of them changes.
            obj, created = Package.objects.get_or_create(
                category=cat,
                name=pkg,
                defaults={
                    "homepage": homepage,
                    "description": description,
                },
            )
            self.cache_store_package(obj)

        if created:
            self.logger.info('+ [p] %s/%s' % (cat, pkg))
        elif obj.homepage != homepage or obj.description != description:
            # Column-only update, so counters changed elsewhere on this
            # row are not overwritten by this instance's copy.
            Package.objects.filter(pk=obj.pk).update(
                homepage=homepage, description=description
            )
            obj.homepage = homepage
            obj.description = description

        return obj

    def store_version(self, package, cpv, slot, overlay):
        """Record that version ``cpv`` of ``package`` exists in portage.

        Existing rows are flagged alive and packaged.  New rows are logged,
        counted on the package and, unless ``no_log`` is set, recorded in a
        ``VersionLog`` row.
        """
        _cat, _pkg, ver, rev = portage.catpkgsplit(cpv)
        if not overlay:
            overlay = 'gentoo'

        created = False
        obj = self.cache_get_version(
            package.category, package.name, ver, rev, slot, overlay
        )
        if not obj:
            obj, created = Version.objects.get_or_create(
                package=package, slot=slot,
                revision=rev, version=ver,
                overlay=overlay,
                defaults={
                    "alive": True,
                    "packaged": True,
                    "version_type": get_version_type(ver),
                    "confidence": 100,
                    "handler": "portage"
                }
            )

        if not created:  # Created objects have defaults values
            # Always written back: prepare_purge_versions() flips ``alive``
            # with a queryset update, which cached instances cannot see.
            obj.alive = True
            obj.packaged = True
            obj.save()
            # nothing more to do (note: it can't be an upstream version
            # because overlay can't be empty here)
            return

        self.cache_store_version(obj)
        self.logger.info('+ [v] %s' % (obj))

        if overlay == 'gentoo':
            package.n_packaged += 1
        else:
            package.n_overlay += 1
        package.n_versions += 1
        package.save()

        if self.no_log:
            return

        VersionLog.objects.create(
            package=obj.package,
            action=VersionLog.VERSION_ADDED,
            slot=obj.slot,
            revision=obj.revision,
            version=obj.version,
            overlay=obj.overlay
        )

    def purge_old_packages(self, packages, alive):
        """Delete every package of ``packages`` whose ``category/name`` is
        not in ``alive``.  Does nothing unless ``purge_packages`` is set.
        """
        if not self.purge_packages:
            return

        for package in packages:
            cp = "%s/%s" % (package.category, package.name)
            if cp not in alive:
                self.logger.info('- [p] %s' % (package))
                package.delete()

    def purge_old_versions(self):
        """Delete packaged versions still flagged dead, adjusting the
        counters of their package and, unless ``no_log`` is set, writing a
        ``VersionLog`` row for each.  Does nothing unless
        ``purge_versions`` is set.
        """
        if not self.purge_versions:
            return

        versions = Version.objects.filter(packaged=True, alive=False)
        for version in versions:
            if version.overlay == 'gentoo':
                version.package.n_packaged -= 1
            else:
                version.package.n_overlay -= 1
            version.package.n_versions -= 1
            version.package.save()

            self.logger.info('- [v] %s' % (version))

            if self.no_log:
                continue

            VersionLog.objects.create(
                package=version.package,
                action=VersionLog.VERSION_REMOVED,
                slot=version.slot,
                revision=version.revision,
                version=version.version,
                overlay=version.overlay
            )

        versions.delete()
|
||||
|
||||
|
||||
@commit_on_success
def scan_portage(packages=None, category=None, no_log=False, purge_packages=False,
                 purge_versions=False, prefetch=False, logger=None):
    """Scan the portage tree and store the packages and versions found.

    packages: iterable of ``Package`` instances or query strings.  When
        empty, one scan covering everything (optionally limited to
        ``category``) is run.  When it is ``None``, prefetching is forced
        on.
    prefetch: load every ``Package`` and ``Version`` row into the
        handler's cache before scanning.
    no_log, purge_packages, purge_versions: forwarded to ``ScanPortage``.
    """
    logger = logger or FakeLogger()
    prefetch = prefetch or packages is None

    handler = ScanPortage(
        logger=logger,
        no_log=no_log,
        purge_packages=purge_packages,
        purge_versions=purge_versions,
    )

    logger.info('Scanning portage tree...')

    if prefetch:
        logger.info('Prefetching objects...')
        for cached_package in Package.objects.all():
            handler.cache_store_package(cached_package)
        for cached_version in Version.objects.select_related('package').all():
            handler.cache_store_version(cached_version)
        logger.info('done')

    if packages:
        for item in packages:
            if isinstance(item, Package):
                item = '%s/%s' % (item.category, item.name)
            handler.scan(item)
    else:
        handler.scan(category=category)

    logger.info('Done.')
|
||||
|
157
euscanwww/djeuscan/processing/scan/scan_upstream.py
Normal file
157
euscanwww/djeuscan/processing/scan/scan_upstream.py
Normal file
@ -0,0 +1,157 @@
|
||||
import portage
|
||||
|
||||
from django.utils import timezone
|
||||
from django.db.transaction import commit_on_success
|
||||
|
||||
from euscan import CONFIG, output
|
||||
from euscan.scan import scan_upstream as euscan_scan_upstream
|
||||
|
||||
from djeuscan.processing import FakeLogger
|
||||
from djeuscan.models import Package, Version, EuscanResult, VersionLog
|
||||
|
||||
|
||||
class ScanUpstream(object):
    """Run euscan's upstream scan for packages and store the versions it
    reports as unpackaged ``Version`` rows.
    """

    def __init__(self, logger=None, purge_versions=False):
        """Keep the logger (``FakeLogger`` when omitted) and the purge flag."""
        self.logger = logger or FakeLogger()
        self.purge_versions = purge_versions

    def scan(self, package):
        """Scan ``package`` upstream and store every reported version plus
        the raw result.

        Returns ``{}`` when the output lacks the expected metadata, and
        ``None`` otherwise.
        """
        # euscan collects its results in the module-level ``output``
        # object; reset it and ask for dict-formatted output.
        CONFIG["format"] = "dict"
        output.clean()
        output.set_query(package)

        euscan_scan_upstream(package)

        report = output.get_formatted_output()
        report_json = output.get_formatted_output("json")

        try:
            metadata = report[package]["metadata"]
            cpv = metadata["cpv"]
            scan_time = metadata["scan_time"]
            ebuild = metadata["ebuild"]
        except KeyError:
            return {}

        stored_package = self.store_package(cpv)

        for found in report[package]["result"]:
            urls = " ".join(found["urls"])
            self.store_version(
                stored_package,
                found["version"],
                urls,
                found["type"],
                found["handler"],
                found["confidence"],
            )

        self.store_result(stored_package, report_json, scan_time, ebuild)

    def store_result(self, package, formatted_log, scan_time, ebuild):
        """Replace the stored ``EuscanResult`` of ``package`` with a new
        one holding ``formatted_log``, timestamped with the current time.
        """
        # Remove previous logs
        EuscanResult.objects.filter(package=package).delete()

        result = EuscanResult()
        result.package = package
        result.result = formatted_log
        result.datetime = timezone.now()
        result.scan_time = scan_time
        result.ebuild = ebuild
        result.save()

    def store_package(self, cpv):
        """Return the ``Package`` row matching ``cpv``, creating it if
        needed.  With ``purge_versions`` set, its unpackaged versions are
        flagged dead so the scan can revive the ones it still finds.
        """
        cat, pkg, ver, rev = portage.catpkgsplit(cpv)

        package, created = Package.objects.get_or_create(
            category=cat, name=pkg
        )
        if created:
            self.logger.info('+ [p] %s/%s' % (cat, pkg))

        if self.purge_versions:
            # Set all versions dead, then set found versions alive and
            # delete old versions
            unpackaged = Version.objects.filter(
                package=package, packaged=False
            )
            unpackaged.update(alive=False)

        return package

    def store_version(self, package, ver, url, version_type, handler,
                      confidence):
        """Create or refresh the upstream ``Version`` row ``ver`` of
        ``package``.

        An existing row only gets ``alive``, ``urls`` and ``packaged``
        rewritten.  A new row is logged, recorded in ``VersionLog`` and
        counted in ``package.n_versions``.
        """
        fresh_values = {
            "alive": True,
            "urls": url,
            "packaged": False,
            "version_type": version_type,
            "handler": handler,
            "confidence": confidence,
        }
        version, created = Version.objects.get_or_create(
            package=package,
            slot='',
            revision='r0',
            version=ver,
            overlay='',
            defaults=fresh_values
        )

        if not created:
            # If it's not a new version, just update the object and stop
            version.alive = True
            version.urls = url
            version.packaged = False
            version.save()
            return

        self.logger.info('+ [u] %s %s' % (version, url))

        VersionLog.objects.create(
            package=package,
            action=VersionLog.VERSION_ADDED,
            slot='',
            revision='r0',
            version=ver,
            overlay=''
        )

        package.n_versions += 1
        package.save()

    def purge_old_versions(self):
        """Delete unpackaged versions still flagged dead, writing a
        ``VersionLog`` row and decrementing ``n_versions`` for each.
        Does nothing unless ``purge_versions`` is set.
        """
        if not self.purge_versions:
            return

        dead_versions = Version.objects.filter(packaged=False, alive=False)
        for dead in dead_versions:
            VersionLog.objects.create(
                package=dead.package,
                action=VersionLog.VERSION_REMOVED,
                slot=dead.slot,
                revision=dead.revision,
                version=dead.version,
                overlay=dead.overlay
            )

            dead.package.n_versions -= 1
            dead.package.save()

            self.logger.info('- [u] %s %s' % (dead, dead.urls))

        dead_versions.delete()
|
||||
|
||||
@commit_on_success
def scan_upstream(packages=None, purge_versions=False,
                  logger=None):
    """Run the upstream scan for a set of packages.

    packages: iterable of ``Package`` instances or query strings; every
        ``Package`` row is scanned when it is empty.
    purge_versions: when true, upstream versions that were not found
        again are removed once all packages have been scanned.
    logger: object with an ``info`` method (``FakeLogger`` when omitted).
    """
    logger = logger or FakeLogger()

    scan_handler = ScanUpstream(logger=logger, purge_versions=purge_versions)

    logger.info('Scanning upstream...')

    if not packages:
        packages = Package.objects.all()

    for pkg in packages:
        if isinstance(pkg, Package):
            scan_handler.scan('%s/%s' % (pkg.category, pkg.name))
        else:
            scan_handler.scan(pkg)

    # Purge once, after every package had the chance to revive its versions.
    scan_handler.purge_old_versions()

    logger.info('Done.')
|
Reference in New Issue
Block a user