diff --git a/euscanwww/djeuscan/processing/scan_metadata.py b/euscanwww/djeuscan/processing/scan_metadata.py index 02a8e84..c834327 100644 --- a/euscanwww/djeuscan/processing/scan_metadata.py +++ b/euscanwww/djeuscan/processing/scan_metadata.py @@ -137,7 +137,7 @@ class ScanMetadata(object): def scan_metadata(packages=None, logger=None): scan_handler = ScanMetadata(logger=logger) - if packages is None: + if not packages: packages = Package.objects.all() for pkg in packages: diff --git a/euscanwww/djeuscan/processing/scan_portage.py b/euscanwww/djeuscan/processing/scan_portage.py index 14d7a8e..29f5974 100644 --- a/euscanwww/djeuscan/processing/scan_portage.py +++ b/euscanwww/djeuscan/processing/scan_portage.py @@ -15,11 +15,10 @@ from djeuscan.models import Package, Version, VersionLog class ScanPortage(object): def __init__(self, logger=None, no_log=False, purge_packages=False, - purge_versions=False, kill_versions=False): + kill_versions=False): self.logger = logger or FakeLogger() self.no_log = no_log self.purge_packages = purge_packages - self.purge_versions = purge_versions self.kill_versions = kill_versions self.style = color_style() @@ -84,8 +83,13 @@ class ScanPortage(object): return self._overlays - @commit_on_success def scan(self, query=None): + if self.purge_packages: + with commit_on_success(): + for package in Package.objects.all(): + self.logger.info('- [p] %s' % (package)) + package.delete() + cmd = ['eix', '--xml', '--pure-packages', '-x'] if query: cmd.extend(['--exact', query]) @@ -97,7 +101,6 @@ class ScanPortage(object): output = subprocess.Popen(cmd, stdout=subprocess.PIPE).\ communicate()[0] - output = output.strip().strip('\n') if len(output) == 0: if not query: @@ -124,24 +127,26 @@ class ScanPortage(object): cat = category_tag.getAttribute("name") pkg = package_tag.getAttribute("name") homepage_tags = package_tag.getElementsByTagName("homepage") - homepage = homepage_tags[0].firstChild.nodeValue \ - if homepage_tags else "" + try: + homepage = homepage_tags[0].firstChild.nodeValue + except (IndexError, AttributeError): + homepage = "" desc_tags = package_tag.getElementsByTagName("description") - desc = desc_tags[0].firstChild.nodeValue if desc_tags else "" + try: + desc = desc_tags[0].firstChild.nodeValue + except (IndexError, AttributeError): + desc = "" - package = self.store_package(cat, pkg, homepage, desc) + with commit_on_success(): + package = self.store_package(cat, pkg, homepage, desc) - for version_tag in package_tag.getElementsByTagName("version"): - cpv = "%s/%s-%s" % (cat, pkg, - version_tag.getAttribute("id")) - slot = version_tag.getAttribute("slot") - overlay = version_tag.getAttribute("overlay") - self.store_version(package, cpv, slot, overlay) - - if self.purge_packages and not query: - for package in Package.objects.all(): - self.logger.info('- [p] %s' % (package)) - package.delete() + for version_tag in package_tag.\ + getElementsByTagName("version"): + cpv = "%s/%s-%s" % (cat, pkg, + version_tag.getAttribute("id")) + slot = version_tag.getAttribute("slot") + overlay = version_tag.getAttribute("overlay") + self.store_version(package, cpv, slot, overlay) def store_package(self, cat, pkg, homepage, description): created = False @@ -232,7 +237,7 @@ class ScanPortage(object): @commit_on_success -def purge_versions(logger=None, nolog=False): +def do_purge_versions(logger=None, no_log=False): logger = logger or FakeLogger() # For each dead versions @@ -246,7 +251,7 @@ def purge_versions(logger=None, nolog=False): logger.info('- [v] %s' % (version)) - if nolog: + if no_log: continue VersionLog.objects.create( @@ -274,7 +279,6 @@ def scan_portage(packages=None, no_log=False, purge_packages=False, logger=logger, no_log=no_log, purge_packages=purge_packages, - purge_versions=purge_versions, kill_versions=kill_versions, ) @@ -288,16 +292,16 @@ def scan_portage(packages=None, no_log=False, purge_packages=False, scan_handler.cache_store_version(version) logger.info('done') - if packages is None: + if not packages: scan_handler.scan() else: for pkg in packages: if isinstance(pkg, Package): - scan_handler.scan('%s/%s' % (pkg.category, pkg.name), pkg) + scan_handler.scan('%s/%s' % (pkg.category, pkg.name)) else: scan_handler.scan(pkg) if purge_versions: - purge_versions(logger=logger, no_log=no_log) + do_purge_versions(logger=logger, no_log=no_log) logger.info('Done.') return True diff --git a/euscanwww/djeuscan/processing/scan_upstream.py b/euscanwww/djeuscan/processing/scan_upstream.py index 827ad91..981af59 100644 --- a/euscanwww/djeuscan/processing/scan_upstream.py +++ b/euscanwww/djeuscan/processing/scan_upstream.py @@ -16,6 +16,7 @@ class ScanUpstream(object): def scan(self, package): CONFIG["format"] = "dict" + output.clean() output.set_query(package) euscan_scan_upstream(package) @@ -34,7 +35,14 @@ class ScanUpstream(object): obj = self.store_package(cpv) for res in out[package]["result"]: - self.store_version(obj, res["version"], " ".join(res["urls"])) + self.store_version( + obj, + res["version"], + " ".join(res["urls"]), + res["type"], + res["handler"], + res["confidence"], + ) self.store_result(obj, out_json, scan_time, ebuild) @@ -66,10 +74,17 @@ class ScanUpstream(object): return obj - def store_version(self, package, ver, url): + def store_version(self, package, ver, url, version_type, handler, + confidence): obj, created = Version.objects.get_or_create( - package=package, slot='', revision='r0', version=ver, overlay='', - defaults={"alive": True, "urls": url, "packaged": False} + package=package, + slot='', + revision='r0', + version=ver, + overlay='', + defaults={"alive": True, "urls": url, "packaged": False, + "version_type": version_type, "handler": handler, + "confidence": confidence} ) if not created: obj.alive = True @@ -97,7 +112,7 @@ class ScanUpstream(object): @commit_on_success -def purge_versions(logger=None): +def do_purge_versions(logger=None): logger = logger or FakeLogger() # For each dead versions @@ -126,17 +141,21 @@ def scan_upstream(packages=None, purge_versions=False, logger.info('Scanning upstream...') - if packages is None: + if not packages: packages = Package.objects.all() + result = True + for pkg in packages: if isinstance(pkg, Package): - result = scan_handler.scan('%s/%s' % (pkg.category, pkg.name)) + curr = scan_handler.scan('%s/%s' % (pkg.category, pkg.name)) else: - result = scan_handler.scan(pkg) + curr = scan_handler.scan(pkg) + if not curr: + result = False if purge_versions: - purge_versions(logger=logger) + do_purge_versions(logger=logger) logger.info('Done.') return result diff --git a/euscanwww/djeuscan/tasks.py b/euscanwww/djeuscan/tasks.py index 4f6842e..3fcbcae 100644 --- a/euscanwww/djeuscan/tasks.py +++ b/euscanwww/djeuscan/tasks.py @@ -58,7 +58,7 @@ def _run_in_chunks(task, packages, kwargs=None, job = TaskSet(tasks=tasks) result = job.apply_async() # TODO: understand why this causes timeout - output.extend(list(result.join(timeout=3600))) + output.extend(list(result.join())) return output @@ -102,7 +102,7 @@ def scan_metadata_list_task(query): Runs a parallel metadata scan for packages in the query list (space separated string). Task used only from the web interface. """ - _run_in_chunks(_scan_metadata_task, [p for p in query.split()]) + return _run_in_chunks(_scan_metadata_task, [p for p in query.split()]) @task @@ -110,7 +110,7 @@ def scan_metadata_all_task(): """ Runs a parallel metadata scan for all packages """ - _run_in_chunks(_scan_metadata_task, Package.objects.all()) + return _run_in_chunks(_scan_metadata_task, Package.objects.all()) @task @@ -120,8 +120,11 @@ def _scan_portage_task(packages, no_log=False, purge_packages=False, Scans portage for the given set of packages """ logger = _scan_portage_task.get_logger() - logger.info("Starting portage scanning subtask for %d packages...", - len(packages)) + if packages: + logger.info("Starting portage scanning subtask for %d packages...", + len(packages)) + else: + logger.info("Starting portage scanning for all packages...") result = scan_portage( packages=packages, @@ -145,18 +148,23 @@ def scan_portage_list_task(query, no_log=False, purge_packages=False, """ kwargs = {"no_log": no_log, "purge_packages": purge_packages, "purge_versions": purge_versions, "prefetch": prefetch} - _run_in_chunks(_scan_portage_task, [p for p in query.split()], kwargs) + return _run_in_chunks(_scan_portage_task, [p for p in query.split()], + kwargs) @task def scan_portage_all_task(no_log=False, purge_packages=False, purge_versions=False, prefetch=False): """ - Runs a parallel portage scan for all packages + Runs a syncronous portage scan for all packages """ - kwargs = {"no_log": no_log, "purge_packages": purge_packages, - "purge_versions": purge_versions, "prefetch": prefetch} - _run_in_chunks(_scan_metadata_task, Package.objects.all(), kwargs) + return _scan_portage_task( + packages=None, + no_log=no_log, + purge_packages=purge_packages, + purge_versions=purge_versions, + prefetch=prefetch, + ) @task @@ -187,7 +195,8 @@ def scan_upstream_list_task(query, purge_versions=False): """ kwargs = {"purge_versions": purge_versions} - _run_in_chunks(_scan_upstream_task, [p for p in query.split()], kwargs) + return _run_in_chunks(_scan_upstream_task, [p for p in query.split()], + kwargs) @task @@ -196,7 +205,7 @@ def scan_upstream_all_task(purge_versions=False): Runs a parallel portage scan for all packages """ kwargs = {"purge_versions": purge_versions} - _run_in_chunks(_scan_upstream_task, Package.objects.all(), kwargs) + return _run_in_chunks(_scan_upstream_task, Package.objects.all(), kwargs) @task diff --git a/euscanwww/euscanwww/settings.py b/euscanwww/euscanwww/settings.py index 0fe0f89..9a96165 100644 --- a/euscanwww/euscanwww/settings.py +++ b/euscanwww/euscanwww/settings.py @@ -235,7 +235,7 @@ CELERY_RESULT_BACKEND = "amqp" BROKER_CONNECTION_TIMEOUT = 3600 CELERYD_CONCURRENCY = 4 -TASKS_CONCURRENTLY = 8 +TASKS_CONCURRENTLY = 4 TASKS_SUBTASK_PACKAGES = 32 # LDAP authentication diff --git a/pym/euscan/out.py b/pym/euscan/out.py index 1928980..8a260d3 100644 --- a/pym/euscan/out.py +++ b/pym/euscan/out.py @@ -67,12 +67,20 @@ class EOutputMem(EOutput): self.out = StringIO() def getvalue(self): - return re.sub("\033\[[0-9;]+m", "", self.out.getvalue()) + return clean_colors(self.out.getvalue()) def _write(self, f, msg): super(EOutputMem, self)._write(self.out, msg) +def clean_colors(string): + if type(string) is str: + string = re.sub("\033\[[0-9;]+m", "", string) + string = re.sub(r"\\u001b\[[0-9;]+m", "", string) + string = re.sub(r"\x1b\[[0-9;]+m", "", string) + return string + + class EuscanOutput(object): """ Class that handles output for euscan @@ -141,7 +149,8 @@ class EuscanOutput(object): def metadata(self, key, value, show=True): if self.config["format"]: - self.queries[self.current_query]["metadata"][key] = value + self.queries[self.current_query]["metadata"][key] = \ + clean_colors(value) elif show: print "%s: %s" % (key.capitalize(), value) diff --git a/pym/euscan/scan.py b/pym/euscan/scan.py index be49b96..fa573f3 100644 --- a/pym/euscan/scan.py +++ b/pym/euscan/scan.py @@ -45,9 +45,13 @@ def filter_versions(cp, versions): def scan_upstream_urls(cpv, urls, on_progress): versions = [] - progress_available = 70 - num_urls = sum([len(urls[fn]) for fn in urls]) - progress_increment = progress_available / num_urls + if on_progress: + progress_available = 70 + num_urls = sum([len(urls[fn]) for fn in urls]) + if num_urls > 0: + progress_increment = progress_available / num_urls + else: + progress_increment = 0 for filename in urls: for url in urls[filename]: