euscanwww: Using incremental XML parsing for portage scanning
Signed-off-by: volpino <fox91@anche.no>
This commit is contained in:
		| @@ -2,7 +2,7 @@ import subprocess | |||||||
| import portage | import portage | ||||||
| import os | import os | ||||||
| import re | import re | ||||||
| from xml.dom.minidom import parseString | from xml.etree.ElementTree import iterparse, ParseError | ||||||
|  |  | ||||||
| from django.db.transaction import commit_on_success | from django.db.transaction import commit_on_success | ||||||
| from django.core.management.color import color_style | from django.core.management.color import color_style | ||||||
| @@ -99,10 +99,14 @@ class ScanPortage(object): | |||||||
|             Version.objects.filter(packaged=True).update(alive=False) |             Version.objects.filter(packaged=True).update(alive=False) | ||||||
|             self.logger.info('done') |             self.logger.info('done') | ||||||
|  |  | ||||||
|         output = subprocess.Popen(cmd, stdout=subprocess.PIPE).\ |         sub = subprocess.Popen(cmd, stdout=subprocess.PIPE) | ||||||
|             communicate()[0] |  | ||||||
|  |  | ||||||
|         if len(output) == 0: |         output = sub.stdout | ||||||
|  |  | ||||||
|  |         try: | ||||||
|  |             parser = iterparse(output, ["start", "end"]) | ||||||
|  |             parser.next()  # read root tag just for testing output | ||||||
|  |         except ParseError: | ||||||
|             if not query: |             if not query: | ||||||
|                 return |                 return | ||||||
|             if self.purge_packages: |             if self.purge_packages: | ||||||
| @@ -120,34 +124,42 @@ class ScanPortage(object): | |||||||
|                 ) |                 ) | ||||||
|             return |             return | ||||||
|  |  | ||||||
|         dom = parseString(output) |         cat, pkg, homepage, desc = ("", "", "", "") | ||||||
|  |         versions = [] | ||||||
|  |  | ||||||
|         for category_tag in dom.getElementsByTagName("category"): |         for event, elem in parser: | ||||||
|             for package_tag in category_tag.getElementsByTagName("package"): |             if event == "start":  # on tag opening | ||||||
|                 cat = category_tag.getAttribute("name") |                 if elem.tag == "category": | ||||||
|                 pkg = package_tag.getAttribute("name") |                     cat = elem.attrib["name"] | ||||||
|                 homepage_tags = package_tag.getElementsByTagName("homepage") |                 if elem.tag == "package": | ||||||
|                 try: |                     pkg = elem.attrib["name"] | ||||||
|                     homepage = homepage_tags[0].firstChild.nodeValue |                 if elem.tag == "description": | ||||||
|                 except (IndexError, AttributeError): |                     desc = elem.text or "" | ||||||
|                     homepage = "" |                 if elem.tag == "homepage": | ||||||
|                 desc_tags = package_tag.getElementsByTagName("description") |                     homepage = elem.text or "" | ||||||
|                 try: |                 if elem.tag == "version": | ||||||
|                     desc = desc_tags[0].firstChild.nodeValue |                     # append version data to versions | ||||||
|                 except (IndexError, AttributeError): |                     cpv = "%s/%s-%s" % (cat, pkg, elem.attrib["id"]) | ||||||
|                     desc = "" |                     slot = elem.attrib.get("slot", "") | ||||||
|  |                     overlay = elem.attrib.get("overlay", "") | ||||||
|  |                     versions.append((cpv, slot, overlay)) | ||||||
|  |  | ||||||
|  |             elif event == "end":  # on tag closing | ||||||
|  |                 if elem.tag == "package": | ||||||
|  |                     # package tag has been closed, saving everything! | ||||||
|                     with commit_on_success(): |                     with commit_on_success(): | ||||||
|                         package = self.store_package(cat, pkg, homepage, desc) |                         package = self.store_package(cat, pkg, homepage, desc) | ||||||
|  |                         for cpv, slot, overlay in versions: | ||||||
|                     for version_tag in package_tag.\ |  | ||||||
|                                        getElementsByTagName("version"): |  | ||||||
|                         cpv = "%s/%s-%s" % (cat, pkg, |  | ||||||
|                                             version_tag.getAttribute("id")) |  | ||||||
|                         slot = version_tag.getAttribute("slot") |  | ||||||
|                         overlay = version_tag.getAttribute("overlay") |  | ||||||
|                             self.store_version(package, cpv, slot, overlay) |                             self.store_version(package, cpv, slot, overlay) | ||||||
|  |  | ||||||
|  |                     # clean old data | ||||||
|  |                     pkg, homepage, desc = ("", "", "") | ||||||
|  |                     versions = [] | ||||||
|  |  | ||||||
|  |                 if elem.tag == "category": | ||||||
|  |                     # clean old data | ||||||
|  |                     cat = "" | ||||||
|  |  | ||||||
|     def store_package(self, cat, pkg, homepage, description): |     def store_package(self, cat, pkg, homepage, description): | ||||||
|         created = False |         created = False | ||||||
|         obj = self.cache_get_package(cat, pkg) |         obj = self.cache_get_package(cat, pkg) | ||||||
|   | |||||||
		Reference in New Issue
	
	Block a user