euscanwww: Using incremental xml parsing for portage scanning

Signed-off-by: volpino <fox91@anche.no>
This commit is contained in:
volpino 2012-06-30 10:28:03 +02:00
parent a83e4de59d
commit 0c5a0ac36f

View File

@@ -2,7 +2,7 @@ import subprocess
import portage import portage
import os import os
import re import re
from xml.dom.minidom import parseString from xml.etree.ElementTree import iterparse, ParseError
from django.db.transaction import commit_on_success from django.db.transaction import commit_on_success
from django.core.management.color import color_style from django.core.management.color import color_style
@@ -99,10 +99,14 @@ class ScanPortage(object):
Version.objects.filter(packaged=True).update(alive=False) Version.objects.filter(packaged=True).update(alive=False)
self.logger.info('done') self.logger.info('done')
output = subprocess.Popen(cmd, stdout=subprocess.PIPE).\ sub = subprocess.Popen(cmd, stdout=subprocess.PIPE)
communicate()[0]
if len(output) == 0: output = sub.stdout
try:
parser = iterparse(output, ["start", "end"])
parser.next() # read root tag just for testing output
except ParseError:
if not query: if not query:
return return
if self.purge_packages: if self.purge_packages:
@@ -120,34 +124,42 @@ class ScanPortage(object):
) )
return return
dom = parseString(output) cat, pkg, homepage, desc = ("", "", "", "")
versions = []
for category_tag in dom.getElementsByTagName("category"): for event, elem in parser:
for package_tag in category_tag.getElementsByTagName("package"): if event == "start": # on tag opening
cat = category_tag.getAttribute("name") if elem.tag == "category":
pkg = package_tag.getAttribute("name") cat = elem.attrib["name"]
homepage_tags = package_tag.getElementsByTagName("homepage") if elem.tag == "package":
try: pkg = elem.attrib["name"]
homepage = homepage_tags[0].firstChild.nodeValue if elem.tag == "description":
except (IndexError, AttributeError): desc = elem.text or ""
homepage = "" if elem.tag == "homepage":
desc_tags = package_tag.getElementsByTagName("description") homepage = elem.text or ""
try: if elem.tag == "version":
desc = desc_tags[0].firstChild.nodeValue # append version data to versions
except (IndexError, AttributeError): cpv = "%s/%s-%s" % (cat, pkg, elem.attrib["id"])
desc = "" slot = elem.attrib.get("slot", "")
overlay = elem.attrib.get("overlay", "")
versions.append((cpv, slot, overlay))
elif event == "end": # on tag closing
if elem.tag == "package":
# package tag has been closed, saving everything!
with commit_on_success(): with commit_on_success():
package = self.store_package(cat, pkg, homepage, desc) package = self.store_package(cat, pkg, homepage, desc)
for cpv, slot, overlay in versions:
for version_tag in package_tag.\
getElementsByTagName("version"):
cpv = "%s/%s-%s" % (cat, pkg,
version_tag.getAttribute("id"))
slot = version_tag.getAttribute("slot")
overlay = version_tag.getAttribute("overlay")
self.store_version(package, cpv, slot, overlay) self.store_version(package, cpv, slot, overlay)
# clean old data
pkg, homepage, desc = ("", "", "")
versions = []
if elem.tag == "category":
# clean old data
cat = ""
def store_package(self, cat, pkg, homepage, description): def store_package(self, cat, pkg, homepage, description):
created = False created = False
obj = self.cache_get_package(cat, pkg) obj = self.cache_get_package(cat, pkg)