euscan: Redesigning the handlers layout
Signed-off-by: volpino <fox91@anche.no>

pym/euscan/handlers/url/__init__.py (new file, 19 lines)
@@ -0,0 +1,19 @@
"""
Url wide handlers for scanning upstream
"""

import pkgutil

handlers = []

# autoimport all modules in this directory and append them to handlers list
for loader, module_name, is_pkg in pkgutil.walk_packages(__path__):
    module = loader.find_module(module_name).load_module(module_name)
    handlers.append(module)

# sort handlers by priority
handlers = sorted(
    handlers,
    key=lambda handler: handler.PRIORITY,
    reverse=True
)
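
Note: the sorted handlers list above is what the rest of euscan can walk to pick a handler for a given upstream URL. A minimal dispatch sketch, assuming such a caller exists (the find_best_handler name and its logic are illustrative assumptions, not part of this commit):

from euscan.handlers import url as url_handlers

def find_best_handler(pkg, url):
    # Hypothetical dispatcher: url_handlers.handlers is already sorted by
    # descending PRIORITY, so the first module whose can_handle() accepts
    # the URL wins.
    for handler in url_handlers.handlers:
        if handler.can_handle(pkg, url):
            return handler
    return None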

pym/euscan/handlers/url/cpan.py (new file, 133 lines)
@@ -0,0 +1,133 @@
import re
import portage
import urllib2
import json

from euscan import helpers, output

HANDLER_NAME = "cpan"
CONFIDENCE = 100.0
PRIORITY = 90

_cpan_package_name_re = re.compile("mirror://cpan/authors/.*/([^/.]*).*")


def can_handle(pkg, url):
    return url.startswith('mirror://cpan/')


def guess_package(cp, url):
    match = _cpan_package_name_re.search(url)

    pkg = None

    if match:
        pkg = match.group(1)
        try:
            cp, ver, rev = portage.pkgsplit('fake/' + pkg)
        except:
            pass

    cat, pkg = cp.split("/")

    return pkg


def gentoo_mangle_version(up_pv):
    pv = ""

    if up_pv.count('.') == 1:
        digits = 0
        for i in range(len(up_pv)):
            if digits == 3:
                pv += "."
                digits = 0
            c = up_pv[i]
            pv += c
            digits += int(c.isdigit())
            if c == '.':
                digits = 0
    else:
        pv = up_pv

    return helpers.gentoo_mangle_version(pv)


def cpan_trim_version(pv):
    pv = re.sub('^[a-zA-Z]+', '', pv)
    pv = re.sub('[a-zA-Z]$', '', pv)
    return pv


def cpan_mangle_version(pv):
    pos = pv.find('.')
    if pos < 0:
        return pv
    up_pv = pv.replace('.', '')
    up_pv = up_pv[0:pos] + '.' + up_pv[pos:]
    up_pv = cpan_trim_version(up_pv)
    return up_pv


def cpan_vercmp(cp, a, b):
    try:
        return float(a) - float(b)
    except:
        if a < b:
            return -1
        else:
            return 1


def scan(pkg, url):
    cp, ver, rev = portage.pkgsplit(pkg.cpv)
    pkg = guess_package(cp, url)

    orig_url = url
    url = 'http://search.cpan.org/api/dist/%s' % pkg

    output.einfo("Using: " + url)

    try:
        fp = helpers.urlopen(url)
    except urllib2.URLError:
        return []
    except IOError:
        return []

    if not fp:
        return []

    data = fp.read()
    data = json.loads(data)

    if 'releases' not in data:
        return []

    ret = []

    for version in data['releases']:
        #if version['status'] == 'testing':
        #    continue

        up_pv = version['version']
        up_pv = cpan_trim_version(up_pv)
        pv = gentoo_mangle_version(up_pv)
        up_ver = cpan_mangle_version(ver)

        if helpers.version_filtered(cp, up_ver, up_pv, cpan_vercmp):
            continue

        # CPAN layout: authors/id/<A>/<AB>/<ABCDEF>/<archive>
        url = 'mirror://cpan/authors/id/%s/%s/%s/%s' % (
            version['cpanid'][0],
            version['cpanid'][0:2],
            version['cpanid'],
            version['archive']
        )

        if url == orig_url:
            continue

        ret.append((url, pv, HANDLER_NAME, CONFIDENCE))

    return ret
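
Note: gentoo_mangle_version() above regroups CPAN's floating-point style versions (a single dot followed by a long run of digits) into dotted triples before handing the result to helpers.gentoo_mangle_version(). A standalone sketch of just that grouping step, with worked examples (the final output still depends on euscan.helpers, which is not shown in this diff):

def cpan_group_digits(up_pv):
    # Same regrouping as gentoo_mangle_version() above, minus the final
    # helpers.gentoo_mangle_version() call: after the first dot, insert an
    # extra dot every three digits.
    if up_pv.count('.') != 1:
        return up_pv
    pv, digits = "", 0
    for c in up_pv:
        if digits == 3:
            pv += "."
            digits = 0
        pv += c
        digits += int(c.isdigit())
        if c == '.':
            digits = 0
    return pv

assert cpan_group_digits("1.200300") == "1.200.300"
assert cpan_group_digits("0.04") == "0.04"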

pym/euscan/handlers/url/generic.py (new file, 240 lines)
@@ -0,0 +1,240 @@
from urlparse import urljoin
import urllib2
import re
import StringIO

try:
    from BeautifulSoup import BeautifulSoup
except ImportError:
    from bs4 import BeautifulSoup

import portage

from euscan import CONFIG, SCANDIR_BLACKLIST_URLS, \
    BRUTEFORCE_BLACKLIST_PACKAGES, BRUTEFORCE_BLACKLIST_URLS, output, helpers

HANDLER_NAME = "generic"
CONFIDENCE = 50.0
PRIORITY = 0

BRUTEFORCE_HANDLER_NAME = "brute_force"
BRUTEFORCE_CONFIDENCE = 30.0


def scan_html(data, url, pattern):
    soup = BeautifulSoup(data)
    results = []

    for link in soup.findAll('a'):
        href = link.get("href")
        if not href:
            continue

        if href.startswith(url):
            href = href.replace(url, "", 1)

        match = re.match(pattern, href, re.I)
        if match:
            results.append(
                (".".join([x for x in match.groups() if x is not None]),
                 match.group(0))
            )
    return results


def scan_ftp(data, url, pattern):
    buf = StringIO.StringIO(data)
    results = []

    for line in buf.readlines():
        line = line.replace("\n", "").replace("\r", "")
        match = re.search(pattern, line, re.I)
        if match:
            results.append(
                (".".join([x for x in match.groups() if x is not None]),
                 match.group(0))
            )
    return results


def scan_directory_recursive(cp, ver, rev, url, steps, orig_url):
    if not steps:
        return []

    url += steps[0][0]
    pattern = steps[0][1]

    steps = steps[1:]

    output.einfo("Scanning: %s" % url)

    try:
        fp = helpers.urlopen(url)
    except urllib2.URLError:
        return []
    except IOError:
        return []

    if not fp:
        return []

    data = fp.read()

    results = []

    if re.search("<\s*a\s+[^>]*href", data, re.I):
        results.extend(scan_html(data, url, pattern))
    elif url.startswith('ftp://'):
        results.extend(scan_ftp(data, url, pattern))

    versions = []

    for up_pv, path in results:
        pv = helpers.gentoo_mangle_version(up_pv)
        if helpers.version_filtered(cp, ver, pv):
            continue
        if not url.endswith("/"):
            url = url + "/"
        path = urljoin(url, path)

        if not steps and path not in orig_url:
            versions.append((path, pv, HANDLER_NAME, CONFIDENCE))

        if steps:
            ret = scan_directory_recursive(cp, ver, rev, path, steps, orig_url)
            versions.extend(ret)

    return versions


def scan(pkg, url):
    ret = []

    if CONFIG["scan-dir"]:
        for bu in SCANDIR_BLACKLIST_URLS:
            if re.match(bu, url):
                output.einfo("%s is blacklisted by rule %s" % (url, bu))
                return []

        resolved_url = helpers.parse_mirror(url)
        if not resolved_url:
            return []

        cp, ver, rev = portage.pkgsplit(pkg.cpv)

        # 'Hack' for _beta/_rc versions where _ is used instead of -
        if ver not in resolved_url:
            newver = helpers.version_change_end_sep(ver)
            if newver and newver in resolved_url:
                output.einfo(
                    "Version: using %s instead of %s" % (newver, ver)
                )
                ver = newver

        template = helpers.template_from_url(resolved_url, ver)
        if '${' not in template:
            output.einfo(
                "Url doesn't seem to depend on version: %s not found in %s" %
                (ver, resolved_url)
            )
            return []
        else:
            output.einfo("Scanning: %s" % template)

        steps = helpers.generate_scan_paths(template)
        ret = scan_directory_recursive(cp, ver, rev, "", steps, url)

    if not ret:
        ret = brute_force(pkg, url)

    return ret


def brute_force(pkg, url):
    if CONFIG["brute-force"] == 0:
        return []

    cp, ver, rev = portage.pkgsplit(pkg.cpv)

    url = helpers.parse_mirror(url)
    if not url:
        return []

    for bp in BRUTEFORCE_BLACKLIST_PACKAGES:
        if re.match(bp, cp):
            output.einfo("%s is blacklisted by rule %s" % (cp, bp))
            return []

    for bp in BRUTEFORCE_BLACKLIST_URLS:
        if re.match(bp, url):
            output.einfo("%s is blacklisted by rule %s" % (url, bp))
            return []

    output.einfo("Generating version from " + ver)

    components = helpers.split_version(ver)
    versions = helpers.gen_versions(components, CONFIG["brute-force"])

    # Remove unwanted versions (keep only candidates newer than the current one)
    versions = [v for v in versions
                if helpers.vercmp(cp, ver, helpers.join_version(v)) < 0]

    if not versions:
        output.einfo("Can't generate new versions from " + ver)
        return []

    template = helpers.template_from_url(url, ver)

    if '${PV}' not in template:
        output.einfo(
            "Url doesn't seem to depend on full version: %s not found in %s" %
            (ver, url))
        return []
    else:
        output.einfo("Brute forcing: %s" % template)

    result = []

    i = 0
    done = []

    while i < len(versions):
        components = versions[i]
        i += 1
        if tuple(components) in done:
            continue
        done.append(tuple(components))

        version = helpers.join_version(components)

        if helpers.version_filtered(cp, ver, version):
            continue

        url = helpers.url_from_template(template, version)
        infos = helpers.tryurl(url, template)

        if not infos:
            continue

        result.append([url, version, BRUTEFORCE_HANDLER_NAME,
                       BRUTEFORCE_CONFIDENCE])

        if len(result) > CONFIG['brute-force-false-watermark']:
            output.einfo(
                "Broken server detected! Skipping brute force."
            )
            return []

        if CONFIG["brute-force-recursive"]:
            for v in helpers.gen_versions(list(components),
                                          CONFIG["brute-force"]):
                if v not in versions and tuple(v) not in done:
                    versions.append(v)

        if CONFIG["oneshot"]:
            break

    return result


def can_handle(pkg, url):
    return True
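
Note: scan_directory_recursive() walks the URL one path component at a time, so steps is expected to be a list of (url_fragment, regex) pairs produced by helpers.generate_scan_paths(). The exact values that helper produces are not part of this diff; the shape below is an assumption for illustration only:

# Illustrative assumption of the data driving scan_directory_recursive():
# the template keeps the rest of the URL and replaces the version with ${PV},
# and each step appends a path fragment whose directory listing is matched by
# a regex that captures version components.
template = "http://example.org/releases/foo-${PV}.tar.gz"
steps = [
    ("http://example.org/releases/", r"foo-([\d.]+)\.tar\.gz"),
]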

pym/euscan/handlers/url/github.py (new file, 54 lines)
@@ -0,0 +1,54 @@
import json
import urllib2
import re

import portage

from euscan import helpers, output

HANDLER_NAME = "github"
CONFIDENCE = 100.0
PRIORITY = 90


def can_handle(pkg, url):
    return url.startswith('mirror://github/')


def guess_package(cp, url):
    match = re.search('^mirror://github/(.*?)/(.*?)/(.*)$', url)

    assert(match)
    return (match.group(1), match.group(2), match.group(3))


def scan(pkg, url):
    'http://developer.github.com/v3/repos/downloads/'

    user, project, filename = guess_package(pkg.cpv, url)

    # find out where version is expected to be found
    cp, ver, rev = portage.pkgsplit(pkg.cpv)
    if ver not in filename:
        return

    # now create a filename-matching regexp
    # XXX: supposedly replace first with (?P<foo>...)
    # and remaining ones with (?P=foo)
    fnre = re.compile('^%s$' % \
        re.escape(filename).replace(re.escape(ver), '(.*?)'))

    output.einfo("Using github API for: " + '/'.join([user, project, filename]))

    dlreq = urllib2.urlopen('https://api.github.com/repos/%s/%s/downloads' % \
        (user, project))
    dls = json.load(dlreq)

    for dl in dls:
        m = fnre.match(dl['name'])

        if m:
            pv = helpers.gentoo_mangle_version(m.group(1))
            if helpers.version_filtered(cp, ver, pv):
                continue
            yield (dl['html_url'], pv, HANDLER_NAME, CONFIDENCE)
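
Note: unlike the other handlers in this commit, which return lists, this scan() yields its matches, so it is a generator. A caller expecting a list would have to materialize it; a minimal, hypothetical sketch:

from euscan.handlers.url import github

def collect_github_matches(pkg, url):
    # scan() is a generator, so materialize it for callers that expect a list.
    return list(github.scan(pkg, url))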

pym/euscan/handlers/url/kde.py (new file, 39 lines)
@@ -0,0 +1,39 @@
from euscan.handlers.url import generic

PRIORITY = 90

HANDLER_NAME = "kde"


def can_handle(pkg, url):
    if url.startswith('mirror://kde/'):
        return True
    return False


def clean_results(results):
    ret = []

    for path, version, _, confidence in results:
        if version == '5SUMS':
            # drop bogus matches coming from checksum files (e.g. MD5SUMS)
            continue
        ret.append((path, version, HANDLER_NAME, confidence))

    return ret


def scan(pkg, url):
    results = generic.scan(pkg, url)

    if url.startswith('mirror://kde/unstable/'):
        url = url.replace('mirror://kde/unstable/', 'mirror://kde/stable/')
        results += generic.scan(pkg, url)

    if not results:  # if nothing was found go brute forcing
        results = generic.brute_force(pkg, url)

        if url.startswith('mirror://kde/unstable/'):
            url = url.replace('mirror://kde/unstable/', 'mirror://kde/stable/')
            results += generic.brute_force(pkg, url)

    return clean_results(results)

pym/euscan/handlers/url/php.py (new file, 72 lines)
@@ -0,0 +1,72 @@
import re
import portage
import urllib2
import xml.dom.minidom

from euscan import helpers, output

HANDLER_NAME = "php"
CONFIDENCE = 100.0
PRIORITY = 90


def can_handle(pkg, url):
    if url.startswith('http://pear.php.net/get/'):
        return True
    if url.startswith('http://pecl.php.net/get/'):
        return True
    return False


def guess_package_and_channel(cp, url):
    match = re.search('http://(.*)/get/(.*)-(.*).tgz', url)

    if match:
        host = match.group(1)
        pkg = match.group(2)
    else:
        cat, pkg = cp.split("/")

    return pkg, host


def scan(pkg, url):
    cp, ver, rev = portage.pkgsplit(pkg.cpv)
    package, channel = guess_package_and_channel(cp, url)

    orig_url = url
    url = 'http://%s/rest/r/%s/allreleases.xml' % (channel, package.lower())

    output.einfo("Using: " + url)

    try:
        fp = helpers.urlopen(url)
    except urllib2.URLError:
        return []
    except IOError:
        return []

    if not fp:
        return []

    data = fp.read()

    dom = xml.dom.minidom.parseString(data)

    nodes = dom.getElementsByTagName("v")
    ret = []

    for node in nodes:
        up_pv = node.childNodes[0].data
        pv = helpers.gentoo_mangle_version(up_pv)
        if helpers.version_filtered(cp, ver, pv):
            continue

        url = 'http://%s/get/%s-%s.tgz' % (channel, package, up_pv)

        if url == orig_url:
            continue

        ret.append((url, pv, HANDLER_NAME, CONFIDENCE))

    return ret
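
Note: scan() above only reads the <v> elements of the channel's allreleases.xml document. The abridged sample below reflects the general shape of that PEAR/PECL REST response as assumed here (consult the PEAR REST documentation for the authoritative schema):

import xml.dom.minidom

# Abridged, assumed shape of an allreleases.xml response; only <v> (version)
# elements are consumed by the handler above.
sample = """<a>
  <r><v>1.9.4</v><s>stable</s></r>
  <r><v>1.9.3</v><s>stable</s></r>
</a>"""

dom = xml.dom.minidom.parseString(sample)
versions = [node.childNodes[0].data for node in dom.getElementsByTagName("v")]
assert versions == [u'1.9.4', u'1.9.3']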

pym/euscan/handlers/url/pypi.py (new file, 54 lines)
@@ -0,0 +1,54 @@
import xmlrpclib
import re

import portage

from euscan import helpers, output

HANDLER_NAME = "pypi"
CONFIDENCE = 100.0
PRIORITY = 90


def can_handle(pkg, url):
    return url.startswith('mirror://pypi/')


def guess_package(cp, url):
    match = re.search('mirror://pypi/\w+/(.*)/.*', url)
    if match:
        return match.group(1)

    cat, pkg = cp.split("/")

    return pkg


def scan(pkg, url):
    'http://wiki.python.org/moin/PyPiXmlRpc'

    package = guess_package(pkg.cpv, url)

    output.einfo("Using PyPi XMLRPC: " + package)

    client = xmlrpclib.ServerProxy('http://pypi.python.org/pypi')
    versions = client.package_releases(package)

    if not versions:
        return versions

    versions.reverse()

    cp, ver, rev = portage.pkgsplit(pkg.cpv)

    ret = []

    for up_pv in versions:
        pv = helpers.gentoo_mangle_version(up_pv)
        if helpers.version_filtered(cp, ver, pv):
            continue
        urls = client.release_urls(package, up_pv)
        urls = " ".join([infos['url'] for infos in urls])
        ret.append((urls, pv, HANDLER_NAME, CONFIDENCE))

    return ret
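
Note: guess_package() above relies on the mirror://pypi layout of <first letter>/<project>/<file>; a worked example of that regex (the package name and URL are chosen for illustration):

import re

url = "mirror://pypi/D/Django/Django-1.4.tar.gz"
match = re.search('mirror://pypi/\w+/(.*)/.*', url)
# The greedy group stops at the last '/', so the project directory is captured.
assert match.group(1) == "Django"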

pym/euscan/handlers/url/rubygem.py (new file, 73 lines)
@@ -0,0 +1,73 @@
import re
import portage
import json
import urllib2

from euscan import helpers, output

HANDLER_NAME = "rubygem"
CONFIDENCE = 100.0
PRIORITY = 90


def can_handle(pkg, url):
    return url.startswith('mirror://rubygems/')


def guess_gem(cpv, url):
    match = re.search('mirror://rubygems/(.*).gem', url)
    if match:
        cpv = 'fake/%s' % match.group(1)

    ret = portage.pkgsplit(cpv)
    if not ret:
        return None

    cp, ver, rev = ret
    cat, pkg = cp.split("/")

    return pkg


def scan(pkg, url):
    'http://guides.rubygems.org/rubygems-org-api/#gemversion'

    gem = guess_gem(pkg.cpv, url)
    if not gem:
        output.eerror("Can't guess gem name using %s and %s" % \
            (pkg.cpv, url))
        return []

    url = 'http://rubygems.org/api/v1/versions/%s.json' % gem

    output.einfo("Using: " + url)

    try:
        fp = helpers.urlopen(url)
    except urllib2.URLError:
        return []
    except IOError:
        return []

    if not fp:
        return []

    data = fp.read()
    versions = json.loads(data)

    if not versions:
        return []

    cp, ver, rev = portage.pkgsplit(pkg.cpv)

    ret = []

    for version in versions:
        up_pv = version['number']
        pv = helpers.gentoo_mangle_version(up_pv)
        if helpers.version_filtered(cp, ver, pv):
            continue
        url = 'http://rubygems.org/gems/%s-%s.gem' % (gem, up_pv)
        ret.append((url, pv, HANDLER_NAME, CONFIDENCE))

    return ret
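
Note: scan() above expects the rubygems.org versions API to return a JSON list of objects and only uses their 'number' field; an abridged, assumed sample of that response:

import json

sample = '[{"number": "1.2.3"}, {"number": "1.2.2"}]'
versions = json.loads(sample)
assert [v['number'] for v in versions] == ["1.2.3", "1.2.2"]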