euscan: shake the code
- add custom site handlers - use a custom user agent - fix some bugs in management commands Signed-off-by: Corentin Chary <corentincj@iksaif.net>
This commit is contained in:
24
pym/euscan/handlers/__init__.py
Normal file
24
pym/euscan/handlers/__init__.py
Normal file
@ -0,0 +1,24 @@
|
||||
from euscan.handlers import generic
|
||||
from euscan.handlers import php
|
||||
from euscan.handlers import pypi
|
||||
from euscan.handlers import rubygem
|
||||
|
||||
# Registered handlers, tried in order by find_best_handler(); `generic`
# accepts any (cpv, url) pair (its can_handle() always returns True),
# so it must stay last as the catch-all fallback.
handlers = [ php, pypi, rubygem, generic ]
|
||||
|
||||
def find_best_handler(cpv, url):
    """Return the first registered handler accepting (cpv, url).

    Handlers are consulted in the order they appear in `handlers`;
    None is returned when none of them claims the pair.
    """
    candidates = (h for h in handlers if h.can_handle(cpv, url))
    return next(candidates, None)
|
||||
|
||||
def scan(cpv, url):
    """Scan `url` for new versions of package `cpv`.

    Dispatches to the best matching handler's scan(); returns its
    result list, or [] when no handler can process the pair.
    """
    handler = find_best_handler(cpv, url)
    if handler is None:
        return []
    return handler.scan(cpv, url)
|
||||
|
||||
def brute_force(cpv, url):
    """Brute force new versions of package `cpv` at `url`.

    Dispatches to the best matching handler's brute_force(); returns
    its result list, or [] when no handler can process the pair.
    """
    handler = find_best_handler(cpv, url)
    if handler is None:
        return []
    return handler.brute_force(cpv, url)
|
183
pym/euscan/handlers/generic.py
Normal file
183
pym/euscan/handlers/generic.py
Normal file
@ -0,0 +1,183 @@
|
||||
import urllib2
|
||||
import re
|
||||
import StringIO
|
||||
|
||||
from BeautifulSoup import BeautifulSoup
|
||||
|
||||
import portage
|
||||
|
||||
from euscan import CONFIG, SCANDIR_BLACKLIST_URLS, BRUTEFORCE_BLACKLIST_PACKAGES, BRUTEFORCE_BLACKLIST_URLS, output
|
||||
from euscan import helpers
|
||||
|
||||
def scan_html(data, url, pattern):
    """Extract (version, filename) candidates from an HTML index page.

    Each <a href> is stripped of the leading base `url` (first
    occurrence only) and matched case-insensitively against `pattern`,
    anchored at the start; group 1 of the match is the version string.
    """
    found = []

    for anchor in BeautifulSoup(data).findAll('a'):
        target = anchor.get("href")
        if not target:
            continue

        if target.startswith(url):
            target = target.replace(url, "", 1)

        hit = re.match(pattern, target, re.I)
        if hit:
            found.append((hit.group(1), hit.group(0)))

    return found
|
||||
|
||||
def scan_ftp(data, url, pattern):
    """Extract (version, filename) candidates from a raw FTP listing.

    Each line of `data` (CR/LF stripped) is searched case-insensitively
    for `pattern`; group 1 of the match is the version string.
    """
    found = []

    for raw in data.split("\n"):
        cleaned = raw.replace("\r", "")
        hit = re.search(pattern, cleaned, re.I)
        if hit:
            found.append((hit.group(1), hit.group(0)))

    return found
|
||||
|
||||
def scan_directory_recursive(cpv, url, steps):
    """Walk one level of the remote directory tree looking for versions.

    `steps` is a list of (path-component, regex) pairs; the first step
    is appended to `url` and fetched, matches are collected, and the
    remaining steps are applied recursively below each match.

    Returns a list of (url, version) tuples; [] on fetch errors or when
    no steps remain.
    """
    if not steps:
        return []

    cp, ver, rev = portage.pkgsplit(cpv)
    url += steps[0][0]
    pattern = steps[0][1]

    steps = steps[1:]

    output.einfo("Scanning: %s" % url)

    try:
        fp = helpers.urlopen(url)
    except urllib2.URLError:
        return []
    except IOError:
        return []

    data = fp.read()

    results = []

    # Crude content sniffing: anything containing an <a href> is parsed
    # as an HTML index, otherwise fall back to FTP listing parsing.
    if re.search("<\s*a\s+[^>]*href", data):
        results.extend(scan_html(data, url, pattern))
    elif url.startswith('ftp://'):
        results.extend(scan_ftp(data, url, pattern))

    versions = []

    for version, path in results:
        if helpers.version_filtered(cp, ver, version):
            continue

        # Join the matched path onto the current url, inserting a '/'
        # only when neither side already provides one.
        if not url.endswith('/') and not path.startswith('/'):
            path = url + '/' + path
        else:
            path = url + path

        versions.append((path, version))

        # Descend into this matched entry with the remaining steps.
        if steps:
            ret = scan_directory_recursive(cpv, path, steps)
            versions.extend(ret)

    return versions
|
||||
|
||||
def scan(cpv, url):
    """Generic scanner: derive a version template from `url` and walk
    the upstream directory tree for newer versions of `cpv`.

    Returns a list of (url, version) tuples; [] when the url is
    blacklisted or does not embed the package version.
    """
    for bu in SCANDIR_BLACKLIST_URLS:
        if re.match(bu, url):
            output.einfo("%s is blacklisted by rule %s" % (url, bu))
            return []

    resolved_url = helpers.parse_mirror(url)
    cp, ver, rev = portage.pkgsplit(cpv)

    template = helpers.template_from_url(resolved_url, ver)

    # No '${...}' placeholder means the version could not be located
    # inside the url, so there is nothing to scan for.
    if '${' not in template:
        output.einfo("Url doesn't seems to depend on version: %s not found in %s"
                     % (ver, resolved_url))
        return []

    output.einfo("Scanning: %s" % template)

    steps = helpers.generate_scan_paths(template)
    return scan_directory_recursive(cpv, "", steps)
|
||||
|
||||
def brute_force(cpv, url):
    """Brute force new versions of `cpv` by substituting generated
    version strings into the download url template.

    Returns a list of [url, version] entries whose url actually exists
    upstream; [] when brute forcing is blacklisted or the url does not
    embed the full package version.
    """
    cp, ver, rev = portage.pkgsplit(cpv)

    url = helpers.parse_mirror(url)

    for bp in BRUTEFORCE_BLACKLIST_PACKAGES:
        if re.match(bp, cp):
            output.einfo("%s is blacklisted by rule %s" % (cp, bp))
            return []

    for bp in BRUTEFORCE_BLACKLIST_URLS:
        if re.match(bp, url):
            output.einfo("%s is blacklisted by rule %s" % (cp, bp))
            return []

    output.einfo("Generating version from " + ver)

    components = helpers.split_version(ver)
    versions = helpers.gen_versions(components, CONFIG["brute-force"])

    # Remove unwanted (not-newer) versions.  The original removed items
    # from `versions` while iterating over it, which silently skips the
    # element following each removal; build a filtered list instead.
    versions = [v for v in versions
                if helpers.vercmp(cp, ver, helpers.join_version(v)) < 0]

    if not versions:
        output.einfo("Can't generate new versions from " + ver)
        return []

    template = helpers.template_from_url(url, ver)

    if '${PV}' not in template:
        output.einfo("Url doesn't seems to depend on full version: %s not found in %s"
                     % (ver, url))
        return []
    else:
        output.einfo("Brute forcing: %s" % template)

    result = []

    i = 0
    done = set()

    # `versions` may grow while we walk it (recursive brute force), so
    # iterate by index rather than with a for loop.
    while i < len(versions):
        components = versions[i]
        i += 1

        # `components` is a list; key on its tuple so membership tests
        # actually work (the original compared a list against stored
        # tuples, which never matches).
        key = tuple(components)
        if key in done:
            continue
        done.add(key)

        version = helpers.join_version(components)

        if helpers.version_filtered(cp, ver, version):
            continue

        url = helpers.url_from_template(template, version)
        infos = helpers.tryurl(url, template)

        if not infos:
            continue

        result.append([url, version])

        # On a confirmed hit, optionally queue further versions derived
        # from it for exploration.
        if CONFIG["brute-force-recursive"]:
            for v in helpers.gen_versions(components, CONFIG["brute-force"]):
                if v not in versions and tuple(v) not in done:
                    versions.append(v)

        if CONFIG["oneshot"]:
            break

    return result
|
||||
|
||||
def can_handle(cpv, url):
    """Fallback handler: the generic scanner accepts every package, so
    it must be registered last."""
    return True
|
65
pym/euscan/handlers/php.py
Normal file
65
pym/euscan/handlers/php.py
Normal file
@ -0,0 +1,65 @@
|
||||
import re
|
||||
import portage
|
||||
import urllib2
|
||||
import xml.dom.minidom
|
||||
|
||||
from euscan import helpers, output
|
||||
|
||||
def can_handle(cpv, url):
    """PEAR/PECL handler: accept only pear.php.net / pecl.php.net
    download urls."""
    prefixes = ('http://pear.php.net/get/',
                'http://pecl.php.net/get/')
    return url.startswith(prefixes)
|
||||
|
||||
def guess_package_and_channel(cp, url):
    """Derive the (package, channel-host) pair from a PEAR/PECL url.

    Prefers parsing the standard .../get/<Pkg>-<ver>.tgz download link;
    otherwise falls back to the portage package name and the url's host
    part.
    """
    match = re.search('http://(.*)/get/(.*)-(.*).tgz', url)

    if match:
        host = match.group(1)
        pkg = match.group(2)
    else:
        cat, pkg = cp.split("/")
        # Bug fix: the original left `host` unassigned here and crashed
        # with a NameError; take the host from the url instead.
        host_match = re.search('http://([^/]+)/', url)
        host = host_match.group(1) if host_match else None

    return pkg, host
|
||||
|
||||
def scan(cpv, url):
    """Query the PEAR/PECL REST api for all releases of the package and
    return those not filtered against the installed version.

    Returns a list of (url, version) tuples; [] on network errors.
    """
    # NOTE(review): cpv (cat/pkg-ver) is passed where the helper's
    # fallback branch expects a cp (cat/pkg) — confirm intended.
    pkg, channel = guess_package_and_channel(cpv, url)

    orig_url = url
    url = 'http://%s/rest/r/%s/allreleases.xml' % (channel, pkg.lower())

    output.einfo("Using: " + url)

    try:
        fp = helpers.urlopen(url)
    except urllib2.URLError:
        return []
    except IOError:
        return []

    dom = xml.dom.minidom.parseString(fp.read())

    cp, ver, rev = portage.pkgsplit(cpv)

    ret = []

    # Each <v> element of allreleases.xml names one released version.
    for node in dom.getElementsByTagName("v"):
        version = node.childNodes[0].data
        if helpers.version_filtered(cp, ver, version):
            continue

        candidate = 'http://%s/get/%s-%s.tgz' % (channel, pkg, version)

        # Skip the release the ebuild already points at.
        if candidate == orig_url:
            continue

        ret.append(( candidate, version ))

    return ret
|
||||
|
||||
def brute_force(cpv, url):
    """Brute forcing is not implemented for this handler; releases are
    enumerated by scan() instead."""
    return []
|
51
pym/euscan/handlers/pypi.py
Normal file
51
pym/euscan/handlers/pypi.py
Normal file
@ -0,0 +1,51 @@
|
||||
import xmlrpclib
|
||||
import pprint
|
||||
import re
|
||||
|
||||
import portage
|
||||
|
||||
from euscan import helpers, output
|
||||
|
||||
def can_handle(cpv, url):
    """Accept only packages fetched from the PyPI mirror."""
    handled = url.startswith('mirror://pypi/')
    return handled
|
||||
|
||||
def guess_package(cp, url):
    """Return the PyPI package name, preferably parsed out of the
    mirror://pypi url, otherwise the portage package name (${PN})."""
    match = re.search('mirror://pypi/\w+/(.*)/.*', url)
    if match:
        return match.group(1)

    # Fall back to the package part of the category/package pair.
    return cp.split("/")[1]
|
||||
|
||||
def scan(cpv, url):
    """List new releases of the package via the PyPI XML-RPC api
    (http://wiki.python.org/moin/PyPiXmlRpc).

    Returns a list of (urls, version) tuples where `urls` is a single
    space-joined string of all files of that release; [] when PyPI
    knows no releases.
    """
    package = guess_package(cpv, url)

    output.einfo("Using PyPi XMLRPC: " + package)

    client = xmlrpclib.ServerProxy('http://pypi.python.org/pypi')
    versions = client.package_releases(package)

    if not versions:
        return versions

    versions.reverse()

    cp, ver, rev = portage.pkgsplit(cpv)

    ret = []

    for version in versions:
        if helpers.version_filtered(cp, ver, version):
            continue
        # A single release can ship several files; join all of their
        # urls into one space-separated string.
        release_urls = client.release_urls(package, version)
        joined = " ".join([ infos['url'] for infos in release_urls ])
        ret.append(( joined, version ))

    return ret
|
||||
|
||||
def brute_force(cpv, url):
    """Brute forcing is not implemented for this handler; releases are
    enumerated by scan() instead."""
    return []
|
56
pym/euscan/handlers/rubygem.py
Normal file
56
pym/euscan/handlers/rubygem.py
Normal file
@ -0,0 +1,56 @@
|
||||
import re
|
||||
import portage
|
||||
import json
|
||||
import urllib2
|
||||
|
||||
from euscan import helpers, output
|
||||
|
||||
def can_handle(cpv, url):
    """Accept only packages fetched from the rubygems mirror."""
    handled = url.startswith('mirror://rubygems/')
    return handled
|
||||
|
||||
def guess_gem(cpv, url):
    """Return the gem name for `cpv`.

    When the mirror url carries the upstream file name, prefer it over
    the portage name by re-splitting a fake cpv built from it.
    """
    match = re.search('mirror://rubygems/(.*).gem', url)
    if match:
        cpv = 'fake/%s' % match.group(1)

    cp, ver, rev = portage.pkgsplit(cpv)
    return cp.split("/")[1]
|
||||
|
||||
def scan(cpv, url):
    """List versions of the gem via the rubygems.org JSON api
    (http://guides.rubygems.org/rubygems-org-api/#gemversion).

    Returns a list of (url, version) tuples; [] on network errors or
    when no versions are published.
    """
    gem = guess_gem(cpv, url)
    url = 'http://rubygems.org/api/v1/versions/%s.json' % gem

    output.einfo("Using: " + url)

    try:
        # Extra args look like (data=None, timeout=5) — presumably;
        # confirm against helpers.urlopen's signature.
        fp = helpers.urlopen(url, None, 5)
    except urllib2.URLError:
        return []
    except IOError:
        return []

    versions = json.loads(fp.read())

    if not versions:
        return []

    cp, ver, rev = portage.pkgsplit(cpv)

    ret = []

    for entry in versions:
        version = entry['number']
        if helpers.version_filtered(cp, ver, version):
            continue
        download = 'http://rubygems.org/gems/%s-%s.gem' % (gem, version)
        ret.append(( download, version ))

    return ret
|
||||
|
||||
def brute_force(cpv, url):
    """Brute forcing is not implemented for this handler; releases are
    enumerated by scan() instead."""
    return []
|
Reference in New Issue
Block a user