Merge remote-tracking branch 'fox/master'

This commit is contained in:
Corentin Chary 2012-07-28 11:32:41 +02:00
commit 6ec059bdc2
19 changed files with 102 additions and 52 deletions

View File

@ -23,14 +23,15 @@ import getopt
from errno import EINTR, EINVAL from errno import EINTR, EINVAL
from httplib import HTTPConnection from httplib import HTTPConnection
from portage import settings
from portage.output import white, yellow, turquoise, green from portage.output import white, yellow, turquoise, green
from portage.exception import AmbiguousPackageName from portage.exception import AmbiguousPackageName
from gentoolkit import pprinter as pp from gentoolkit import pprinter as pp
from gentoolkit.eclean.search import port_settings
from gentoolkit.errors import GentoolkitException from gentoolkit.errors import GentoolkitException
from euscan import CONFIG, output from euscan import CONFIG, output
from euscan.out import progress_bar
# Globals # Globals
@ -228,9 +229,8 @@ def parse_args():
def main(): def main():
"""Parse command line and execute all actions.""" """Parse command line and execute all actions."""
CONFIG['nocolor'] = ( CONFIG['nocolor'] = (
port_settings["NOCOLOR"] in ('yes', 'true') or not isatty settings["NOCOLOR"] in ('yes', 'true') or not isatty
) )
if CONFIG['nocolor']: if CONFIG['nocolor']:
pp.output.nocolor() pp.output.nocolor()
@ -255,10 +255,6 @@ def main():
print_usage(e.value) print_usage(e.value)
exit_helper(EINVAL) exit_helper(EINVAL)
# Importing stuff here for performance reasons
from euscan.scan import scan_upstream
from euscan.out import progress_bar
if CONFIG['verbose'] > 2: if CONFIG['verbose'] > 2:
HTTPConnection.debuglevel = 1 HTTPConnection.debuglevel = 1
@ -271,6 +267,9 @@ def main():
on_progress = on_progress_gen.next() on_progress = on_progress_gen.next()
on_progress(maxval=len(queries) * 100, increment=0, label="Working...") on_progress(maxval=len(queries) * 100, increment=0, label="Working...")
# Importing stuff here for performance reasons
from euscan.scan import scan_upstream
for query in queries: for query in queries:
if CONFIG["progress"]: if CONFIG["progress"]:
on_progress(increment=10, label=query) on_progress(increment=10, label=query)

View File

@ -10,18 +10,18 @@ class Migration(DataMigration):
def forwards(self, orm): def forwards(self, orm):
every_day = orm["djcelery.CrontabSchedule"].objects.create( every_day = orm["djcelery.CrontabSchedule"].objects.create(
minute = "00", minute="00",
hour = "01", hour="01",
day_of_week = "*", day_of_week="*",
day_of_month = "*", day_of_month="*",
month_of_year = "*" month_of_year="*"
) )
every_week = orm["djcelery.CrontabSchedule"].objects.create( every_week = orm["djcelery.CrontabSchedule"].objects.create(
minute = "00", minute="00",
hour = "03", hour="03",
day_of_week = "1", day_of_week="1",
day_of_month = "*", day_of_month="*",
month_of_year = "*" month_of_year="*"
) )
orm["djcelery.PeriodicTask"].objects.create( orm["djcelery.PeriodicTask"].objects.create(
name="Daily portage update", name="Daily portage update",

View File

@ -217,6 +217,7 @@ class EuscanResult(models.Model):
self.full_clean() self.full_clean()
super(EuscanResult, self).save(*args, **kwargs) super(EuscanResult, self).save(*args, **kwargs)
@property
def messages(self): def messages(self):
result = json.loads(self.result) result = json.loads(self.result)
@ -324,6 +325,13 @@ class RefreshPackageQuery(models.Model):
priority = models.IntegerField(default=0) priority = models.IntegerField(default=0)
users = models.ManyToManyField(User) users = models.ManyToManyField(User)
@property
def position(self):
    """Return this query's 1-based rank in the refresh queue.

    Queries are ranked by descending ``priority``; ties are broken by
    ascending primary key so the reported position is stable from one
    call to the next instead of depending on the database's arbitrary
    ordering of equal-priority rows.

    Returns ``None`` when no stored query has this instance's primary
    key (presumably the case for an instance that was never saved).
    """
    # Fetch primary keys only: ranking does not need full model instances,
    # and model equality is decided by primary key anyway.
    ordered_pks = RefreshPackageQuery.objects.order_by(
        "-priority", "pk"
    ).values_list("pk", flat=True)
    for pos, pk in enumerate(ordered_pks, start=1):
        if pk == self.pk:
            return pos
    return None
def __unicode__(self): def __unicode__(self):
return u'[%d] %s' % (self.priority, self.package) return u'[%d] %s' % (self.priority, self.package)

View File

@ -54,7 +54,8 @@ class ScanMetadata(object):
return return
except Exception as e: except Exception as e:
self.logger.error( self.logger.error(
self.style.ERROR('%s/%s: %s' % (pkg.category, pkg.name, str(e))) self.style.ERROR('%s/%s: %s' %
(pkg.category, pkg.name, str(e)))
) )
return return

View File

@ -173,7 +173,7 @@ def update_portage(packages=None):
@task @task
def update_upstream(): def update_upstream():
if settings.TASKS_UPSTREAM_GROUPS >= 1: if settings.TASKS_UPSTREAM_GROUPS >= 1:
packages = Package.objects.all() packages = Package.objects.all().order_by("pk")
scan_upstream_sub = group_chunks(scan_upstream, packages, scan_upstream_sub = group_chunks(scan_upstream, packages,
settings.TASKS_UPSTREAM_GROUPS, settings.TASKS_UPSTREAM_GROUPS,

View File

@ -33,7 +33,7 @@
{% endif %} {% endif %}
{% endwith %} {% endwith %}
{{ package.last_version_gentoo.version }} {{ package.last_version_gentoo.version|truncatechars:15 }}
</td> </td>
<td>{{ package.last_version_overlay.version }}</td> <td>{{ package.last_version_overlay.version }}</td>
<td>{{ package.last_version_upstream.version }}</td> <td>{{ package.last_version_upstream.version }}</td>

View File

@ -11,7 +11,9 @@
{{ block.super }} {{ block.super }}
<li> <li>
<img src="{{ STATIC_URL }}/img/feed.png" alt="feed" /> <img src="{{ STATIC_URL }}/img/feed.png" alt="feed" />
<a title="{{ category }} Feed" href="{% url "category_feed" category %}">{{ category }}</a> <a title="{{ category }} Feed" href="{% url "category_feed" category %}">
{{ category|truncatechars:15 }}
</a>
</li> </li>
{% endblock %} {% endblock %}

View File

@ -11,7 +11,9 @@
{{ block.super }} {{ block.super }}
<li> <li>
<img src="{{ STATIC_URL }}/img/feed.png" alt="feed" /> <img src="{{ STATIC_URL }}/img/feed.png" alt="feed" />
<a title="{{ herd.herd }} Feed" href="{% url "herd_feed" herd.herd %}">{{ herd.herd }}</a> <a title="{{ herd.herd }} Feed" href="{% url "herd_feed" herd.herd %}">
{{ herd.herd|truncatechars:15 }}
</a>
</li> </li>
{% endblock %} {% endblock %}

View File

@ -12,7 +12,7 @@
<li> <li>
<img src="{{ STATIC_URL }}/img/feed.png" alt="feed" /> <img src="{{ STATIC_URL }}/img/feed.png" alt="feed" />
<a title="{{ maintainer.name }} Feed" href="{% url "maintainer_feed" maintainer.id %}"> <a title="{{ maintainer.name }} Feed" href="{% url "maintainer_feed" maintainer.id %}">
{{ maintainer.name }} {{ maintainer.name|truncatechars:15 }}
</a> </a>
</li> </li>
{% endblock %} {% endblock %}

View File

@ -14,7 +14,7 @@
<li> <li>
<img src="{{ STATIC_URL }}/img/feed.png" alt="feed" /> <img src="{{ STATIC_URL }}/img/feed.png" alt="feed" />
<a title="{{ package }} Feed" href="{% url "package_feed" package.category package.name %}"> <a title="{{ package }} Feed" href="{% url "package_feed" package.category package.name %}">
{{ package }} {{ package|truncatechars:15 }}
</a> </a>
</li> </li>
{% endblock %} {% endblock %}
@ -25,8 +25,9 @@
{% block content %} {% block content %}
<div class="refresh-alert alert {% if not refreshed %}hide{% endif %}"> <div class="refresh-alert alert {% if not refresh_pos %}hide{% endif %}">
A refresh request is in progress, please wait... <p>A refresh request is in progress, please wait...</p>
<p>Queue position: <span id="refresh-pos">{{ refresh_pos }}</span></p>
</div> </div>
<h2> <h2>
@ -34,10 +35,10 @@
{% if user.is_authenticated %} {% if user.is_authenticated %}
<span class="pull-right"> <span class="pull-right">
<button class="btn refresh-button {% if refreshed %}hide{% endif %}" data-category="{{ package.category }}" data-package="{{ package.name }}"> <button class="btn refresh-button {% if refresh_requested %}hide{% endif %}" data-category="{{ package.category }}" data-package="{{ package.name }}">
<img src="{{ STATIC_URL}}/img/refresh-active.png" alt="Refresh" > <img src="{{ STATIC_URL}}/img/refresh-active.png" alt="Refresh" >
</button> </button>
<button class="btn refresh-button-disabled disabled {% if not refreshed %}hide{% endif %}"> <button class="btn refresh-button-disabled disabled {% if not refresh_requested %}hide{% endif %}">
<img src="{{ STATIC_URL}}/img/refresh-inactive.png" alt="Refresh" > <img src="{{ STATIC_URL}}/img/refresh-inactive.png" alt="Refresh" >
</button> </button>
<button class="btn favourite-button {% if favourited %}hide{% endif %}" data-url="{% url "favourite_package" package.category package.name %}"> <button class="btn favourite-button {% if favourited %}hide{% endif %}" data-url="{% url "favourite_package" package.category package.name %}">
@ -67,9 +68,10 @@
$(".refresh-button").click(function() { $(".refresh-button").click(function() {
var url = "{% url "refresh_package" "XXX" "YYY" %}"; var url = "{% url "refresh_package" "XXX" "YYY" %}";
$.post(url.replace("XXX", $(this).data("category")).replace("YYY", $(this).data("package")), $.post(url.replace("XXX", $(this).data("category")).replace("YYY", $(this).data("package")),
function() { function(data) {
$(".refresh-button").addClass("hide"); $(".refresh-button").addClass("hide");
$(".refresh-button-disabled").removeClass("hide"); $(".refresh-button-disabled").removeClass("hide");
$("#refresh-pos").text(data.position);
$(".refresh-alert").show("slow"); $(".refresh-alert").show("slow");
}); });
}); });

View File

@ -195,10 +195,13 @@ def package(request, category, package):
favourited = True favourited = True
try: try:
refreshed = request.user in \ refresh_query = RefreshPackageQuery.objects.get(package=package)
RefreshPackageQuery.objects.get(package=package).users.all()
except RefreshPackageQuery.DoesNotExist: except RefreshPackageQuery.DoesNotExist:
refreshed = False refresh_requested = False
refresh_pos = None
else:
refresh_requested = request.user in refresh_query.users.all()
refresh_pos = refresh_query.position
return { return {
'package': package, 'package': package,
@ -206,10 +209,11 @@ def package(request, category, package):
'upstream': upstream, 'upstream': upstream,
'vlog': vlog, 'vlog': vlog,
'log': log, 'log': log,
'msg': log.messages() if log else "", 'msg': log.messages if log else "",
'last_scan': last_scan, 'last_scan': last_scan,
'favourited': favourited, 'favourited': favourited,
'refreshed': refreshed, 'refresh_requested': refresh_requested,
'refresh_pos': refresh_pos,
} }
@ -245,7 +249,7 @@ def problem(request, category, package):
'package': package, 'package': package,
'packaged': packaged, 'packaged': packaged,
'upstream': upstream, 'upstream': upstream,
'msg': log.messages() if log else "", 'msg': log.messages if log else "",
} }
@ -369,7 +373,8 @@ def refresh_package(request, category, package):
if not created: if not created:
obj.priority += 1 obj.priority += 1
obj.save() obj.save()
return {"result": "success"}
return {"result": "success", "position": obj.position}
@login_required @login_required

View File

@ -6,7 +6,7 @@ import json
from euscan import helpers, output from euscan import helpers, output
HANDLER_NAME = "cpan" HANDLER_NAME = "cpan"
CONFIDENCE = 100.0 CONFIDENCE = 100
PRIORITY = 90 PRIORITY = 90
_cpan_package_name_re = re.compile("mirror://cpan/authors/.*/([^/.]*).*") _cpan_package_name_re = re.compile("mirror://cpan/authors/.*/([^/.]*).*")

View File

@ -1,7 +1,8 @@
from urlparse import urljoin from urlparse import urljoin, urlparse
import urllib2 import urllib2
import re import re
import StringIO import StringIO
import difflib
try: try:
from BeautifulSoup import BeautifulSoup from BeautifulSoup import BeautifulSoup
@ -14,11 +15,40 @@ from euscan import CONFIG, SCANDIR_BLACKLIST_URLS, \
BRUTEFORCE_BLACKLIST_PACKAGES, BRUTEFORCE_BLACKLIST_URLS, output, helpers BRUTEFORCE_BLACKLIST_PACKAGES, BRUTEFORCE_BLACKLIST_URLS, output, helpers
HANDLER_NAME = "generic" HANDLER_NAME = "generic"
CONFIDENCE = 50.0 CONFIDENCE = 45
PRIORITY = 0 PRIORITY = 0
BRUTEFORCE_HANDLER_NAME = "brute_force" BRUTEFORCE_HANDLER_NAME = "brute_force"
BRUTEFORCE_CONFIDENCE = 30.0 BRUTEFORCE_CONFIDENCE = 30
def confidence_score(found, original, minimum=CONFIDENCE):
    """Rate how closely the URL ``found`` resembles the URL ``original``.

    The score is ``minimum`` when the two URLs differ in host or in
    directory depth.  Otherwise the paths are compared with version-like
    characters removed and their common leading part dropped; the
    similarity ratio of what remains (0.0 - 1.0) scales the score up to
    at most ``minimum * 2``.

    :param found: URL discovered by the scanner.
    :param original: URL the scan started from.
    :param minimum: base score returned for unrelated URLs.
    :returns: ``minimum`` unchanged, or an ``int`` between ``minimum``
        and ``minimum * 2``.
    """
    found_p = urlparse(found)
    original_p = urlparse(original)

    # check if the base url is the same
    if found_p.netloc != original_p.netloc:
        return minimum

    # check if the directory depth is the same
    if len(found_p.path.split("/")) != len(original_p.path.split("/")):
        return minimum

    # strip version-like characters (digits, '+' and '.')
    found_path = re.sub(r"[\d+\.]?", "", found_p.path)
    original_path = re.sub(r"[\d+\.]?", "", original_p.path)

    # strip the first equal part of the path; the scan is bounded by the
    # shorter string so a path that is a pure prefix of the other cannot
    # index past its end
    i = 0
    max_i = min(len(found_path), len(original_path))
    while i < max_i and found_path[i] == original_path[i]:
        i += 1
    found_path = found_path[i:]
    original_path = original_path[i:]

    # calculate difference ratio
    diff = difflib.SequenceMatcher(None, found_path, original_path).ratio()
    return int(minimum + minimum * diff)  # maximum score is minimum * 2
def scan_html(data, url, pattern): def scan_html(data, url, pattern):
@ -98,7 +128,8 @@ def scan_directory_recursive(cp, ver, rev, url, steps, orig_url):
path = urljoin(url, path) path = urljoin(url, path)
if not steps and path not in orig_url: if not steps and path not in orig_url:
versions.append((path, pv, HANDLER_NAME, CONFIDENCE)) confidence = confidence_score(path, orig_url)
versions.append((path, pv, HANDLER_NAME, confidence))
if steps: if steps:
ret = scan_directory_recursive(cp, ver, rev, path, steps, orig_url) ret = scan_directory_recursive(cp, ver, rev, path, steps, orig_url)
@ -209,14 +240,14 @@ def brute_force(pkg, url):
if helpers.version_filtered(cp, ver, version): if helpers.version_filtered(cp, ver, version):
continue continue
url = helpers.url_from_template(template, version) try_url = helpers.url_from_template(template, version)
infos = helpers.tryurl(url, template) infos = helpers.tryurl(try_url, template)
if not infos: if not infos:
continue continue
confidence = confidence_score(try_url, url,
result.append([url, version, BRUTEFORCE_HANDLER_NAME, minimum=BRUTEFORCE_CONFIDENCE)
BRUTEFORCE_CONFIDENCE]) result.append([try_url, version, BRUTEFORCE_HANDLER_NAME, confidence])
if len(result) > CONFIG['brute-force-false-watermark']: if len(result) > CONFIG['brute-force-false-watermark']:
output.einfo( output.einfo(

View File

@ -7,7 +7,7 @@ import portage
from euscan import helpers, output from euscan import helpers, output
HANDLER_NAME = "github" HANDLER_NAME = "github"
CONFIDENCE = 100.0 CONFIDENCE = 100
PRIORITY = 90 PRIORITY = 90

View File

@ -6,7 +6,7 @@ import xml.dom.minidom
from euscan import helpers, output from euscan import helpers, output
HANDLER_NAME = "php" HANDLER_NAME = "php"
CONFIDENCE = 100.0 CONFIDENCE = 100
PRIORITY = 90 PRIORITY = 90

View File

@ -6,7 +6,7 @@ import portage
from euscan import helpers, output from euscan import helpers, output
HANDLER_NAME = "pypi" HANDLER_NAME = "pypi"
CONFIDENCE = 100.0 CONFIDENCE = 100
PRIORITY = 90 PRIORITY = 90

View File

@ -6,7 +6,7 @@ import urllib2
from euscan import helpers, output from euscan import helpers, output
HANDLER_NAME = "rubygems" HANDLER_NAME = "rubygems"
CONFIDENCE = 100.0 CONFIDENCE = 100
PRIORITY = 90 PRIORITY = 90

View File

@ -179,7 +179,7 @@ def scan_upstream(query, on_progress=None):
not is_version_stable(version): not is_version_stable(version):
continue continue
if CONFIG['progress']: if CONFIG['progress']:
print ("", file=sys.stderr) print("", file=sys.stderr)
output.result(cp, version, url, handler, confidence) output.result(cp, version, url, handler, confidence)
return result return result