1
0
mirror of https://github.com/certbot/certbot.git synced 2026-01-26 07:41:33 +03:00

Issue payment challenges for top 10,000 domains

(As measured by either the Alexa or Quantcast lists, which are remarkably
divergent)
This commit is contained in:
Peter Eckersley
2012-11-18 20:05:00 -08:00
parent 8a795cbc32
commit fffb5df59b
3 changed files with 20047 additions and 3 deletions

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

def payment_required(session):
    """Does this session require a payment?

    Reads the list stored under ``"<session>:names"`` through ``r`` and
    returns True as soon as one of the requested names is reported by
    in_top_10k(); returns False otherwise (including for an empty list).
    """
    # Earlier sample policy (disabled): require a payment when total number
    # of requested subject names is greater than one.
    # if r.llen("%s:names" % session) > 1:
    #     return True
    # Second example: if any of the names are in the Alexa or Quantcast top
    # 10,000, call for a payment
    names = r.lrange("%s:names" % session, 0, -1)
    return any(in_top_10k(name) for name in names)
def in_top_10k(hostname):
    """Check whether a hostname is part of a top 10,000 website.

    That includes subdomains of top 10,000 sites: the hostname is split on
    dots and every suffix made of at least two labels (shortest first, up
    to the full hostname) is looked up in the module-level ``top_10k``.

    Returns True if any of those suffixes is listed, False otherwise.
    A hostname with fewer than two labels is never matched.
    """
    # XXX Subdomains below a public suffix (such as a dynamic DNS provider
    # or hosting umbrella, perhaps) should not count; this is not
    # implemented yet.
    labels = hostname.split(".")
    for n in range(2, len(labels) + 1):
        name_or_parent = ".".join(labels[-n:])
        if name_or_parent in top_10k:
            return True
        # Keep climbing on a miss: a longer parent (e.g. "bbc.co.uk" after
        # "co.uk") may still be listed.
        # XXX if name_or_parent in public_suffix_list: break
    return False
def check_domain(domain):
    """Return True if *domain* looks like a plausible top-10k domain name.

    A domain is accepted when it contains at least one ASCII letter, at
    least one dot, and nothing but ASCII letters, digits, "-" and ".".
    """
    import string

    allowed = string.ascii_letters + string.digits + "-."
    # top 10k domains should contain dots, and ASCII characters (for the TLD,
    # if nothing else).
    # XXX The Alexa top10k contains a few IP addresses.  This currently
    # excludes them, but perhaps it shouldn't...
    if not any(c in string.ascii_letters for c in domain):
        return False
    if "." not in domain:
        return False
    return all(c in allowed for c in domain)
# Module-level flag; get_top_10k() sets it to True after it has run.
have_top_10k = False
def get_top_10k():
    """Load the top-10,000 domain lists into the global ``top_10k`` dict.

    Each line of each data file is split on whitespace and its second
    field is taken as the domain (presumably the lines are "rank domain"
    pairs -- confirm against the data files).  Domains accepted by
    check_domain() become keys of ``top_10k``.  Lines with fewer than two
    fields are skipped.  ``have_top_10k`` is set to True once all files
    have been read.

    Raises whatever open() raises if a data file cannot be opened.
    """
    global top_10k, have_top_10k
    # NOTE(review): "quantast" looks like a misspelling of "quantcast";
    # confirm against the actual file name on disk before changing it.
    data_files = ["data/alexa-top-10k.txt", "data/quantast-top-10k.txt"]
    top_10k = {}
    for path in data_files:
        # The with-block closes the file even if parsing raises.
        with open(path) as data_file:
            for line in data_file:
                fields = line.split()
                if len(fields) < 2:
                    # Blank or malformed line: no domain column to read.
                    continue
                domain = fields[1]
                if check_domain(domain):
                    top_10k[domain] = True
    have_top_10k = True
# Build the top_10k table once, when this module is executed.
get_top_10k()
def expire_session(session, state):
"""Should this session be expired?"""