try:
    # Python 2 location of urlparse (what this module originally targeted).
    from urlparse import urlparse
except ImportError:
    # Python 3 moved it into urllib.parse.
    from urllib.parse import urlparse


# Private sentinel marking "no default supplied", so that optvalue() can hand
# out a fresh list per call instead of sharing one mutable default object.
_MISSING = object()


def optvalue(alist, key, default=_MISSING):
    """Return ``alist[key]`` if ``key in alist``, otherwise ``default``.

    When ``default`` is not supplied, a new empty list is returned on every
    call, so a caller mutating the result cannot affect later calls. An
    explicitly passed ``default`` (including ``None``) is returned unchanged.
    """
    if key in alist:
        return alist[key]
    if default is _MISSING:
        return []
    return default


def domain(url):
    """Return the network-location (``netloc``) component of ``url``.

    ``urlparse`` only recognises a netloc when it is introduced by ``//``,
    so a bare ``"example.com/path"`` yields an empty string.
    """
    return urlparse(url).netloc


def join(tags):
    """Prefix every tag with ``//`` and join the results with ``|``.

    An empty ``tags`` iterable produces an empty string.
    """
    return "|".join('//' + tag for tag in tags)


def relevance(kds, wds):
    """Score how often the entries of ``kds`` occur in ``wds``.

    The score is the sum of ``wds.count(kw)`` over every ``kw`` in ``kds``,
    divided by ``len(kds) + len(wds)``. Returns 0 when either argument is
    empty (or ``None``).
    """
    if not kds or not wds:
        return 0
    hits = sum(wds.count(kw) for kw in kds)
    # Add the lengths instead of concatenating: same value, no temporary
    # sequence, and it also works when kds and wds are different types.
    # float() keeps true division on Python 2 as well.
    return float(hits) / (len(kds) + len(wds))


def is_absurl(url):
    """Return True if ``url`` starts with ``http://``, ``https://`` or ``//``."""
    return url.startswith((u'http://', u'https://', u'//'))