Teadmised note
Allikas: Lambda
import http.client
import json
import urllib
import urllib.parse
import urllib.request
from collections import Counter
# Characters treated as token boundaries when splitting a page into words.
# A period is kept as its own token; the other characters are dropped.
_TOKEN_BREAKS = str.maketrans(
    {".": " . ", ",": " ", '"': " ", "<": " ", ">": " "}
)


def m(url: str) -> None:
    """Fetch *url* and print how often each whitespace-separated token occurs.

    The response body is decoded as UTF-8, split into tokens (commas, double
    quotes and angle brackets act as separators; a period becomes a token of
    its own), and the ``(token, count)`` pairs are printed as a single list
    ordered by ascending count. Tokens with equal counts keep the order in
    which they first appeared.

    Args:
        url: Address to download; anything ``urllib.request.urlopen`` accepts.

    Returns:
        None. On a download or decoding failure ``"failed to read url"`` is
        printed and nothing else happens.
    """
    try:
        # The context manager closes the response even if read/decode fails.
        with urllib.request.urlopen(url) as response:
            html = response.read().decode("utf-8")
    except (OSError, ValueError, http.client.HTTPException):
        # OSError covers URLError/HTTPError/socket errors, ValueError covers
        # malformed URLs and undecodable bytes. Deliberately narrower than a
        # bare ``except`` so Ctrl-C and SystemExit still propagate.
        print("failed to read url")
        return

    words = html.translate(_TOKEN_BREAKS).split()
    # Counter preserves first-seen order and sorted() is stable, so ties are
    # printed in order of first appearance.
    ranked = sorted(Counter(words).items(), key=lambda pair: pair[1])
    print(str(ranked))
# Search endpoint queried by g() and the number of results requested per page.
# NOTE(review): this AJAX search endpoint is believed to have been retired by
# Google -- confirm it still answers before relying on this function.
_SEARCH_ENDPOINT = "https://ajax.googleapis.com/ajax/services/search/web"
_PAGE_SIZE = 8


def g(query: str = "Andrus Ansip", limit: int = 100) -> None:
    """Page through web-search results for *query* and word-count every hit.

    Requests result pages of ``_PAGE_SIZE`` entries at start offsets
    0, 8, 16, ... while the offset is below *limit*. For each result that
    carries a ``"url"`` entry, the URL is printed and passed to ``m``.

    Args:
        query: Search phrase; it is URL-encoded before being sent.
        limit: Paging stops once the start offset reaches this value.

    Returns:
        None. Progress and failures are reported with ``print``:
        a page that cannot be downloaded or parsed as JSON is skipped, while
        a response without ``responseData`` or without ``results`` ends the
        whole run.
    """
    print("googling")
    for start in range(0, limit, _PAGE_SIZE):
        print("start " + str(start))
        params = urllib.parse.urlencode(
            {"v": "1.0", "rsz": _PAGE_SIZE, "start": start, "q": query}
        )
        search_url = _SEARCH_ENDPOINT + "?" + params

        # Keep the try body to the network/parse step so that a failure while
        # processing a result is never misreported as a search failure.
        try:
            with urllib.request.urlopen(search_url) as response:
                payload = json.loads(response.read().decode("iso-8859-1"))
        except (OSError, ValueError, http.client.HTTPException):
            # Best effort: give up on this page only and try the next one.
            # json.JSONDecodeError is a ValueError, so bad JSON lands here.
            print("failed to read google search url")
            continue

        if not isinstance(payload, dict) or "responseData" not in payload:
            print("failed to read/parse")
            return

        response_data = payload["responseData"]
        if not isinstance(response_data, dict) or "results" not in response_data:
            print("no more results")
            return

        for result in response_data["results"] or ():
            result_url = result.get("url") if isinstance(result, dict) else None
            if not result_url:
                # Entry without a usable address: nothing to download.
                continue
            print("******* url *****")
            print(str(result_url))
            m(result_url)
# Start the crawl immediately; as a bare top-level call this runs whenever the
# module is loaded, not only when it is executed as a script.
g()