Teadmised note

Allikas: Lambda

import json
import urllib
import urllib.parse
import urllib.request
from collections import Counter

# Punctuation that must not stay glued to a word: a full stop becomes a token
# of its own, while commas, double quotes and angle brackets become whitespace.
_TOKEN_SEPARATORS = str.maketrans(
    {".": " . ", ",": " ", '"': " ", "<": " ", ">": " "}
)


def _count_tokens(text):
    """Return ``(token, count)`` pairs for *text*, least frequent first."""
    counts = Counter(text.translate(_TOKEN_SEPARATORS).split())
    # sorted() is stable, so tokens with equal counts keep first-seen order.
    return sorted(counts.items(), key=lambda pair: pair[1])


def m(url):
    """Download *url* and print how often each token occurs in its text.

    The body is decoded with the charset declared in the response headers,
    falling back to UTF-8 when none is declared. The text is then split on
    whitespace after the punctuation in ``_TOKEN_SEPARATORS`` has been
    replaced, and the resulting list of ``(token, count)`` tuples is printed
    sorted by ascending count.

    Args:
        url: Address to fetch; anything ``urllib.request.urlopen`` accepts.

    Returns:
        None. If the page cannot be fetched or decoded, ``failed to read url``
        is printed instead of the counts.
    """
    try:
        # The context manager closes the connection even when read/decode fails.
        with urllib.request.urlopen(url) as response:
            charset = response.headers.get_content_charset() or "utf-8"
            html = response.read().decode(charset)
    except Exception:
        # Deliberately best-effort: one unreadable page must not stop the
        # caller. Unlike a bare ``except`` this lets KeyboardInterrupt and
        # SystemExit propagate.
        print("failed to read url")
        return
    print(str(_count_tokens(html)))

def g(query="Andrus Ansip"):
    """Crawl Google web-search results for *query* and run ``m`` on every hit.

    Result pages are requested eight hits at a time for the offsets
    0, 8, ..., 96. For each hit the URL is printed under a banner line and
    then passed to ``m``. A page that cannot be fetched or parsed is reported
    and skipped. The crawl stops early when a response has no
    ``responseData`` or when ``responseData`` carries no ``results``.

    Args:
        query: Search phrase; it is URL-encoded before being sent. The
            default yields the same request as the previously hard-coded
            ``Andrus+Ansip``.
    """
    print("googling")
    encoded_query = urllib.parse.quote_plus(query)
    for start in range(0, 100, 8):
        print("start " + str(start))
        # NOTE(review): this endpoint belongs to the Google AJAX Search API,
        # which appears to have been retired - confirm it still answers.
        search_url = (
            "https://ajax.googleapis.com/ajax/services/search/web?v=1.0&rsz=8"
            "&start=" + str(start) + "&q=" + encoded_query
        )
        try:
            # The context manager closes the connection even on failure.
            with urllib.request.urlopen(search_url) as response:
                # NOTE(review): JSON is normally UTF-8; the ISO-8859-1 decode
                # is kept as found - confirm against the service.
                payload = json.loads(response.read().decode("iso-8859-1"))
        except Exception:
            # Best-effort: skip this page, but let KeyboardInterrupt and
            # SystemExit through (a bare ``except`` would swallow them).
            print("failed to read google search url")
            continue

        if not isinstance(payload, dict) or "responseData" not in payload:
            print("failed to read/parse")
            return
        data = payload["responseData"]
        if not isinstance(data, dict) or "results" not in data:
            print("no more results")
            return

        for result in data["results"] or []:
            # A failure on one hit is reported on its own so it is no longer
            # mislabelled as a search-page failure, and the remaining hits of
            # the page are still processed.
            try:
                page_url = result["url"]
                print("******* url *****")
                print(str(page_url))
                m(page_url)
            except Exception:
                print("failed to process search result")

# Script entry point: the crawl starts as soon as this file is run (or imported).
g()