虫言虫语 虫言虫语

Python编写单线程爬虫

in Python · read (45967) · 文章转载请注明来源!
#!/usr/bin/env python3
import re
import urllib.request
import os

def picurl(url, path, max_tries=3):
    """Download the pictures referenced by the page at ``url`` into ``path``.

    The target directory is created when it does not exist yet.  The page
    is probed with ``loadurl`` up to ``max_tries`` times; if every probe
    returns an empty string the function gives up and returns without
    downloading anything (the original retried forever in a tight loop).

    Args:
        url: Address of the page to crawl.
        path: Directory the pictures are saved into.
        max_tries: Maximum number of attempts to load the page.

    Returns:
        None.
    """
    if os.path.exists(path):
        print(path + 'exist')
    else:
        os.makedirs(path)

    for _ in range(max_tries):
        if loadurl(url) != '':
            break
        print('load' + url + 'error')
    else:
        # Every attempt failed: stop instead of hammering the server forever.
        return

    # re_pic() is handed the URL, not the HTML, so the loop above only
    # checks that the page is reachable.
    pic_list(re_pic(url), path)

def save_pic(url, path):
    """Save the picture at ``url`` into directory ``path``.

    The file name is the last path segment of ``url`` up to and including
    its ``.jpg`` or ``.gif`` extension.  An already existing file is left
    untouched.  Otherwise the download is attempted up to three times.

    Args:
        url: Address of the picture.
        path: Existing directory the picture is written to.

    Returns:
        True when the file already exists or was downloaded, False when no
        ``.jpg``/``.gif`` file name can be extracted from ``url`` or every
        download attempt failed.
    """
    # One group for both extensions: with the former
    # '.*/(.*?.jpg)|(.*?.gif)' a .gif URL left the first group empty, so the
    # target became the directory itself and gifs were always "skipped".
    match = re.search(r'.*/(.*?\.(?:jpg|gif))', url)
    if match is None:
        print(url + ': Failed to download')
        return False

    filename = path + "/" + match.group(1)
    print(filename + ':start')
    if os.path.exists(filename):
        print(filename + 'exists,skip')
        return True

    for _ in range(3):
        print(filename)
        if download(url, filename):
            print(filename + ': Over')
            return True
        # The file did not exist before this call, so anything present now
        # is a leftover of the failed attempt; remove it so that a later
        # run does not mistake it for a finished download.
        if os.path.exists(filename):
            os.remove(filename)

    print(url + ': Failed to download')
    return False

def download(url, filename):
    """Fetch ``url`` and write the response body to ``filename``.

    The body is read completely before the target file is opened, so a
    failed request or read does not leave an empty file behind, and the
    connection is closed as soon as the body has been read.

    Args:
        url: Address to fetch.
        filename: Path of the file to (over)write in binary mode.

    Returns:
        True on success, False on any failure (the failure reason is only
        printed).
    """
    try:
        # Built inside the try block so a malformed URL is reported as a
        # failed download instead of raising.
        req = urllib.request.Request(url)
        req.add_header('User-Agent','Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:36.0) Gecko/20100101 Firefox/36.0')
        with urllib.request.urlopen(req, timeout=5) as conn:
            data = conn.read()
        with open(filename, 'wb') as f:
            f.write(data)
        return True
    except urllib.error.URLError:
        print('load' + url + 'error')
        return False
    except Exception:
        print('unkown exception in conn.read()')
        return False
def pic_list(picList, path):
    """Save every URL found in ``picList`` into directory ``path``.

    The entries are joined with spaces and each ``scheme://...`` token (a
    run of non-whitespace characters) is passed to ``save_pic``; entries
    that contain no such token are ignored.

    Args:
        picList: Iterable of strings, some of which contain URLs.
        path: Directory handed through to ``save_pic``.

    Returns:
        None.
    """
    joined = ' '.join(picList)
    # Raw string: '\w' and '\s' are invalid escapes in a normal literal.
    for pic_link in re.findall(r'(\w+://[^\s]+)', joined):
        save_pic(pic_link, path)

def re_pic(url):
    """Collect the ``<img>`` fields found on the page at ``url``.

    The page is fetched with ``loadurl`` and scanned for ``<img alt=... src=...
    [title=...] />`` tags.  For every tag the alt text, the ``src`` URL and
    the title text are collected; an optional attribute that is absent
    contributes an empty string.

    Args:
        url: Address of the page to scan.

    Returns:
        A list of the collected strings without duplicates, in order of
        first appearance.
    """
    searchname = r'<img alt="([^"]*)"( class="scrollLoading")?(  style="width:100%;")?\s*src="(\w+://[^\s]+)"( title="([^"]*)")? />'
    # Positions (0-based) of the groups that capture a whole optional
    # attribute (class, style, title).  They are dropped by position; the
    # former substring test ('class' in value, ...) also discarded alt
    # texts, URLs and titles that merely contained one of those words.
    attribute_groups = (1, 2, 4)

    fields = []
    for groups in re.findall(searchname, loadurl(url)):
        for index, value in enumerate(groups):
            if index in attribute_groups and value:
                continue
            fields.append(value)

    # dict.fromkeys keeps first-seen order while dropping duplicates.
    return list(dict.fromkeys(fields))

def loadurl(url, timeout=10):
    """Fetch ``url`` and return its body decoded as GBK text.

    Args:
        url: Address of the page to fetch.
        timeout: Seconds to wait on the connection before giving up; the
            original call had no timeout and could block indefinitely.

    Returns:
        The decoded page, or an empty string when the request fails, the
        URL is malformed, or the body cannot be decoded.
    """
    try:
        # Built inside the try block so a malformed URL yields '' like any
        # other load failure instead of raising.
        req = urllib.request.Request(url)
        req.add_header('User-Agent','Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:36.0) Gecko/20100101 Firefox/36.0')
        with urllib.request.urlopen(req, timeout=timeout) as conn:
            raw = conn.read()
        return raw.decode('GBK')
    except urllib.error.URLError:
        return ''
    except Exception:
        print('unkown exception in conn.read()')
        return ''

if __name__ == '__main__':
    # Script entry point: crawl one fixed gallery page into a fixed folder.
    picurl('http://www.meizitu.com/a/454.html', 'D:/WW')
jrotty WeChat Pay

微信打赏

jrotty Alipay

支付宝打赏

文章二维码

扫描二维码,在手机上阅读!

发表新评论
已有 11867 条评论
  1. vtgdretelf
    vtgdretelf
    回复

    cialis online http://cialonlinesja.com/ - cialis buy cialis 20mg cialis cialis 20mg

  2. vtgdretelf
    vtgdretelf
    回复

    buy cialis http://cialonlinesja.com/ - buy cialis cialis online generic cialis cialis online

  3. KimDes
    KimDes
    回复

    prozac buy india

  4. JackDes
    JackDes
    回复

    clomid coupon buy generic indocin lipitor daily order medrol robaxin 750 tabs amoxil 250 price keflex on line ampicillin tablet

  5. vthDrurb
    vthDrurb
    回复

    buy generic 100mg viagra online http://buyviagenonline.com/ - viagra womens viagra buy viagra online viagra

  6. Acorbjach
    Acorbjach
    回复

    how effective is antiviral medication for cold sores antiviral medications for herpes zoster online farmacia comprar viagra sin receta comprar viagra antiviral drugs for flu for toddlers. antiviral medication for cats Prevention of Coronavirus Disease 2019 20 3cccb37 , the antiviral drug is. the antiviral drug tamiflu, what yeast infection treatment should i use.
    En linea farmacia la comprar viagra generico is coronavirus curable in cats, antiviral medicines for cold sores. antiviral treatment for hiv, antiviral drug for influenza antiviral drug means. Online farmacia comprar tadalafil site what is an antiviral for flu, antiviral drug definition biology. what is a logical antiviral drug target in retroviruses, over the counter antiviral meds for herpes enter now, a high volume.

  7. vttqabaxy
    vttqabaxy
    回复

    buy generic 100mg viagra online http://buyviagricxr.com/ - generic viagra walmart generic viagra viagra for sale viagra for sale

  8. wdriLiene
    wdriLiene
    回复

    cialis buy http://cialishe.com/ - cialis 20mg п»їcialis cialis generic online cialis

  9. JackDes
    JackDes
    回复

    generic prozac online best generic paxil advair drug prices buy amoxicillin 500mg uk online buy sildenafil over the counter

  10. JaneDes
    JaneDes
    回复

    medication robaxin 750

  11. tadalafil canadian pharmacy

    Limit the hands in the service of two events. free slots online online casino real money

  12. JackDes
    JackDes
    回复

    trazodone pill anafranil over the counter atenolol 50 mg buy hydroxychloroquine

  13. vpkcrure
    vpkcrure
    回复

    buy viagra online [url=http://cheapgogenvia.com/]womens viagra[/url] viagra generic viagra side effects buy viagra online

  14. SueDes
    SueDes
    回复

    stromectol tablets price purchase flagyl dapoxetine 60 mg tablet price purchase plavix online lasix 100mg online xenical prescription coupon buy amoxil online usa generic bactrim ds avana 522 silagra 25 mg price

  15. SueDes
    SueDes
    回复

    rx medrol where to buy zithromax online cialis tablets where to buy colchicine tablets sildalis cheap wellbutrin brand sildenafil 20 mg online buspar 115mg india tadalafil comparison strattera online pharmacy

  16. JoeDes
    JoeDes
    回复

    ivermectin 3mg neurontin 800

  17. JimDes
    JimDes
    回复

    buspar canada 160 bactrim erythromycin australia ciprofloxacin mail online generic avana avodart cost uk

  18. LisaDes
    LisaDes
    回复

    finpecia tablets online

  19. vtgdretelf
    vtgdretelf
    回复

    cialis 20mg http://cialonlinesja.com/ - cialis buy cialis 20mg cialis buy online cialis

  20. SueDes
    SueDes
    回复

    amoxicillin cost in india can i buy colchicine dapoxetine 30 price in india buy hydrochlorothiazide 50 mg estrace cream medication cephalexin 750 mg tablets tretinoin prescription online canada buy kamagra 100 prozac canada price prednisolone 25mg tablet

博客已萌萌哒运行
© 2020 由 Typecho 强力驱动.Theme by Yodu
前篇 后篇
雷姆
拉姆