虫言虫语 虫言虫语

Python编写单线程爬虫

in Python · read (27697) · 文章转载请注明来源!
#!/usr/bin/env python3
import re
import urllib.request
import os

def picurl(url, path, max_retries=3):
    """Download every picture referenced by the page at ``url`` into ``path``.

    The target directory is created when it does not exist yet. The page is
    first probed with ``loadurl``; if it cannot be loaded after
    ``max_retries`` attempts the function gives up instead of retrying
    forever.

    Args:
        url: Address of the HTML page to scan for pictures.
        path: Directory the pictures are saved into.
        max_retries: Maximum number of attempts to load the page.

    Returns:
        None.
    """
    if os.path.exists(path):
        print(path + 'exist')
    else:
        os.makedirs(path)

    # Probe the page a bounded number of times; an unreachable page must not
    # spin in an endless request loop.
    for _ in range(max_retries):
        if loadurl(url) != '':
            break
        print('load' + url + 'error')
    else:
        return

    # re_pic() fetches the page itself, so only the URL is handed over.
    pic_list(re_pic(url), path)

def save_pic(url, path):
    """Save the ``.jpg``/``.gif`` picture at ``url`` into directory ``path``.

    The file name is the last path segment of ``url`` (query string and
    fragment excluded). An already existing file is left untouched. The
    download is attempted up to three times.

    Args:
        url: Address of the picture.
        path: Existing directory the picture is written into.

    Returns:
        True if the file already existed or was downloaded, False if no
        ``.jpg``/``.gif`` file name could be derived from ``url`` or every
        download attempt failed.
    """
    # The extension alternation is grouped so both .jpg and .gif URLs yield
    # the bare file name; the dot is escaped so it only matches a literal '.'.
    match = re.search(r'/([^/?#]+\.(?:jpg|gif))(?:[?#]|$)', url)
    if match is None:
        print(url + ': Failed to download')
        return False

    filename = path + "/" + match.group(1)
    print(filename + ':start')

    if os.path.exists(filename):
        print(filename + 'exists,skip')
        return True

    # No placeholder file is created up front: an empty file left behind by a
    # failed attempt would be mistaken for a finished download.
    for _ in range(3):
        if download(url, filename):
            print(filename + ': Over')
            return True

    print(url + ': Failed to download')
    return False

def download(url, filename):
    """Fetch ``url`` and write the response body to ``filename``.

    The whole body is read before the target file is opened, so a failed
    request or read does not leave a file behind.

    Args:
        url: Address of the resource to fetch.
        filename: Path of the file to write (opened in binary mode).

    Returns:
        True on success, False on any failure (the failure is printed).
    """
    try:
        # Request() is inside the try block: it raises ValueError for a
        # malformed URL, which must not abort the whole crawl.
        req = urllib.request.Request(url)
        req.add_header('User-Agent','Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:36.0) Gecko/20100101 Firefox/36.0')
        with urllib.request.urlopen(req, timeout=5) as conn:
            data = conn.read()
        with open(filename, 'wb') as f:
            f.write(data)
        return True
    except urllib.error.URLError:
        print('load' + url + 'error')
        return False
    except Exception as exc:
        # Best-effort boundary: report and signal failure with a real bool.
        print('unknown exception while downloading ' + url + ': ' + repr(exc))
        return False
def pic_list(picList, path):
    """Save every URL found in ``picList`` into directory ``path``.

    The strings are joined with spaces and every ``scheme://...`` token in
    the joined text is passed to ``save_pic``; strings that contain no URL
    are ignored.

    Args:
        picList: Iterable of strings, some of which contain picture URLs.
        path: Directory handed through to ``save_pic``.

    Returns:
        None.
    """
    joined = ' '.join(picList)
    # Raw string: '\w' and '\s' are regex escapes, not string escapes.
    for pic_url in re.findall(r'(\w+://[^\s]+)', joined):
        save_pic(pic_url, path)

def re_pic(url):
    """Collect the alt text, source URL and title of ``<img>`` tags at ``url``.

    The page is fetched with ``loadurl`` and scanned for ``<img alt=... src=...
    [title=...] />`` tags, optionally carrying the ``class="scrollLoading"``
    and ``style="width:100%;"`` attributes.

    Args:
        url: Address of the HTML page to scan.

    Returns:
        A list of unique strings in order of first appearance: for each
        matched tag its alt text, its src URL and its title (an empty string
        when the tag has no title). The list is empty when nothing matches.
    """
    # Only alt, src and title are captured. The optional attribute groups are
    # non-capturing, so values are selected by position; a substring test
    # would drop any alt text or URL that contains "class"/"style"/"title".
    pattern = (
        r'<img alt="([^"]*)"'
        r'(?: class="scrollLoading")?'
        r'(?:  style="width:100%;")?'
        r'\s*src="(\w+://[^\s]+)"'
        r'(?: title="([^"]*)")? />'
    )
    fields = []
    for alt, src, title in re.findall(pattern, loadurl(url)):
        fields.extend((alt, src, title))
    # dict.fromkeys removes duplicates while keeping first-seen order.
    return list(dict.fromkeys(fields))

def loadurl(url, encoding='GBK', timeout=5):
    """Fetch ``url`` and return its body decoded as text.

    Args:
        url: Address of the page to fetch.
        encoding: Codec used to decode the response body.
        timeout: Seconds to wait on the connection before giving up.

    Returns:
        The decoded page text, or an empty string when the page could not be
        fetched. Bytes that are invalid in ``encoding`` are replaced with
        U+FFFD rather than discarding the whole page.
    """
    try:
        # Request() is inside the try block: it raises ValueError for a
        # malformed URL, which is reported like any other load failure.
        req = urllib.request.Request(url)
        req.add_header('User-Agent','Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:36.0) Gecko/20100101 Firefox/36.0')
        with urllib.request.urlopen(req, timeout=timeout) as conn:
            raw = conn.read()
    except urllib.error.URLError:
        return ''
    except Exception as exc:
        print('unknown exception while loading ' + url + ': ' + repr(exc))
        return ''
    return raw.decode(encoding, errors='replace')

if __name__ == '__main__':
    # Usage: script.py [PAGE_URL [TARGET_DIR]]
    # Both arguments are optional and fall back to the original defaults.
    import sys

    cli_args = sys.argv[1:]
    url = cli_args[0] if len(cli_args) > 0 else 'http://www.meizitu.com/a/454.html'
    target_dir = cli_args[1] if len(cli_args) > 1 else 'D:/WW'
    picurl(url, target_dir)
jrotty WeChat Pay

微信打赏

jrotty Alipay

支付宝打赏

文章二维码

扫描二维码,在手机上阅读!

发表新评论
已有 7550 条评论
  1. Dennvit
    Dennvit
    回复

    Parkas: Immovable this obligation is in a fouling at 2Р’РІ 8Р’C (36Р’РІ 46Р’F) tadalafil tablets generic viagra shipped from usa

  2. econcorenny
    econcorenny
    回复

    bonus casino vegas world free slots online casino slots parx online casino http://onlinecasinosgtx.com/ - 888 casino download

  3. RafSeinaclecoca

    http://onlinecasinosgtx.com/ casino games free slots best online casino free games online no download no registration free las vegas slot machines

  4. econcorenny
    econcorenny
    回复

    cbd oil full spectrum yaa health store cbd stocks 100 pure cbd hemp oil cbd oil where to buy cbd oil http://cbdoilwalm.com/ - hemp oil vs cbd oil

  5. innodield
    innodield
    回复

    http://buycbdoilwalm.com/ hemp oil vs cbd oil benefits of hemp how to use cbd oil sunmed cbd oil

  6. Amitsflormror
    Amitsflormror
    回复

    cbd oil walmart strongest cbd gummies for sale cbd stores near me cbd cream for arthritis pain http://buycbdoilwalm.com/ - buycbdoilcure best cbd oil

  7. Dennvit
    Dennvit
    回复

    and the gassy pull back on is cliff the pyelonephritis utmost Viagra 50 mg Age of leftward extensive http://norxedpill.com/#

  8. econcorenny
    econcorenny
    回复

    trubliss cbd cbd for dogs cbd spray cv sciences cbd oil http://cbdoilwalm.com/ - best hemp oil

  9. swimarumTauff
    swimarumTauff
    回复

    shikai cbd cream caligarden cbd oil reviews best cbd gummies for pain cbd vape cartridges http://buycbdoilwalm.com/ - vape cbd

  10. RafSeinaclecoca

    cbd for dogs cbd oil for cats ananda cbd oil best cbd oil and cbd oil for sale

  11. econcorenny
    econcorenny
    回复

    cbd wax http://cbdoilwalm.com/ - cannativa cbd cbd cream for pain cbd hemp oil walmart

  12. Dennvit
    Dennvit
    回复

    Degrade your conversion sildenafil online the profoundly unobstructed translucent get on cannot oligoclase with still a tympanic flushed with

  13. econcorenny
    econcorenny
    回复

    free online slots no download http://onlinecasinosgtx.com/ - free casino slots with bonus scatter slots 50 lions free slots hollywood casino free online games

  14. Dennvit
    Dennvit
    回复

    Do not pure or volume scollops Viagra overnight shipping In Aethrioscope http://canadianped.com/#

  15. econcorenny
    econcorenny
    回复

    casino games no download no registration no deposit win real cash slot machines for home entertainment free slot games download full version http://onlinecasinosgtx.com/ - free slots with no download or registration

  16. smagmevagype
    smagmevagype
    回复

    best cbd oil http://buycbdoilwalm.com/ - best cbd for pain just cbd shikai cbd cream cbd md

  17. econcorenny
    econcorenny
    回复

    free penny slots with bonus spins http://onlinecasinosgtx.com/ - cashman casino slots lady luck casino free games online casino

  18. Dennvit
    Dennvit
    回复

    For Brief Orchestra Dazed To Our Overhear sildenafil every day are an eye-opener in this clop

  19. econcorenny
    econcorenny
    回复

    cbd oil 500 mg 49 yaa health store http://cbdoilwalm.com/ - defy cbd drink cbd for dogs cbd isolate

  20. innodield
    innodield
    回复

    where to buy cbd cream for pain http://buycbdoilwalm.com/ side effects of cbd oil difference between cbd oil and hemp oil hemp oil for pain relief

博客已萌萌哒运行
© 2020 由 Typecho 强力驱动.Theme by Yodu
前篇 后篇
雷姆
拉姆