虫言虫语 虫言虫语

Python编写单线程爬虫

in Python read (91993) 文章转载请注明来源!
#!/usr/bin/env python3
import re
import urllib.request
import os

def picurl(url,path,max_tries=3):
    """Download every picture found on the page at ``url`` into ``path``.

    The target directory is created when it does not exist yet. The page is
    first fetched with ``loadurl`` purely as a reachability check; once that
    succeeds, ``re_pic`` extracts the picture entries from the page and
    ``pic_list`` saves them.

    Args:
        url: Address of the page to scrape.
        path: Directory the pictures are written to.
        max_tries: Maximum number of reachability attempts. Previously the
            check was retried forever in a tight loop, which never returned
            for a page that stays unreachable.

    Returns:
        None. When the page cannot be loaded within ``max_tries`` attempts
        nothing is downloaded.
    """
    if os.path.exists(path):
        print(path + 'exist')
    else:
        os.makedirs(path)
    # loadurl reports failure by returning an empty string.
    for _ in range(max_tries):
        if loadurl(url) != '':
            break
        print('load' + url + 'error')
    else:
        # Every attempt failed: give up instead of spinning indefinitely.
        return
    pic_list(re_pic(url),path)

def save_pic(url,path):
    """Save the picture at ``url`` into directory ``path``.

    The file name is the last ``*.jpg`` / ``*.gif`` path segment of the URL.
    A file that already exists is left alone; otherwise ``download`` is
    attempted up to three times.

    Args:
        url: Address of the picture.
        path: Existing directory to store the picture in.

    Returns:
        True when the file already exists or was downloaded, False when the
        URL contains no picture name or every download attempt failed.
    """
    # The extension alternation must be grouped: in the old pattern
    # '.*/(.*?.jpg)|(.*?.gif)' the '|' split the whole expression, so .gif
    # URLs produced an empty name and were never downloaded.
    match = re.search(r'.*/([^/]*?\.(?:jpg|gif))', url)
    if match is None:
        print(url + ': Failed to download')
        return False
    filename = path + "/" + match.group(1)
    print(filename + ':start')
    if os.path.exists(filename):
        print(filename + 'exists,skip')
        return True
    for _ in range(3):
        if download(url,filename):
            print(filename + ': Over')
            return True
        # Remove any partial output so that neither the next attempt nor a
        # later run mistakes it for a finished download.
        if os.path.exists(filename):
            os.remove(filename)
    print(url + ': Failed to download')
    return False
def download(url,filename):
    """Fetch ``url`` and write the response body to ``filename``.

    The request is sent with a Firefox User-Agent header and a 5 second
    timeout.

    Args:
        url: Address to fetch.
        filename: Path of the file to write (opened in binary mode).

    Returns:
        True when the body was fetched and written, False on any failure.
    """
    req = urllib.request.Request(url)
    req.add_header('User-Agent','Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:36.0) Gecko/20100101 Firefox/36.0')
    try:
        # Read the whole body before opening the destination so a failed
        # transfer does not leave a truncated file behind; the with-block
        # also closes the connection, which was previously leaked.
        with urllib.request.urlopen(req,timeout=5) as conn:
            data = conn.read()
        with open(filename,'wb') as f:
            f.write(data)
        return True
    except urllib.error.URLError:
        print('load' + url + 'error')
        return False
    except Exception:
        print('unkown exception in conn.read()')
        # Always report failure as False (this path used to return '').
        return False
def pic_list(picList,path):
    """Save every URL that appears in ``picList`` into ``path``.

    The entries are joined with spaces and every ``scheme://...`` token in
    the joined text is handed to ``save_pic``; entries without a URL are
    ignored.

    Args:
        picList: Iterable of strings, some of which contain URLs.
        path: Directory passed through to ``save_pic``.
    """
    joined = ' '.join(picList)
    # Raw string: '\w' and '\s' are invalid escapes in a normal literal.
    for pic_url in re.findall(r'(\w+://[^\s]+)', joined):
        save_pic(pic_url,path)

def re_pic(url):
    """Return the picture entries found in the page at ``url``.

    The page is fetched with ``loadurl`` and scanned for ``<img ... />`` tags
    of the shape the pattern below describes. For each tag the alt text, the
    ``src`` URL and the title text are collected.

    The optional ``class`` / ``style`` / ``title`` attribute groups are now
    discarded by position. They used to be discarded by testing whether a
    captured value contained one of those words, which also threw away real
    URLs and alt texts containing them. Empty captures are dropped as well.

    Args:
        url: Address of the page to scan.

    Returns:
        List of unique non-empty strings in order of first appearance.
    """
    # Same pattern as before, written as a raw string so '\s' and '\w' are
    # not invalid string escapes.
    searchname = r'<img alt="([^"]*)"( class="scrollLoading")?(  style="width:100%;")?\s*src="(\w+://[^\s]+)"( title="([^"]*)")? />'
    collected = []
    for alt, _class_attr, _style_attr, src, _title_attr, title in re.findall(searchname,loadurl(url)):
        collected.extend(text for text in (alt, src, title) if text)
    # dict.fromkeys keeps first-seen order while removing duplicates.
    return list(dict.fromkeys(collected))

def loadurl(url,timeout=5):
    """Fetch ``url`` and return its body decoded as GBK text.

    The request is sent with a Firefox User-Agent header.

    Args:
        url: Address of the page to fetch.
        timeout: Seconds to wait on the connection before giving up. The
            request previously had no timeout and could block forever; the
            default matches the one ``download`` uses.

    Returns:
        The decoded page text, or an empty string when the page cannot be
        fetched or decoded.
    """
    req = urllib.request.Request(url)
    req.add_header('User-Agent','Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:36.0) Gecko/20100101 Firefox/36.0')
    try:
        # The with-block closes the connection, which was previously leaked.
        with urllib.request.urlopen(req,timeout=timeout) as conn:
            raw = conn.read()
        return raw.decode('GBK')
    except urllib.error.URLError:
        return ''
    except Exception:
        print('unkown exception in conn.read()')
        return ''

# Script entry point: scrape one gallery page into a fixed local directory.
if __name__ == '__main__':
    picurl('http://www.meizitu.com/a/454.html', 'D:/WW')
jrotty WeChat Pay

微信打赏

jrotty Alipay

支付宝打赏

文章二维码

扫描二维码,在手机上阅读!

发表新评论
已有 28868 条评论
  1. Ellvady
    Ellvady
    回复

    Viagra 100mg Kosten Zithromax For Sale Uk generic viagra Zentel Bendapar

  2. Lesswesty
    Lesswesty
    回复

    Nexium Online Canada Costo Levitra Prix Viagra Pharmacie Quebec best online levitra Zithromax Cardiac Warning

  3. Ellvady
    Ellvady
    回复

    Viagra Free Delivery viagra Viagra Kaufen Nurnberg Cialis Generique Avec Dapoxetine How Is Amoxil Used

  4. Ellvady
    Ellvady
    回复

    Remboursement Viagra Belgique Zithromax Children Dosage Fluoxetine 20mg viagra Que Es El Viagra Femenino Zithromax Dosage Pneumonia

  5. Lesswesty
    Lesswesty
    回复

    Cheapest Non Prescription Celias Foro Kamagra Paypal Discount Generic Fluoxetine Internet In Germany online pharmacy Viagra Cheapest 100mg Soft Discount Zithromax

  6. Ellvady
    Ellvady
    回复

    Legal Flagyl 400 Mg Online Cialis 2 5 Acyclovir 400 No Prescription Online buy cialis Cialis Generika Bestellen Forum

  7. Lesswesty
    Lesswesty
    回复

    Baclofen Riva Senna Abilify Online Sildenafil Espana viagra online prescription Erythromycin Price In Las Vegas

  8. Ellvady
    Ellvady
    回复

    Belladonna Medication Viagra Tachicardia buy generic cialis online Does Cephalexin Affect Birth Control Pills

  9. Lesswesty
    Lesswesty
    回复

    1 Propecia Vs 5 Propecia el levitra funciona Amoxicilina Website Next Day Free Shipping Kansas City

  10. Ellvady
    Ellvady
    回复

    Viagra And Generic Drug Gel De Kamagra Best Retail Pharmacy Viagra Price buy cialis Commande Seroplex 20 Stendra For Sale Buy The Blue Pill Pharmacy Online

  11. Lesswesty
    Lesswesty
    回复

    Zithromax Suspension Storage Orislat Buy Online Canadian Ems cialis online Amoxicillin In Dogs

  12. MatExcupe
    MatExcupe
    回复

    Unisom In Singapore cialis Viagra Cialis Kaufen Wien Viagra Without A Prescription Canada

  13. Lesswesty
    Lesswesty
    回复

    Generic Viagra Professional Buy Cialis Generic Tadalafil Best Prices Buy Priligy Dapoxetine United States

  14. Stevnisa
    Stevnisa
    回复

    Buy Now Isotretinoin Oratane http://cialibuy.com - Buy Cialis Robaxin Mail Order Cialis Rx One Pharmacy Canada Non Generic

  15. MatExcupe
    MatExcupe
    回复

    Kamagra Kaufen Oberhausen http://cialibuy.com - buy cialis Getmensmeds Reviews Buy Cialis Alcohol Zithromax

  16. Kelsype
    Kelsype
    回复

    Cephalexin Odor http://ggenericcialisle.com - cialis Generic Cipro Overnight Delivery cheapest cialis Bronchitis Zithromax

  17. Stevnisa
    Stevnisa
    回复

    Finasteride Online Canadian Pharmacy http://abuycialisb.com - Cialis Zentel Quick Shipping Cialis Levitra Vente

  18. Kelsype
    Kelsype
    回复

    Propecia Proscar Treat Hair Loss http://cialisjh.com - Buy Cialis Clomid Et Grossesse Posologie Cialis Propecia E Impotencia Comprar

  19. Lesswesty
    Lesswesty
    回复

    Tadalafil Online Us Pharmacy http://apcialisle.com/#718 - Cialis Buy Clonidine Fast Buy Cialis Viagra Torrino

  20. Lesswesty
    Lesswesty
    回复

    Viagra Cialis Netdoktor http://buycialisuss.com - Buy Cialis Vendo Viagra En Mano Barcelona Cialis Precio Propecia

博客已萌萌哒运行
© 2020 由 Typecho 强力驱动.Theme by Yodu
前篇 后篇
雷姆
拉姆