python多线程下载编程_Python多线程结合队列下载百度音乐代码详解
发布日期:2022-02-04 01:43:51 浏览次数:21 分类:技术文章

本文共 2947 字,大约阅读时间需要 9 分钟。

[Python]代码

#!/usr/bin/python

# -*- coding: utf-8 -*-

'''

百度中批量下载某歌手的歌(目前只下载第一页,可以自行拓展)

@author:admin

@qq: 1243385033

'''

import threading, urllib2, os,re,sys

from bs4 import BeautifulSoup

from Queue import Queue

'''目标歌手'''

SINGER = u'亚东'

'''保存路径'''

SAVE_FOLDER = 'F:/music/'

# 查询url

search_url = "http://music.baidu.com/search/song?key=%s&s=1"

# 百度音乐播放盒url

song_url = "http://box.zhangmen.baidu.com/x?op=12&count=1&mtype=1&title="

class Downloader(threading.Thread):
    '''Worker thread: repeatedly takes one-entry {song_name: url} dicts from
    a shared queue and downloads each file under SAVE_FOLDER/SINGER.'''

    def __init__(self, task):
        '''task: a Queue of {song_name: download_url} dicts shared by all workers.'''
        threading.Thread.__init__(self)
        self.task = task

    def run(self):
        '''Consume the queue forever (thread is started as a daemon).'''
        while True:
            url = self.task.get()
            try:
                self.download(url)
            finally:
                # BUG FIX: task_done() must run even when download() raises,
                # otherwise the main thread's queue.join() blocks forever.
                self.task.task_done()

    def build_path(self, filename):
        '''Return SAVE_FOLDER/SINGER/<filename>.mp3 for a song name.'''
        parent_path = os.path.join(SAVE_FOLDER, SINGER)
        return os.path.join(parent_path, filename + '.mp3')

    def download(self, url):
        '''Download one song. url is a single-entry {song_name: real_url} dict.'''
        sub_url = url.items()
        f_name = sub_url[0][0]
        req_url = sub_url[0][1]
        handle = urllib2.urlopen(req_url)
        try:
            save_path = self.build_path(f_name)
            with open(save_path, "wb") as handler:
                # Stream in 1 KiB chunks instead of reading the whole file
                # into memory at once.
                while True:
                    chunk = handle.read(1024)
                    if not chunk:
                        break
                    handler.write(chunk)
        finally:
            # BUG FIX: the original never closed the HTTP response,
            # leaking one socket per downloaded song.
            handle.close()
        msg = u"已经从 %s下载完成" % req_url
        sys.stdout.write(msg)
        sys.stdout.flush()

class HttpRequest:
    '''Searches Baidu Music for SINGER's songs (first result page only) and
    resolves each song title to a direct download URL via the box service.'''

    def __init__(self):
        # Accumulates {song_name: url} dicts produced by http_request().
        self.task = []
        # Both patterns extract CDATA payloads from the box-service XML.
        # NOTE(review): the two patterns are identical in the original;
        # presumably one was meant to match the "encode" section and the
        # other the "decode" section — kept as-is to preserve behavior.
        # (Raw-string form; byte value identical to the original literal.)
        self.reg_decode = re.compile(r'.*?CDATA\[(.*?)\]].*?')
        self.reg_encode = re.compile(r'.*?CDATA\[(.*?)\]].*?')
        self.init()
        self.target_url = search_url % urllib2.quote(self.encode2utf8(SINGER))

    def encode2utf8(self, source):
        '''Return source encoded as UTF-8 bytes; anything falsy or
        non-string is passed through unchanged.'''
        if source and isinstance(source, (str, unicode)):
            return source.encode("utf8")
        return source

    def mkDir(self, dir_name):
        '''Create dir_name if it does not exist yet.'''
        if not os.path.exists(dir_name):
            os.mkdir(dir_name)

    def init(self):
        '''Ensure SAVE_FOLDER and the per-singer subfolder both exist.'''
        self.mkDir(SAVE_FOLDER)
        self.mkDir(os.path.join(SAVE_FOLDER, SINGER))

    def http_request(self):
        '''Scrape the search result page and return a list of
        {song_name: download_url} dicts (self.task).'''
        response = urllib2.urlopen(self.target_url)
        content = response.read()
        response.close()
        # Parse the result page with BeautifulSoup.
        html = BeautifulSoup(content, from_encoding="utf8")
        # All song-title spans inside the song-list container.
        span_tag = html.find_all('div', {"monkey": "song-list"})[0] \
                       .find_all('span', class_='song-title')
        for a_tag in span_tag:
            song_name = unicode(a_tag.find_all("a")[0].get_text())
            # BUG FIX: the original did `global song_url` and reassigned it
            # inside this loop, so every request after the first one carried
            # the accumulated titles of all previous songs. Build the
            # request URL in a local variable instead.
            req_url = (song_url
                       + urllib2.quote(self.encode2utf8(song_name))
                       + '$$' + urllib2.quote(self.encode2utf8(SINGER))
                       + '$$$$&url=&listenreelect=0&.r=0.1696378872729838')
            xmlfile = urllib2.urlopen(req_url)
            try:
                xml_content = xmlfile.read()
            finally:
                # Close the XML response even if read() fails.
                xmlfile.close()
            url1 = re.findall(self.reg_encode, xml_content)
            url2 = re.findall(self.reg_decode, xml_content)
            # Skip songs the box service could not resolve.
            if not url1 or not url2:
                continue
            # Directory part of the first CDATA URL + file name from the
            # second pattern's first match.
            url = url1[0][:url1[0].rindex('/') + 1] + url2[0]
            self.task.append({song_name: url})
        return self.task

def start_download(urls):
    '''Spawn one daemon worker per URL, enqueue every URL, and block until
    each queued download has been marked done.'''
    pending = Queue()
    worker_count = len(urls)
    # Start the workers first; daemon threads die with the main thread.
    for _ in xrange(worker_count):
        worker = Downloader(pending)
        worker.setDaemon(True)
        worker.start()
    # Feed the queue once the workers are already waiting on it.
    for item in urls:
        pending.put(item)
    # Block until every item has been get() and task_done().
    pending.join()

if __name__ == '__main__':
    # Scrape the search page, then fan the resulting URLs out to workers.
    crawler = HttpRequest()
    download_list = crawler.http_request()
    start_download(download_list)

转载地址:https://blog.csdn.net/weixin_39603908/article/details/110078681 如侵犯您的版权,请留言回复原文章的地址,我们会给您删除此文章,给您带来不便请您谅解!

上一篇:python 通过模板生成文章_自动生成文章_python自动生成文章 - 云+社区 - 腾讯云
下一篇:python包管理工具pip_pip_python包管理工具(pip)下载 v9.0.1官方版 - 121下载站

发表评论

最新留言

哈哈,博客排版真的漂亮呢~
[***.90.31.176]2024年04月08日 20时24分42秒