最近想把网易云音乐中的英语听力下载到播放器中,方便随时听。但是只下载音频的话就少了些什么,最好连听力的歌词也下载进去。于是乎写了个小工具,可以批量下载歌单或者专辑中的音乐的歌词,可以省很多时间。
netease-lyric.py
- #coding=utf-8
- import requests
- from bs4 import BeautifulSoup
- import json
- import re,os
- from spider import *
def get_html(url, timeout=10):
    """Download ``url`` and return the response body as text.

    The request carries a desktop-Chrome ``User-Agent`` plus ``Referer`` and
    ``Host`` headers pointing at music.163.com.

    Args:
        url: Address of the page or API endpoint to fetch.
        timeout: Seconds to wait for the server before giving up, so a
            stalled connection cannot hang the script forever.

    Returns:
        The decoded response text, or ``None`` when the request fails (after
        printing ``'request error'``).
    """
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.77 Safari/537.36',
        'Referer': 'http://music.163.com',
        'Host': 'music.163.com',
    }
    try:
        response = requests.get(url, headers=headers, timeout=timeout)
        return response.text
    except requests.RequestException:
        # Only request failures are treated as "no result"; programming
        # errors and Ctrl-C are no longer swallowed by a bare ``except``.
        print('request error')
        return None
def get_lyric(song_id):
    """Fetch the lyric text of one song from the lyric API.

    Args:
        song_id: ID of the song; it is converted with ``str`` when the
            request URL is built.

    Returns:
        The value stored under ``['lrc']['lyric']`` in the JSON response, or
        an empty string when the request failed or the response carries no
        lyric.

    Raises:
        ValueError: If the response body is not valid JSON (raised by
            ``json.loads``).
    """
    url = 'http://music.163.com/api/song/lyric?' + 'id=' + str(song_id) + '&lv=1&kv=1&tv=-1'
    html = get_html(url)
    if not html:
        # get_html yields None after a failed request; nothing to parse.
        return ''
    json_obj = json.loads(html)
    # The response may lack an 'lrc' entry or hold an empty one; treat both
    # as "no lyric" instead of failing with KeyError/TypeError.
    lrc = json_obj.get('lrc') or {}
    return lrc.get('lyric') or ''
def get_details(song_id):
    """Look up the title and the first listed artist of a song.

    Args:
        song_id: ID of the song; it is converted with ``str`` when the
            request URL is built.

    Returns:
        A ``(song_name, song_artist)`` tuple taken from the first entry of
        the ``songs`` array in the detail API response.

    Raises:
        ValueError: If the request failed, the body is not valid JSON, or
            the response contains no song for ``song_id``.
    """
    url = 'http://music.163.com/api/song/detail/?' + 'id=' + str(song_id) + '&ids=%5B' + str(song_id) + '%5D'
    html = get_html(url)
    if not html:
        # get_html yields None after a failed request; report which song
        # was affected rather than failing inside json.loads.
        raise ValueError('no response for song id ' + str(song_id))
    json_obj = json.loads(html)
    songs = json_obj.get('songs')
    if not songs:
        raise ValueError('no song details returned for song id ' + str(song_id))
    first_song = songs[0]
    song_name = first_song['name']
    # Only the first artist is used, even when several are listed.
    song_artist = first_song['artists'][0]['name']
    return song_name, song_artist
def output_file(song_id):
    """Save the lyric of a song as ``<song name> - <artist>.lrc``.

    The file is opened by bare name (so it lands in the current working
    directory), written as UTF-8, and overwrites any existing file of the
    same name.

    Args:
        song_id: ID of the song whose lyric should be saved.
    """
    song_name, song_artist = get_details(song_id)
    lyric = get_lyric(song_id)
    file_name = song_name + ' - ' + song_artist + '.lrc'
    # A '/' in the title or artist would be read as a path separator, so
    # swap it for the look-alike full-width solidus (U+FF0F).
    file_name = file_name.replace('/', '\uff0f')
    # The context manager closes the file even if the write fails.
    with open(file_name, 'w', encoding='utf-8') as lrc_file:
        lrc_file.write(lyric or '')
# Album page whose songs should get a lyric file each.
url = "https://music.163.com/album?id=2817001"

# Collect the song IDs found on that page and show them before downloading.
id_list = get_songlist(url)
print(id_list)

# Write one .lrc file per song ID.
for song_id in id_list:
    output_file(song_id)
抓取歌单中的歌曲信息,获得歌曲 id:spider.py
- #coding=utf-8
- import requests
- from bs4 import BeautifulSoup
- import json
- import re,os
def get_html(url, timeout=10):
    """Download ``url`` and return the response body as text.

    The request carries a desktop-Chrome ``User-Agent`` plus ``Referer`` and
    ``Host`` headers pointing at music.163.com.

    Args:
        url: Address of the page or API endpoint to fetch.
        timeout: Seconds to wait for the server before giving up, so a
            stalled connection cannot hang the script forever.

    Returns:
        The decoded response text, or ``None`` when the request fails (after
        printing ``'request error'``).
    """
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.77 Safari/537.36',
        'Referer': 'http://music.163.com',
        'Host': 'music.163.com',
    }
    try:
        response = requests.get(url, headers=headers, timeout=timeout)
        return response.text
    except requests.RequestException:
        # Only request failures are treated as "no result"; programming
        # errors and Ctrl-C are no longer swallowed by a bare ``except``.
        print('request error')
        return None
def get_songlist(list_url):
    """Collect the song IDs linked from a playlist or album page.

    Only the first ``<ul>`` element of the downloaded page is inspected. For
    every ``<a href ...>...</a>`` anchor found in it, the first run of digits
    is taken as the song ID.

    Args:
        list_url: URL of the playlist or album page.

    Returns:
        A list of ID strings in the order the anchors appear. The list is
        empty when the download failed or no matching anchor was found.
    """
    html = get_html(list_url)
    if not html:
        # get_html yields None after a failed request; nothing to parse.
        return []
    soup = BeautifulSoup(html, 'lxml')
    # str(soup.ul) is the literal 'None' when the page has no <ul>, which
    # simply produces no anchor matches below.
    anchors = re.findall(r'<a href.*?</a>', str(soup.ul))
    song_ids = []
    for anchor in anchors:
        first_number = re.search(r'\d+', anchor)
        if first_number:
            song_ids.append(first_number.group())
    return song_ids
效果:
comments powered by Disqus