将歌词处理成电子书
Feb. 10, 2019
后端的最后一部分。
-
- import json
- import re
-
-
class Lyric2Book:
    """Render songs with multi-version lyrics into a single HTML or TXT book.

    Each lyric version is an LRC-style string: one line per lyric line,
    optionally prefixed with ``[..]`` time tags and containing ``(n,m)``
    word-timing markers, which are discarded.

    Two typesettings are supported:

    * ``'parallel'`` -- versions are aligned by time tag and printed line by
      line next to each other.
    * ``'chunk'`` -- every version is printed as one contiguous block.

    NOTE: song names, albums, artists and lyric text are inserted into the
    HTML output verbatim (no escaping).
    """

    support_format = ['txt', 'html']
    support_ts = ['parallel', 'chunk']
    html_template = """
<section>
<h2 class='header'>%(header)s</h2>
<div class='info'>
<div class='album'>%(album)s</div>
<div class='artists'>%(artists)s</div>
</div>
<div class='content'>
%(lyrics)s
</div>
</section>
"""
    html_frame = """<!DOCTYPE html>
<html lang="zh-cn">
<head>
<meta charset="utf-8"/>
<meta name="viewport" content="width=device-width, initial-scale=1" />
<meta name="referrer" content="never" />
<title>%s</title>
</head>
<body>
%s
</body>
</html>
"""
    lyrics_template = "<div class='content-%(ver)d' >%(content)s</div>"
    txt_template = "%(header)s\n专辑:%(album)s\n作者:%(artists)s\n\n%(lyrics)s\n\n"
    txt_frame = "%s\n\n%s"

    # Compiled once; raw strings avoid invalid-escape warnings.
    _WORD_TIMING_RE = re.compile(r'\(\d+,\d+\)')  # "(123,456)" markers
    _TIME_TAG_RE = re.compile(r'\[.*?\]')         # "[00:01.00]" style tags
    _BRACKET_RE = re.compile(r'[\[\]]')

    def __init__(self, file_format='html', title='Lyrics', typesetting='parallel'):
        """Create a converter.

        Args:
            file_format: Output format, one of ``support_format``.
            title: Book title; also used as the output file's base name.
            typesetting: Layout, one of ``support_ts``.

        Raises:
            ValueError: If ``file_format`` or ``typesetting`` is unsupported.
        """
        self.title = title
        if file_format not in Lyric2Book.support_format:
            raise ValueError('Unsupported format: %s.' % file_format)
        self.format = file_format
        self.res = ""
        self.data = ""
        if typesetting not in Lyric2Book.support_ts:
            raise ValueError('Unsupported typesetting: %s' % typesetting)
        self.ts = typesetting

    @classmethod
    def _split_line(cls, raw_line):
        """Split one lyric line into ``(time_tag, text)``.

        ``time_tag`` is the concatenation of every ``[..]`` tag on the line
        (empty string when there is none); ``text`` is the line with those
        tags and any ``(n,m)`` word-timing markers removed.
        """
        cleaned = cls._WORD_TIMING_RE.sub('', raw_line)
        time_tag = ''.join(cls._TIME_TAG_RE.findall(cleaned))
        # Remove the tags by pattern. str.strip(time_tag) would treat the tag
        # as a *set of characters* and also eat lyric characters such as
        # digits, ':' or '.' at either end of the text.
        return time_tag, cls._TIME_TAG_RE.sub('', cleaned)

    def chunk(self, lyrics):
        """Render every lyric version as its own contiguous block.

        Args:
            lyrics: Iterable of lyric strings; an entry may be ``None``, in
                which case that version is rendered as an empty block.

        Returns:
            The rendered fragment for ``self.format`` as a string.
        """
        versions = [
            [] if version is None
            else [self._split_line(raw) for raw in version.split('\n')]
            for version in lyrics
        ]

        parts = []
        if self.format == 'html':
            for number, lines in enumerate(versions, 1):
                parts.append("<div class='content-%d'>\n" % number)
                for time_tag, text in lines:
                    parts.append(
                        "<p><span class='timetag'>%s</span>%s</p> \n"
                        % (self._BRACKET_RE.sub('-', time_tag), text)
                    )
                parts.append("</div>")
        elif self.format == 'txt':
            for lines in versions:
                for time_tag, text in lines:
                    prefix = ''
                    if time_tag:
                        prefix = '-%s- ' % self._BRACKET_RE.sub('-', time_tag)
                    parts.append(prefix + text + '\n')
        return ''.join(parts)

    def parallel(self, lyrics):
        """Render lyric versions side by side, aligned on their time tags.

        The first version defines which time tags appear and in what order.
        A version that lacks a tag (or is ``None``) shows the first version's
        text for that tag. Lines of one version sharing the same time tag
        overwrite each other; the last one wins.

        Args:
            lyrics: Sequence of lyric strings; entries may be ``None``.

        Returns:
            The rendered fragment for ``self.format`` as a string; empty when
            ``lyrics`` is empty or the first version has no lines.
        """
        versions = []
        for version in lyrics:
            by_tag = {}
            if version is not None:
                for raw in version.split('\n'):
                    time_tag, text = self._split_line(raw)
                    by_tag[time_tag] = text
            versions.append(by_tag)

        if not versions:
            return ''

        primary = versions[0]
        rows = [
            (time_tag, [by_tag.get(time_tag, fallback) for by_tag in versions])
            for time_tag, fallback in primary.items()
        ]

        parts = []
        if self.format == 'html':
            row_template = (
                "<div class='content'>\n"
                "<div class='timetag'>\n<p>{timetag}</p>\n</div>\n"
                "<div class='single-lyric'>\n{content}</div>\n"
                "</div>"
            )
            for time_tag, texts in rows:
                paragraphs = ''.join(
                    "<p class='ver-%d'>%s</p>\n" % (number, text)
                    for number, text in enumerate(texts, 1)
                )
                parts.append(row_template.format(
                    timetag=self._BRACKET_RE.sub(' - ', time_tag),
                    content=paragraphs,
                ))
        elif self.format == 'txt':
            for time_tag, texts in rows:
                body = ''.join("%s\n" % text for text in texts)
                parts.append("{timetag}\n{content}\n".format(
                    timetag=self._BRACKET_RE.sub(' - ', time_tag),
                    content=body,
                ))
        return ''.join(parts)

    def doconv(self, sections):
        """Convert song sections into the book body and store it in ``self.res``.

        Args:
            sections: Iterable of dicts with keys ``'name'``, ``'album'``,
                ``'artists'`` (iterable of str) and ``'lyric'``. ``'lyric'``
                is either ``None`` (the song is skipped) or a dict holding
                the lyric versions under the keys ``'0'``, ``'1'``, ``'2'``;
                a missing or ``None`` version is treated as absent.
        """
        last_album = ''
        parts = []
        for item in sections:
            versions = item['lyric']
            if versions is None:
                continue

            header = item['name']
            album = item['album']
            artists = ','.join(item['artists'])
            lyrics = [versions.get(key) for key in ('0', '1', '2')]

            rendered = ''
            if self.ts == 'parallel':
                rendered = self.parallel(lyrics)
            elif self.ts == 'chunk':
                rendered = self.chunk(lyrics)

            fields = {'header': header, 'album': album,
                      'artists': artists, 'lyrics': rendered}
            # Emit an album heading only when the album changes between
            # consecutive songs.
            if self.format == 'html':
                if last_album != album:
                    last_album = album
                    parts.append('<h1>%s</h1>' % album)
                parts.append(Lyric2Book.html_template % fields)
            elif self.format == 'txt':
                if last_album != album:
                    last_album = album
                    parts.append(album + '\n\n')
                parts.append(Lyric2Book.txt_template % fields)
        self.res = ''.join(parts)

    def output(self):
        """Write the converted book to ``<title>.<format>`` (UTF-8) in the
        current working directory, wrapped in the frame for the format."""
        filename = self.title + '.' + self.format
        res = self.res
        if self.format == 'html':
            res = Lyric2Book.html_frame % (self.title, res)
        elif self.format == 'txt':
            res = Lyric2Book.txt_frame % (self.title, res)
        # Format first, then open: a formatting error must not leave a
        # truncated file behind.
        with open(filename, 'w', encoding='utf-8') as f:
            f.write(res)
-
if __name__ == '__main__':
    # Read the whole input first so the file is closed before conversion.
    with open('bbc.txt', 'r', encoding='utf-8') as file:
        content = json.load(file)

    # 'thjxt' was not a supported format and made the script fail at once;
    # 'txt' is one of Lyric2Book.support_format.
    t = Lyric2Book(file_format='txt', title='BBC Documentary', typesetting='parallel')
    t.doconv(content['result'])
    t.output()