-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathrechapter.py
237 lines (199 loc) · 12 KB
/
rechapter.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
#!/usr/bin/python3
import glob
import json
import datetime
import os
import re
import sys
from subprocess import call, CalledProcessError, check_output, DEVNULL
# get all mkv files in current directory, sort episodes alphabetically
mkv_filenames = glob.glob("*.mkv")
mkv_filenames.sort()
# create and populate dictionary mapping op/ed segment UIDs to filenames
songs = {}
for filename in mkv_filenames:
if ("OP" in filename) or ("ED" in filename):
# write mkvinfo to a file, then grep for the segment UID
with open("mkv_info.txt", "w") as mkvinfo:
call(["mkvinfo", filename], stdout=mkvinfo, stderr=DEVNULL)
segment_uid = check_output('grep "Segment UID" mkv_info.txt', shell=True).decode("utf-8").split(":")[-1].strip()
# this way we can easily figure out which file the ordered chapter is linking in later
songs[segment_uid] = filename
# loop over all mkv files in the current directory
for mkv_file in mkv_filenames:
# ignore any files with "OP" or "ED" in the filename, since we already dealt with those
if ("OP" in mkv_file) or ("ED" in mkv_file):
continue
ordered_chapters = {} # dictionary mapping chapter UIDs to segment UIDs if chapter is ordered
chapter_timecodes = [] # array of timecode dicts containing "start" and "end" as keys
# once again, write mkvinfo to a file, but this time grep for a bunch of chapter info
with open("mkv_info.txt", "w") as mkvinfo:
call(["mkvinfo", mkv_file], stdout=mkvinfo, stderr=DEVNULL)
with open("mkv_info.txt", "r") as mkvinfo:
chapter_info = mkvinfo.read().split("|+ Chapters")[-1]
chapters = chapter_info.split("| + Chapter atom")
# loop through chapters, ignoring the edition info before the first instance of "Chapter atom"
for y in range(len(chapters)):
if y == 0:
continue
# write just this portion to a file so it can be grepped without interference
with open("chapter_info.txt", "w") as info:
info.write(chapters[y])
with open("chapter_info.txt", "r") as info:
chapter_uid = re.search(r'\d+', check_output('grep "Chapter UID" chapter_info.txt', shell=True).decode("utf-8")).group()
try:
segment_uid = check_output('grep "segment UID" chapter_info.txt', shell=True).decode("utf-8").split(":")[-1].strip()
ordered_chapters[chapter_uid] = segment_uid
except CalledProcessError:
# if there's no segment UID, then this chapter isn't referring to an external file
pass
start_time = check_output('grep "Chapter time start" chapter_info.txt', shell=True).decode("utf-8").split(": ")[-1][3:-6]
end_time = check_output('grep "Chapter time end" chapter_info.txt', shell=True).decode("utf-8").split(": ")[-1][3:-6]
timecodes = { "chapter_uid": chapter_uid, "start": start_time, "end": end_time }
chapter_timecodes.append(timecodes)
op_filename = ""
ed_filename = ""
op_timecode = ""
ed_timecode = ""
op_first = False
ed_last = False
# loop through chapters to find insert points
# NOTE: this will set the ED as the OP if there is no OP this episode (keep this in mind for mkvmerge)
for y in range(len(chapter_timecodes)):
try:
segment_uid = ordered_chapters[chapter_timecodes[y]["chapter_uid"]]
if y == len(chapter_timecodes) - 1:
ed_last = True
if len(op_filename) == 0:
op_filename = songs[segment_uid]
else:
ed_filename = songs[segment_uid]
# if this is not the first chapter, then the insert point is the end of the previous chapter
if (y > 0):
if len(ed_filename) == 0:
op_timecode = chapter_timecodes[y - 1]["end"]
else:
ed_timecode = chapter_timecodes[y - 1]["end"]
else:
# if this IS the first chapter, then the OP insert point is just 00:00
op_first = True
op_timecode = chapter_timecodes[y]["start"]
except KeyError:
# only here to prevent Python from throwing a fit when the key doesn't match
pass
# hard code styles for use later in sorting ass file
call(["ffmpeg", "-i", mkv_file, "main.ass", "-y"])
style = ""
with open("main.ass", "r") as main_ass:
style = "\n".join(main_ass.read().split("[V4+ Styles]\n")[-1].split("\n\n[Events]")[0].split("\n")[1:])
# split file by op/ed insert points (yes, this magically still works even if they're right at the edge of the file)
with open("concat_list.txt", "w") as concat_list:
call(["mkvmerge", "-o", "temp.mkv", "--split", "timecodes:" + op_timecode + "," + ed_timecode, "--no-chapters", mkv_file])
# if the episode contains both OP and ED
if len(ed_timecode) > 0:
# if there's a prologue and an epilogue, put them between those and the main content
if not (op_first or ed_last):
call(["mkvmerge", "-o", mkv_file, "temp-001.mkv", "+" + op_filename, "+temp-002.mkv", "+" + ed_filename, "+temp-003.mkv"])
concat_list.write("file 'temp-001.mkv'\n")
concat_list.write("file '" + op_filename + "'\n")
concat_list.write("file 'temp-002.mkv'\n")
concat_list.write("file '" + ed_filename + "'\n")
concat_list.write("file 'temp-003.mkv'\n")
# if there's neither a prologue nor an epilogue, put the main episode between OP and ED
elif (op_first and ed_last):
call(["mkvmerge", "-o", mkv_file, op_filename, "+temp-001.mkv", "+" + ed_filename])
concat_list.write("file '" + op_filename + "'\n")
concat_list.write("file 'temp-001.mkv'\n")
concat_list.write("file '" + ed_filename + "'\n")
# if there's an epilogue but not a prologue...
elif op_first:
call(["mkvmerge", "-o", mkv_file, op_filename, "+temp-001.mkv", "+" + ed_filename, "+temp-002.mkv"])
concat_list.write("file '" + op_filename + "'\n")
concat_list.write("file 'temp-001.mkv'\n")
concat_list.write("file '" + ed_filename + "'\n")
concat_list.write("file 'temp-002.mkv'\n")
# if there's a prologue but not an epilogue...
else:
call(["mkvmerge", "-o", mkv_file, "temp-001.mkv", "+" + op_filename, "+temp-002.mkv", "+" + ed_filename])
concat_list.write("file 'temp-001.mkv'\n")
concat_list.write("file '" + op_filename + "'\n")
concat_list.write("file 'temp-002.mkv'\n")
concat_list.write("file '" + ed_filename + "'\n")
# if the episode contains one or the other song, but not both
elif len(op_timecode) > 0:
# if there's a prologue and main episode (or a main episode and epilogue if this is the ED)
if not (op_first or ed_last):
call(["mkvmerge", "-o", mkv_file, "temp-001.mkv", "+" + op_filename, "+temp-002.mkv"])
concat_list.write("file 'temp-001.mkv'\n")
concat_list.write("file '" + op_filename + "'\n")
concat_list.write("file 'temp-002.mkv'\n")
# if there's a main episode and the song is last
elif ed_last:
call(["mkvmerge", "-o", mkv_file, "+temp-001.mkv", "+" + op_filename])
concat_list.write("file 'temp-001.mkv'\n")
concat_list.write("file '" + op_filename + "'\n")
# if there's a main episode and the song is first
# (technically the case where the episode is just the song would hit this block, but that should never happen)
else:
call(["mkvmerge", "-o", mkv_file, "+" + op_filename, "+temp-001.mkv"])
concat_list.write("file '" + op_filename + "'\n")
concat_list.write("file 'temp-001.mkv'\n")
# if both are missing, obviously we don't want to do anything with the original file (the mkvmerge command should error out in this case)
else:
# remove temp files (not absolutely necessary as they should be overwritten anyway, but just to make sure)
temp_files = glob.glob("temp-*.mkv")
for temp_file in temp_files:
call(["rm", temp_file])
call(["rm", "chapter_info.txt", "main.ass"])
# skip the rest of this iteration because if there's op/ed to process, then the original file should have no playback issues
continue
# run ffmpeg command to merge videos
ffmpeg_array = ["ffmpeg", "-f", "concat", "-safe", "0", "-i", "concat_list.txt", "-c:v", "copy", "-map", "0:v", "-c:a", "copy", "-map", "0:a", "-c:s", "copy", "-map", "0:s"]
ffmpeg_array.append("temp-000.mkv")
ffmpeg_array.append("-y")
call(ffmpeg_array)
# extracting subtitles so we can sort by timecodes and add the main style back in
call(["ffmpeg", "-i", "temp-000.mkv", "temp.ass", "-y"])
ass_start = ""
ass_end = []
with open("temp.ass", "r") as ass_file:
# we don't need to sort anything before the Dialogue lines
ass_array = ass_file.read().split("Dialogue:")
ass_start = ass_array[0]
# add in the style from the main episode
ass_start = ass_start.replace("\n\n[Events]", "\n" + style + "\n\n[Events]")
# sort the Dialogue lines
ass_end = ass_array[1:]
ass_end.sort()
# write the sorted subtitles back to a new ass file
with open("sorted.ass", "w") as ass_sort:
ass_sort.write(ass_start)
for dialogue in ass_end:
ass_sort.write("Dialogue:" + dialogue)
# need to rename file produced by mkvmerge so that the output file can have that name
# (ffmpeg will overwrite the input file and crash if the input and output filenames are the same)
call(["mv", mkv_file, "temp-004.mkv"])
"""
What we're putting into ffmpeg:
1. The combined (prologue)? OP main ED (epilogue)? file produced by mkvmerge
2. The subtitle file, sorted by timecodes with styles from all parts
3. The combined (prologue)? OP main ED (epilogue)? file produced by ffmpeg
What this command outputs:
1. The video stream from #3, because ffmpeg is better at concatenating video (mkvmerge produces artifacts)
2. The audio stream from #1, because it works
3. The subtitle stream from #2, because we needed a separate file to sort them and add the main styles back in
4. The attachment stream from #1, because mkvmerge is better at concatenating attachments (for lack of better phrasing)
"""
ffmpeg_array = ["ffmpeg", "-i", "temp-004.mkv", "-i", "sorted.ass", "-i", "temp-000.mkv",
"-c:v", "copy", "-map", "2:0", "-c:a", "copy", "-map", "0:1", "-map", "1:0", "-map", "0:t",
"-disposition:s:s:0", "default", "-metadata:s:s:0", "language=eng", mkv_file, "-y"]
call(ffmpeg_array)
# remove temp files (not absolutely necessary as they should be overwritten anyway, but just to make sure)
temp_files = glob.glob("temp-*.mkv")
for temp_file in temp_files:
call(["rm", temp_file])
call(["rm", "chapter_info.txt", "main.ass", "concat_list.txt"])
call(["rm", "mkv_info.txt"])
# remove the now unnecessary op/ed files
for song in songs:
call(["rm", songs[song]])