diff --git a/src/assets/node-ytdl-core/sig.js b/src/assets/node-ytdl-core/sig.js
new file mode 100644
index 00000000..6ecb161f
--- /dev/null
+++ b/src/assets/node-ytdl-core/sig.js
@@ -0,0 +1,175 @@
+/* Copyright (C) 2012-present by fent
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+const jsVarStr = '[a-zA-Z_\\$][a-zA-Z_0-9]*';
+const jsSingleQuoteStr = `'[^'\\\\]*(?:\\\\[\\s\\S][^'\\\\]*)*'`;
+const jsDoubleQuoteStr = `"[^"\\\\]*(?:\\\\[\\s\\S][^"\\\\]*)*"`;
+const jsQuoteStr = `(?:${jsSingleQuoteStr}|${jsDoubleQuoteStr})`;
+const jsKeyStr = `(?:${jsVarStr}|${jsQuoteStr})`;
+const jsPropStr = `(?:\\.${jsVarStr}|\\[${jsQuoteStr}\\])`;
+const jsEmptyStr = `(?:''|"")`;
+const reverseStr = ':function\\(a\\)\\{' +
+  '(?:return )?a\\.reverse\\(\\)' +
+'\\}';
+const sliceStr = ':function\\(a,b\\)\\{' +
+  'return a\\.slice\\(b\\)' +
+'\\}';
+const spliceStr = ':function\\(a,b\\)\\{' +
+  'a\\.splice\\(0,b\\)' +
+'\\}';
+const swapStr = ':function\\(a,b\\)\\{' +
+  'var c=a\\[0\\];a\\[0\\]=a\\[b(?:%a\\.length)?\\];a\\[b(?:%a\\.length)?\\]=c(?:;return a)?' +
+'\\}';
+const actionsObjRegexp = new RegExp(
+  `var (${jsVarStr})=\\{((?:(?:${
+    jsKeyStr}${reverseStr}|${
+    jsKeyStr}${sliceStr}|${
+    jsKeyStr}${spliceStr}|${
+    jsKeyStr}${swapStr
+  }),?\\r?\\n?)+)\\};`);
+const actionsFuncRegexp = new RegExp(`${`function(?: ${jsVarStr})?\\(a\\)\\{` +
+  `a=a\\.split\\(${jsEmptyStr}\\);\\s*` +
+  `((?:(?:a=)?${jsVarStr}`}${
+  jsPropStr
+}\\(a,\\d+\\);)+)` +
+  `return a\\.join\\(${jsEmptyStr}\\)` +
+  `\\}`);
+const reverseRegexp = new RegExp(`(?:^|,)(${jsKeyStr})${reverseStr}`, 'm');
+const sliceRegexp = new RegExp(`(?:^|,)(${jsKeyStr})${sliceStr}`, 'm');
+const spliceRegexp = new RegExp(`(?:^|,)(${jsKeyStr})${spliceStr}`, 'm');
+const swapRegexp = new RegExp(`(?:^|,)(${jsKeyStr})${swapStr}`, 'm');
+
+/**
+ * Swaps the first element of the array with the one at given position.
+ * Position wraps around the array length, so the array is never extended.
+ *
+ * @param {Array} arr - array to modify in place
+ * @param {number} position - index of the element to swap with the head
+ * @returns {Array} the same array that was passed in
+ */
+const swapHeadAndPosition = (arr, position) => {
+  const first = arr[0];
+  arr[0] = arr[position % arr.length];
+  arr[position % arr.length] = first;
+
+  return arr;
+};
+
+/**
+ * Deciphers a signature by applying transformation tokens in order.
+ *
+ * @param {string} sig - ciphered signature
+ * @param {string} tokens - comma separated tokens from extractActions():
+ *   "r" (reverse), "wN" (swap head with N), "sN" (slice from N)
+ *   and "pN" (splice first N elements)
+ * @returns {string} deciphered signature
+ */
+function decipher(sig, tokens) {
+  sig = sig.split('');
+  tokens = tokens.split(',');
+
+  for(let i = 0, len = tokens.length; i < len; i++) {
+    let token = tokens[i], pos;
+    switch (token[0]) {
+      case 'r':
+        sig = sig.reverse();
+        break;
+      case 'w':
+        pos = ~~token.slice(1);
+        sig = swapHeadAndPosition(sig, pos);
+        break;
+      case 's':
+        pos = ~~token.slice(1);
+        sig = sig.slice(pos);
+        break;
+      case 'p':
+        pos = ~~token.slice(1);
+        sig.splice(0, pos);
+        break;
+    }
+  }
+
+  return sig.join('');
+};
+
+/**
+ * Extracts decipher actions from the player source code.
+ *
+ * @param {string} body - player JavaScript source
+ * @returns {?string} comma separated tokens accepted by decipher(),
+ *   or null when the decipher object or function was not found
+ */
+function extractActions(body) {
+  const objResult = actionsObjRegexp.exec(body);
+  const funcResult = actionsFuncRegexp.exec(body);
+
+  if(!objResult || !funcResult)
+    return null;
+
+  const obj = objResult[1].replace(/\$/g, '\\$');
+  const objBody = objResult[2].replace(/\$/g, '\\$');
+  const funcBody = funcResult[1].replace(/\$/g, '\\$');
+
+  let result = reverseRegexp.exec(objBody);
+  const reverseKey = result && result[1]
+    .replace(/\$/g, '\\$')
+    .replace(/\$|^'|^"|'$|"$/g, '');
+  result = sliceRegexp.exec(objBody);
+  const sliceKey = result && result[1]
+    .replace(/\$/g, '\\$')
+    .replace(/\$|^'|^"|'$|"$/g, '');
+  result = spliceRegexp.exec(objBody);
+  const spliceKey = result && result[1]
+    .replace(/\$/g, '\\$')
+    .replace(/\$|^'|^"|'$|"$/g, '');
+  result = swapRegexp.exec(objBody);
+  const swapKey = result && result[1]
+    .replace(/\$/g, '\\$')
+    .replace(/\$|^'|^"|'$|"$/g, '');
+
+  const keys = `(${[reverseKey, sliceKey, spliceKey, swapKey].join('|')})`;
+  const myreg = `(?:a=)?${obj
+  }(?:\\.${keys}|\\['${keys}'\\]|\\["${keys}"\\])` +
+    `\\(a,(\\d+)\\)`;
+  const tokenizeRegexp = new RegExp(myreg, 'g');
+  const tokens = [];
+
+  while((result = tokenizeRegexp.exec(funcBody)) !== null) {
+    const key = result[1] || result[2] || result[3];
+    const pos = result[4];
+    switch (key) {
+      case swapKey:
+        tokens.push(`w${pos}`);
+        break;
+      case reverseKey:
+        tokens.push('r');
+        break;
+      case sliceKey:
+        tokens.push(`s${pos}`);
+        break;
+      case spliceKey:
+        tokens.push(`p${pos}`);
+        break;
+    }
+  }
+
+  return tokens.join(',');
+}
diff --git a/src/youtube.js b/src/youtube.js
index aed15e9d..be6a8323 100644
--- a/src/youtube.js
+++ b/src/youtube.js
@@ -1,6 +1,7 @@
 const { GLib, GObject, Gst, Soup } = imports.gi;
 const ByteArray = imports.byteArray;
 const Debug = imports.src.debug;
+const YTDL = imports.src.assets['node-ytdl-core'];
 
 const { debug } = Debug;
 
@@ -21,7 +22,6 @@ var YouTubeClient = GObject.registerClass({
         /* videoID of current active download */
         this.downloadingVideoId = null;
 
-        this.downloadAborted = false;
         this.lastInfo = null;
     }
 
@@ -47,9 +47,9 @@ var YouTubeClient = GObject.registerClass({
                 debug(`obtaining YouTube video info: ${videoId}`);
                 this.downloadingVideoId = videoId;
 
-                const info = await this._getInfoPromise(videoId).catch(debug);
+                const [info, isAborted] = await this._getInfoPromise(videoId).catch(debug) || [];
                 if(!info) {
-                    if(this.downloadAborted)
+                    if(isAborted)
                         return reject(new Error('download aborted'));
 
                     debug(`failed, remaining tries: ${tries}`);
@@ -62,15 +62,71 @@ var YouTubeClient = GObject.registerClass({
                 )
                     ? 'video is not playable'
                     : (!info.streamingData)
-                    ? 'video response data is missing URIs'
+                    ? 'video response data is missing streaming data'
                     : null;
 
                 if(invalidInfoMsg) {
                     this.lastInfo = null;
 
-                    this.emit('info-resolved', false);
-                    this.downloadingVideoId = null;
-                    return reject(new Error(invalidInfoMsg));
+                    debug(new Error(invalidInfoMsg));
+                    break;
+                }
+
+                /* Make sure we have all formats arrays,
+                 * so we will not have to keep checking */
+                if(!info.streamingData.formats)
+                    info.streamingData.formats = [];
+                if(!info.streamingData.adaptiveFormats)
+                    info.streamingData.adaptiveFormats = [];
+
+                const isCipher = this._getIsCipher(info.streamingData);
+                if(isCipher) {
+                    debug('video requires deciphering');
+
+                    const embedUri = `https://www.youtube.com/embed/${videoId}`;
+                    const [body, isAbortedBody] =
+                        await this._downloadDataPromise(embedUri).catch(debug) || [];
+
+                    if(isAbortedBody)
+                        break;
+
+                    /* We need matching info, so start from beginning */
+                    if(!body)
+                        continue;
+
+                    const ytPath = (body.match(/(?<=jsUrl\":\").*?(?=\")/gs) || [])[0];
+                    if(!ytPath) {
+                        debug(new Error('could not find YouTube player URI'));
+                        break;
+                    }
+                    const ytUri = `https://www.youtube.com${ytPath}`;
+                    debug(`found player URI: ${ytUri}`);
+
+                    /* TODO: cache */
+                    let actions;
+
+                    if(!actions) {
+                        const [pBody, isAbortedPlayer] =
+                            await this._downloadDataPromise(ytUri).catch(debug) || [];
+                        if(!pBody || isAbortedPlayer) {
+                            debug(new Error('could not download player body'));
+                            break;
+                        }
+                        actions = YTDL.sig.extractActions(pBody);
+                    }
+
+                    if(!actions || !actions.length) {
+                        debug(new Error('could not extract decipher actions'));
+                        break;
+                    }
+                    debug('successfully obtained decipher actions');
+                    const isDeciphered = this._decipherStreamingData(
+                        info.streamingData, actions
+                    );
+                    if(!isDeciphered) {
+                        debug('streaming data could not be deciphered');
+                        break;
+                    }
                 }
 
                 this.lastInfo = info;
@@ -80,7 +136,7 @@ var YouTubeClient = GObject.registerClass({
                 return resolve(info);
             }
 
-            /* Do not clear video info here, as we still have
+            /* Do not clear video info here, as we might still have
              * valid info from last video that can be reused */
             this.emit('info-resolved', false);
             this.downloadingVideoId = null;
@@ -91,10 +147,7 @@ var YouTubeClient = GObject.registerClass({
 
     getBestCombinedUri(info)
     {
-        if(
-            !info.streamingData.formats
-            || !info.streamingData.formats.length
-        )
+        if(!info.streamingData.formats.length)
             return null;
 
         const combinedStream = info.streamingData.formats[
@@ -123,6 +176,41 @@ var YouTubeClient = GObject.registerClass({
         return true;
     }
 
+    _downloadDataPromise(url)
+    {
+        return new Promise((resolve, reject) => {
+            const message = Soup.Message.new('GET', url);
+            let data = '';
+
+            const chunkSignal = message.connect('got-chunk', (msg, chunk) => {
+                debug(`got chunk of data, length: ${chunk.length}`);
+
+                const chunkData = chunk.get_data();
+                data += (chunkData instanceof Uint8Array)
+                    ? ByteArray.toString(chunkData)
+                    : chunkData;
+            });
+
+            this.queue_message(message, (session, msg) => {
+                msg.disconnect(chunkSignal);
+
+                debug('got message response');
+                const statusCode = msg.status_code;
+
+                /* Internal Soup codes mean download aborted
+                 * or some other error that cannot be handled
+                 * and we do not want to retry in such case */
+                if(statusCode < 10)
+                    return resolve([null, true]);
+
+                if(statusCode !== 200)
+                    return reject(new Error(`response code: ${statusCode}`));
+
+                resolve([data, false]);
+            });
+        });
+    }
+
     _getCurrentDownloadPromise()
     {
         debug('resolving after current download finishes');
@@ -145,36 +233,20 @@ var YouTubeClient = GObject.registerClass({
     _getInfoPromise(videoId)
     {
         return new Promise((resolve, reject) => {
-            const url = `https://www.youtube.com/get_video_info?video_id=${videoId}&el=embedded`;
-            const message = Soup.Message.new('GET', url);
-            let data = '';
+            const query = [
+                `video_id=${videoId}`,
+                `el=embedded`,
+                `eurl=https://youtube.googleapis.com/v/${videoId}`,
+            ].join('&');
+            const url = `https://www.youtube.com/get_video_info?${query}`;
 
-            const chunkSignal = message.connect('got-chunk', (msg, chunk) => {
-                debug(`got chunk of data, length: ${chunk.length}`);
-
-                const chunkData = chunk.get_data();
-                data += (chunkData instanceof Uint8Array)
-                    ? ByteArray.toString(chunkData)
-                    : chunkData;
-            });
-
-            this.queue_message(message, (session, msg) => {
-                msg.disconnect(chunkSignal);
-
-                debug('got message response');
-
-                const statusCode = msg.status_code;
-
-                /* Internal Soup codes mean download abort
-                 * or some other error that cannot be handled */
-                this.downloadAborted = (statusCode < 10);
-
-                if(statusCode !== 200)
-                    return reject(new Error(`response code: ${statusCode}`));
+            this._downloadDataPromise(url).then(res => {
+                if(res[1])
+                    return resolve([null, true]);
 
                 debug('parsing video info JSON');
 
-                const gstUri = Gst.Uri.from_string('?' + data);
+                const gstUri = Gst.Uri.from_string('?' + res[0]);
                 if(!gstUri)
                     return reject(new Error('could not convert query to URI'));
 
@@ -193,11 +265,126 @@ var YouTubeClient = GObject.registerClass({
                     return reject(new Error('could not parse video info JSON'));
 
                 debug('successfully parsed video info JSON');
-
-                resolve(info);
-            });
+                resolve([info, false]);
+            })
+            .catch(err => reject(err));
         });
     }
+
+    _getIsCipher(data)
+    {
+        /* Returns true when streams need deciphering. Checks only
+         * the first stream, preferring combined one and falling
+         * back to adaptive when there are no combined formats */
+        const stream = data.formats[0] || data.adaptiveFormats[0];
+
+        if(stream && stream.url)
+            return false;
+
+        if(stream && (stream.signatureCipher || stream.cipher))
+            return true;
+
+        /* FIXME: no URLs and no cipher, what now? */
+        debug(new Error('no url or cipher in streams'));
+
+        return false;
+    }
+
+    _decipherStreamingData(data, actions)
+    {
+        debug('checking cipher query keys');
+
+        /* Cipher query keys should be the same for all
+         * streams, so parse any stream to get their names */
+        const anyStream = data.formats[0] || data.adaptiveFormats[0];
+        const sigQuery = anyStream && (anyStream.signatureCipher || anyStream.cipher);
+
+        if(!sigQuery)
+            return false;
+
+        const gstUri = Gst.Uri.from_string('?' + sigQuery);
+        const queryKeys = gstUri.get_query_keys();
+
+        const cipherKey = queryKeys.find(key => {
+            const value = gstUri.get_query_value(key);
+            /* A long value that is not URI */
+            return (
+                value.length > 32
+                && !Gst.Uri.is_valid(value)
+            );
+        });
+        if(!cipherKey) {
+            debug('no stream cipher key name');
+            return false;
+        }
+
+        const sigKey = queryKeys.find(key => {
+            const value = gstUri.get_query_value(key);
+            /* A short value that is not URI */
+            return (
+                value.length < 32
+                && !Gst.Uri.is_valid(value)
+            );
+        });
+        if(!sigKey) {
+            debug('no stream signature key name');
+            return false;
+        }
+
+        const urlKey = queryKeys.find(key =>
+            Gst.Uri.is_valid(gstUri.get_query_value(key))
+        );
+        if(!urlKey) {
+            debug('no stream URL key name');
+            return false;
+        }
+
+        const cipherKeys = {
+            url: urlKey,
+            sig: sigKey,
+            cipher: cipherKey,
+        };
+
+        debug('deciphering streams');
+
+        for(let format of [data.formats, data.adaptiveFormats]) {
+            for(let stream of format) {
+                const formatUrl = this._getDecipheredUrl(
+                    stream, actions, cipherKeys
+                );
+                if(!formatUrl) {
+                    debug('undecipherable stream');
+                    debug(stream);
+
+                    return false;
+                }
+                stream.url = formatUrl;
+            }
+        }
+        debug('all streams deciphered');
+
+        return true;
+    }
+
+    _getDecipheredUrl(stream, actions, queryKeys)
+    {
+        debug(`deciphering stream id: ${stream.itag}`);
+
+        const sigQuery = stream.signatureCipher || stream.cipher;
+        if(!sigQuery) return null;
+
+        const gstUri = Gst.Uri.from_string('?' + sigQuery);
+
+        const url = gstUri.get_query_value(queryKeys.url);
+        const cipher = gstUri.get_query_value(queryKeys.cipher);
+        const sig = gstUri.get_query_value(queryKeys.sig);
+
+        const key = YTDL.sig.decipher(cipher, actions);
+
+        debug('stream deciphered');
+
+        return `${url}&${sig}=${key}`;
+    }
 });
 
 function checkYouTubeUri(uri)