2021-01-10 13:46:02 -06:00
5 changed files with 96 additions and 14 deletions
--- a/docs/CONFIGURATION.md
+++ b/docs/CONFIGURATION.md
@ -38,17 +38,16 @@ transcode:
 # satyr will generate one source quality variant, and the remaining
 # variants will be of incrementally lower quality and bitrate

+# having more than 4-5 variants will start giving diminishing returns on stream quality for cpu load
+# if you can't afford to generate at least 3 variants, it's recommended to leave adaptive streaming off
+
  inputflags: ""
 # additional flags to apply to the input during transcoding
  outputflags: ""
 # additional flags to apply to the output during transcoding

-# hardware acceleration is a bit difficult to configure programmatically
-# this is a good place to do so for your system
-# https://trac.ffmpeg.org/wiki/HWAccelIntro is a good place to start
-
-# having more than 4-5 variants will start giving diminishing returns on stream quality for cpu load
-# if you can't afford to generate at least 3 variants, it's recommended to leave adaptive streaming off
+hwaccel:
+# See HWACCEL.md for information on configuring hardware acceleration.

 crypto:
  saltRounds: 12
--- a/docs/HWACCEL.md
+++ b/docs/HWACCEL.md
@ -0,0 +1,53 @@
+## Configuring Hardware Acceleration
+Satyr supports the NVENC and VA-API hardware acceleration APIs. If you've configured your system correctly (the hard part) it should be enough to set the type and use the default device setting if you only have one hardware acceleration device.
+
+### System
+Configuring the system for any hardware acceleration API involves three main steps: selecting the right drivers, installing the API libraries, and configuring ffmpeg.
+
+#### NVENC
+NVENC in ffmpeg can work with either open-source drivers (nouveau) or nvidia's proprietary drivers. The documentation for your distribution should have instructions for installing these.
+
+The only system library you should need is the CUDA toolkit, generally named cudatoolkit, nvidia-cuda-toolkit, or some variation in your system repositories.
+You can also try installing manually from [here](https://developer.nvidia.com/cuda-downloads).
+
+Most binary distributions provide a version of ffmpeg with NVENC already enabled. If not, you can try compiling ffmpeg from source with the `--enable-nvenc` flag. If you use a source-based distribution, you should be familiar with enabling optional compile flags.
+
+You can verify that ffmpeg has been set up correctly by checking the output of `ffmpeg -hide_banner -hwaccels | grep cuvid` and `ffmpeg -hide_banner -encoders | grep nvenc`. If you don't see anything, something is wrong.
+
+#### VA-API
+VA-API is an extremely generic API. Although the package names might be different in your distribution, the arch wiki page for hardware acceleration has good information on [driver selection](https://wiki.archlinux.org/index.php/Hardware_video_acceleration#Installation) and [verifying](https://wiki.archlinux.org/index.php/Hardware_video_acceleration#Verifying_VA-API) a VA-API install for a wide range of devices.
+
+Regardless of driver selection, you will also need libva or the equivalent from your distribution, and libva-utils can be helpful as well.
+
+Most binary distributions provide a version of ffmpeg with VA-API already enabled. If not, you can try compiling ffmpeg from source with the `--enable-vaapi` flag. If you use a source-based distribution, you should be familiar with enabling optional compile flags.
+
+You can verify that ffmpeg has been set up correctly by checking the output of `ffmpeg -hide_banner -hwaccels | grep vaapi` and `ffmpeg -hide_banner -encoders | grep vaapi`. If you don't see anything, something is wrong.
+
+### Satyr
+```
+# Decoding
+hwaccel:
+# Enable hardware acceleration for decoding as well as encoding.
+# Probably not worth it, hardware decoding won't be any faster compared to software on a vaguely modern CPU
+# Hardware decoding also may not support the input format, in which case transcoding will fail
+  decode: true
+
+# Only supported for VA-API
+# Fall back to software decoding if hardware decoding fails
+hwaccel:
+  decode: 'fallback'
+
+
+# NVENC
+hwaccel:
+  type: 'nvenc'
+# device is optional for nvenc
+  device: 0
+# nvenc wants a device number instead of a path, set to null to use the default
+
+# VA-API
+hwaccel:
+  type: 'vaapi'
+# device is mandatory for va-api
+  device: '/dev/dri/renderD128'
+```
--- a/install/config.example.yml
+++ b/install/config.example.yml
@ -32,6 +32,10 @@ transcode:
  #unused right now, will always transcode to dash
  format: dash

+hwaccel:
+  # see docs/HWACCEL.md for instructions on configuring hardware acceleration
+  type: null
+
 chat:

  irc:
--- a/src/config.ts
+++ b/src/config.ts
@ -39,6 +39,11 @@ const config: Object = {
 	   connectionTimeout: '1000',
 	   insecureAuth: false,
 	   debug: false }, localconfig['database']),
+	hwaccel: Object.assign({
+		type: null,
+		device: null,
+		decode: false
+	}, localconfig['hwaccel']),
 	rtmp: Object.assign({
 	  cluster: false,
 	  port: 1935,
--- a/src/server.ts
+++ b/src/server.ts
@ -42,9 +42,10 @@ function init () {
 					if(session.audioCodec !== 0 && session.videoCodec !== 0){
 						transCommand(results[0].username, key).then((r) => {
 							execFile(config['media']['ffmpeg'], r, {maxBuffer: Infinity}, (err, stdout, stderr) => {
-								/*console.log(err);
-								console.log(stdout);
-								console.log(stderr);*/
+								//console.log(r);
+								//console.log(err);
+								//console.log(stdout);
+								//console.log(stderr);
 							});
 						});
 						break;
@ -126,29 +127,49 @@ function init () {

 async function transCommand(user: string, key: string): Promise<string[]>{
 	let args: string[] = ['-loglevel', 'fatal', '-y'];
+	let vcodec: string = 'libx264';
+	if(config['hwaccel']['type'] === 'nvenc'){
+		vcodec = 'h264_nvenc';
+		if(config['hwaccel']['decode']){
+			args = args.concat(['-hwaccel', 'cuda']);
+			if(config['hwaccel']['device'])
+				args = args.concat(['-hwaccel_device', config['hwaccel']['device']]);
+			args = args.concat(['-hwaccel_output_format', 'cuda']);
+		}
+	}
+	else if (config['hwaccel']['type'] === 'vaapi') {
+		vcodec = 'h264_vaapi';
+		if(config['hwaccel']['decode'] === 'fallback'){
+			args = args.concat('init_hw_device', 'vaapi=foo:'+config['hwaccel']['device'], '-hwaccel vaapi', '-hwaccel_output_format', 'vaapi', '-hwaccel_device', 'foo');
+		} else if (config['hwaccel']['decode']) {
+			args = args.concat(['-hwaccel', 'vaapi', '-hwaccel_output_format', 'vaapi', '-vaapi_device', config['hwaccel']['device']]);
+		}
+	}
 	if(config['transcode']['inputflags'] !== null && config['transcode']['inputflags'] !== "") args = args.concat(config['transcode']['inputflags'].split(" "));
 	args = args.concat(['-i', 'rtmp://127.0.0.1:'+config['rtmp']['port']+'/'+config['media']['privateEndpoint']+'/'+key, '-movflags', '+faststart']);
 	if(config['transcode']['adaptive']===true && config['transcode']['variants'] > 1) {
 		for(let i=0;i<config['transcode']['variants'];i++){
 			args = args.concat(['-map', '0:2']);
 		}
-		args = args.concat(['-map', '0:1', '-c:a', 'aac', '-c:v:0', 'libx264']);
+		args = args.concat(['-map', '0:1', '-c:a', 'aac', '-c:v:0', vcodec]);
 		for(let i=1;i<config['transcode']['variants'];i++){
-			args = args.concat(['-c:v:'+i, 'libx264',]);
+			args = args.concat(['-c:v:'+i, vcodec,]);
 		}
 		for(let i=1;i<config['transcode']['variants'];i++){
 			let crf: number = Math.floor(18 + (i * 8)) > 51 ? 51 : Math.floor(18 + (i * 7));
 			args = args.concat(['-crf:'+i, ''+crf]);
 		}
 		for(let i=1;i<config['transcode']['variants'];i++){
-			let bv: number = Math.floor((5000 / config['transcode']['variants']) * (config['transcode']['variants'] - i));
+			let bv: number = Math.floor((10000 / config['transcode']['variants']) * (config['transcode']['variants'] - i));
 			args = args.concat(['-b:v:'+i, ''+bv]);
 		}
 	}
 	else {
-		args = args.concat(['-c:a', 'aac', '-c:v', 'libx264']);
+		args = args.concat(['-c:a', 'aac', '-c:v', vcodec]);
 	}
-	args = args.concat(['-preset', 'veryfast', '-tune', 'zerolatency']);
+	if(!config['hwaccel']['type'])
+		args = args.concat(['-preset', 'veryfast']);
+	args = args.concat(['-tune', 'zerolatency']);
 	//if(config['transcode']['format'] === 'dash')
 	args = args.concat(['-remove_at_exit', '1', '-seg_duration', '1', '-window_size', '30']);
 	if(config['transcode']['outputflags'] !== null && config['transcode']['outputflags'] !== "") args = args.concat(config['transcode']['outputflags'].split(" "));