experimental hardware acceleration #21

Merged
knotteye merged 4 commits from hwaccel into develop 3 years ago
  1. 11
      docs/CONFIGURATION.md
  2. 53
      docs/HWACCEL.md
  3. 4
      install/config.example.yml
  4. 5
      src/config.ts
  5. 37
      src/server.ts

@ -38,17 +38,16 @@ transcode:
# satyr will generate one source quality variant, and the remaining
# variants will be of incrementally lower quality and bitrate
# having more than 4-5 variants will start giving diminishing returns on stream quality for cpu load
# if you can't afford to generate at least 3 variants, it's recommended to leave adaptive streaming off
inputflags: ""
# additional flags to apply to the input during transcoding
outputflags: ""
# additional flags to apply to the output during transcoding
# hardware acceleration is a bit difficult to configure programmatically
# this is a good place to do so for your system
# https://trac.ffmpeg.org/wiki/HWAccelIntro is a good place to start
# having more than 4-5 variants will start giving diminishing returns on stream quality for cpu load
# if you can't afford to generate at least 3 variants, it's recommended to leave adaptive streaming off
hwaccel:
# See HWACCEL.md for information on configuring hardware acceleration.
crypto:
saltRounds: 12

@ -0,0 +1,53 @@
## Configuring Hardware Acceleration
Satyr supports the NVENC and VA-API hardware acceleration APIs. If you've configured your system correctly (the hard part), it should be enough to set the type and use the default device setting if you only have one hardware acceleration device.
### System
Configuring the system for any hardware acceleration API involves three main steps: selecting the right drivers, installing the API libraries, and configuring ffmpeg.
#### NVENC
NVENC in ffmpeg can work with either open-source drivers (nouveau) or NVIDIA's proprietary drivers. The documentation for your distribution should have instructions for installing these.
The only system library you should need is the CUDA toolkit, generally named cudatoolkit, nvidia-cuda-toolkit, or some variation in your system repositories.
You can also try installing manually from [here](https://developer.nvidia.com/cuda-downloads).
Most binary distributions provide a version of ffmpeg with NVENC already enabled. If not, you can try compiling ffmpeg from source with the `--enable-nvenc` flag. If you use a source-based distribution, you should be familiar with enabling optional compile flags.
You can verify that ffmpeg has been set up correctly by checking the output of `ffmpeg -hide_banner -hwaccels | grep cuvid` and `ffmpeg -hide_banner -encoders | grep nvenc`. If you don't see anything, something is wrong.
#### VA-API
VA-API is an extremely generic API. Although the package names might be different in your distribution, the Arch Wiki page for hardware acceleration has good information on [driver selection](https://wiki.archlinux.org/index.php/Hardware_video_acceleration#Installation) and [verifying](https://wiki.archlinux.org/index.php/Hardware_video_acceleration#Verifying_VA-API) a VA-API install for a wide range of devices.
Regardless of driver selection, you will also need libva or the equivalent from your distribution, and libva-utils can be helpful as well.
Most binary distributions provide a version of ffmpeg with VA-API already enabled. If not, you can try compiling ffmpeg from source with the `--enable-vaapi` flag. If you use a source-based distribution, you should be familiar with enabling optional compile flags.
You can verify that ffmpeg has been set up correctly by checking the output of `ffmpeg -hide_banner -hwaccels | grep vaapi` and `ffmpeg -hide_banner -encoders | grep vaapi`. If you don't see anything, something is wrong.
### Satyr
```
# Decoding
hwaccel:
# Enable hardware acceleration for decoding as well as encoding.
# Probably not worth it, hardware decoding won't be any faster compared to software on a vaguely modern CPU
# Hardware decoding also may not support the input format, in which case transcoding will fail
decode: true
# Only supported for VA-API
# Fall back to software decoding if hardware decoding fails
hwaccel:
decode: 'fallback'
# NVENC
hwaccel:
type: 'nvenc'
# device is optional for nvenc
device: 0
# nvenc wants a device number instead of a path, set to null to use the default
# VA-API
hwaccel:
type: 'vaapi'
# device is mandatory for va-api
device: '/dev/dri/renderD128'
```

@ -32,6 +32,10 @@ transcode:
#unused right now, will always transcode to dash
format: dash
hwaccel:
# see docs/HWACCEL.md for instructions on configuring hardware acceleration
type: null
chat:
irc:

@ -39,6 +39,11 @@ const config: Object = {
connectionTimeout: '1000',
insecureAuth: false,
debug: false }, localconfig['database']),
hwaccel: Object.assign({
type: null,
device: null,
decode: false
}, localconfig['hwaccel']),
rtmp: Object.assign({
cluster: false,
port: 1935,

@ -42,9 +42,10 @@ function init () {
if(session.audioCodec !== 0 && session.videoCodec !== 0){
transCommand(results[0].username, key).then((r) => {
execFile(config['media']['ffmpeg'], r, {maxBuffer: Infinity}, (err, stdout, stderr) => {
/*console.log(err);
console.log(stdout);
console.log(stderr);*/
//console.log(r);
//console.log(err);
//console.log(stdout);
//console.log(stderr);
});
});
break;
@ -126,29 +127,49 @@ function init () {
async function transCommand(user: string, key: string): Promise<string[]>{
let args: string[] = ['-loglevel', 'fatal', '-y'];
let vcodec: string = 'libx264';
if(config['hwaccel']['type'] === 'nvenc'){
vcodec = 'h264_nvenc';
if(config['hwaccel']['decode']){
args = args.concat(['-hwaccel', 'cuda']);
if(config['hwaccel']['device'])
args = args.concat(['-hwaccel_device', config['hwaccel']['device']]);
args = args.concat(['-hwaccel_output_format', 'cuda']);
}
}
else if (config['hwaccel']['type'] === 'vaapi') {
vcodec = 'h264_vaapi';
if(config['hwaccel']['decode'] === 'fallback'){
args = args.concat('init_hw_device', 'vaapi=foo:'+config['hwaccel']['device'], '-hwaccel vaapi', '-hwaccel_output_format', 'vaapi', '-hwaccel_device', 'foo');
} else if (config['hwaccel']['decode']) {
args = args.concat(['-hwaccel', 'vaapi', '-hwaccel_output_format', 'vaapi', '-vaapi_device', config['hwaccel']['device']]);
}
}
if(config['transcode']['inputflags'] !== null && config['transcode']['inputflags'] !== "") args = args.concat(config['transcode']['inputflags'].split(" "));
args = args.concat(['-i', 'rtmp://127.0.0.1:'+config['rtmp']['port']+'/'+config['media']['privateEndpoint']+'/'+key, '-movflags', '+faststart']);
if(config['transcode']['adaptive']===true && config['transcode']['variants'] > 1) {
for(let i=0;i<config['transcode']['variants'];i++){
args = args.concat(['-map', '0:2']);
}
args = args.concat(['-map', '0:1', '-c:a', 'aac', '-c:v:0', 'libx264']);
args = args.concat(['-map', '0:1', '-c:a', 'aac', '-c:v:0', vcodec]);
for(let i=1;i<config['transcode']['variants'];i++){
args = args.concat(['-c:v:'+i, 'libx264',]);
args = args.concat(['-c:v:'+i, vcodec,]);
}
for(let i=1;i<config['transcode']['variants'];i++){
let crf: number = Math.floor(18 + (i * 8)) > 51 ? 51 : Math.floor(18 + (i * 7));
args = args.concat(['-crf:'+i, ''+crf]);
}
for(let i=1;i<config['transcode']['variants'];i++){
let bv: number = Math.floor((5000 / config['transcode']['variants']) * (config['transcode']['variants'] - i));
let bv: number = Math.floor((10000 / config['transcode']['variants']) * (config['transcode']['variants'] - i));
args = args.concat(['-b:v:'+i, ''+bv]);
}
}
else {
args = args.concat(['-c:a', 'aac', '-c:v', 'libx264']);
args = args.concat(['-c:a', 'aac', '-c:v', vcodec]);
}
args = args.concat(['-preset', 'veryfast', '-tune', 'zerolatency']);
if(!config['hwaccel']['type'])
args = args.concat(['-preset', 'veryfast']);
args = args.concat(['-tune', 'zerolatency']);
//if(config['transcode']['format'] === 'dash')
args = args.concat(['-remove_at_exit', '1', '-seg_duration', '1', '-window_size', '30']);
if(config['transcode']['outputflags'] !== null && config['transcode']['outputflags'] !== "") args = args.concat(config['transcode']['outputflags'].split(" "));