GVM User Suite
User tools for the GVM open source project.
manager.c
Go to the documentation of this file.
1 /*
2  * Copyright (C) 2022 2666680 Ontario Inc.
3  *
4  * This program is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU General Public License
6  * as published by the Free Software Foundation; either version 2
7  * of the License, or (at your option) any later version.
8  *
9  * This program is distributed in the hope that it will be useful,
10  * but WITHOUT ANY WARRANTY; without even the implied warranty of
11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12  * GNU General Public License for more details.
13  *
14  * You should have received a copy of the GNU General Public License
15  * along with this program; if not, write to the Free Software
16  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
17  *
18  */
#include <gpu/nvidia/device.h>
#include <gpu/nvidia/manager.h>
#include <gpu/nvidia/resman/api.h>

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
28 
/*
 * Hardcoded 128-byte signature blob.  Copied verbatim into
 * RmMdevConfig.sign for every mediated device created in
 * create_nv_mgr_mdevs().
 *
 * NOTE(review): bytes > 0x7F rely on implementation-defined conversion
 * to (possibly signed) char; the bit pattern is preserved on all common
 * ABIs, but `unsigned char` would be the more explicit element type --
 * confirm against the RmMdevConfig.sign declaration before changing.
 */
static const char SIGN[128] = {
0x47, 0xab, 0x8d, 0x39, 0xb3, 0xaf, 0xd0, 0x2c, 0x79, 0x6f, 0xd0, 0xc7, 0x7a, 0x78, 0x84, 0x68,
0xf0, 0x9b, 0x69, 0xe8, 0xb6, 0xc2, 0xc5, 0x05, 0x59, 0x97, 0xf2, 0x0f, 0x77, 0x3a, 0x94, 0x91,
0x92, 0x56, 0x2d, 0xf0, 0x4f, 0xae, 0xa6, 0x4d, 0xcc, 0x51, 0x32, 0x17, 0xe5, 0xda, 0xf0, 0x94,
0x42, 0x93, 0x51, 0x05, 0x49, 0xe9, 0x61, 0xfd, 0x22, 0x24, 0x6c, 0x8f, 0x88, 0xe3, 0x16, 0x63,
0x91, 0x04, 0x20, 0x6a, 0xea, 0x27, 0xc4, 0xe7, 0x11, 0xfc, 0x88, 0x24, 0xb9, 0xaa, 0x1b, 0x85,
0xc5, 0x1a, 0x48, 0x7f, 0x99, 0xf4, 0x8e, 0xda, 0x55, 0x2b, 0x4a, 0xfe, 0x48, 0x79, 0x75, 0x78,
0x48, 0x16, 0x02, 0x0f, 0x22, 0xc2, 0x9d, 0x20, 0xfa, 0xbb, 0x21, 0x39, 0x56, 0x78, 0xd8, 0x80,
0x96, 0x5c, 0x5c, 0xe4, 0x7c, 0xad, 0x87, 0x24, 0x20, 0x70, 0xad, 0x63, 0x84, 0x96, 0x92, 0x3f
};
40 
/*
 * Builds a device handle id: the sum of the client handle and the gpu id
 * occupies the upper bits, and the low 8 bits carry the per-gpu handle
 * index (1 = device, 2 = subdevice, 3 = mdev config).
 */
static inline uint32_t compose_manager_id(uint32_t client, uint32_t gpu_id, uint32_t handle)
{
    const uint32_t low_mask = (1u << 8) - 1u;   /* 0xFF */
    uint32_t high = client + gpu_id;

    return (high << 8) | (handle & low_mask);
}
46 
/*
 * Creates a NVIDIA manager object.
 *
 * Opens the NVIDIA control node, allocates a root RM client, then for every
 * probed GPU id: fetches its PCI info, attaches it to the driver, opens its
 * per-minor device node, and allocates the device / subdevice / mdev-config
 * RM resources under the root client.
 *
 * Returns: a populated struct NvMdev on success.  On failure the returned
 *          object has fd == -1 and res == NULL (partial GPU entries freed).
 */
struct NvMdev create_nv_mgr()
{
    struct NvMdev ret = {};
    struct Nv0000CtrlGpuGetProbedIdsParams probed_ids = {};

    /* Minor 255 -- presumably the NVIDIA control node; TODO confirm
     * against nv_open_dev(). */
    ret.fd = nv_open_dev(255);

    /* Root RM client: parent of every other resource allocated below. */
    ret.res = rm_alloc_res(ret.fd, NULL, 0, 0, NULL);

    if (ret.res == NULL)
        goto failure;

    if (RM_CTRL(ret.fd, ret.res, NV0000_GET_PROBED_IDS, probed_ids) == NULL)
        goto free_failure;

    /* probed_ids.gpu_ids[] holds at most 32 entries, terminated by
     * 0xFFFFFFFF. */
    for (int i = 0; i < 32 && probed_ids.gpu_ids[i] != 0xFFFFFFFF; ++i) {
        /* NOTE(review): calloc results are never checked -- an OOM here
         * dereferences NULL a few lines down.  Confirm intended policy. */
        struct NvMdevGpu *mgpu = calloc(1, sizeof(struct NvMdevGpu));

        mgpu->ctl_fd = ret.fd;
        mgpu->gpu = calloc(1, sizeof(struct Gpu));
        mgpu->root = ret.res->object;
        /* Handle indices 1..3 distinguish device / subdevice / mdev
         * config under the same client+gpu pair. */
        mgpu->device = compose_manager_id(ret.res->object, probed_ids.gpu_ids[i], 1);
        mgpu->sub_device = compose_manager_id(ret.res->object, probed_ids.gpu_ids[i], 2);
        mgpu->mdev_config = compose_manager_id(ret.res->object, probed_ids.gpu_ids[i], 3);

        struct Nv0000CtrlGpuGetPciInfoParams pci_info = {
            .gpu_id = probed_ids.gpu_ids[i]
        };
        struct Nv0000CtrlGpuAttachIdsParams attach_ids = {};

        /* Attach exactly one id; 0xFFFFFFFF terminates the list. */
        attach_ids.gpu_ids[0] = probed_ids.gpu_ids[i];
        attach_ids.gpu_ids[1] = 0xFFFFFFFF;

        /* NOTE(review): return values of these two controls are ignored;
         * a failed attach still proceeds to device allocation. */
        RM_CTRL(ret.fd, ret.res, NV0000_GET_PCI_INFO, pci_info);
        RM_CTRL(ret.fd, ret.res, NV0000_ATTACH_IDS, attach_ids);

        /* Per-GPU device node (minor i).  NOTE(review): this fd is not
         * closed on the free_failure path below. */
        mgpu->dev_fd = nv_open_dev(i);

        struct Nv0000CtrlGpuGetIdInfoParams gpu_info = {};
        struct Nv0080AllocParams dev_alloc = {};
        uint32_t sub_dev_alloc = 0;

        gpu_info.gpu_id = probed_ids.gpu_ids[i];

        RM_CTRL(ret.fd, ret.res, NV0000_GET_GPU_INFO, gpu_info);

        dev_alloc.deviceId = gpu_info.dev_inst;
        dev_alloc.hClientShare = ret.res->object;

        /* Resource tree: root -> NV0080 device -> NV2080 subdevice
         *                                      -> NVA081 mdev config. */
        mgpu->dev = rm_alloc_res(mgpu->ctl_fd, ret.res, mgpu->device, NV0080_CLASS, &dev_alloc);
        mgpu->sdev = rm_alloc_res(mgpu->ctl_fd, mgpu->dev, mgpu->sub_device, NV2080_CLASS, &sub_dev_alloc);

        struct BusGetPciInfo bus_info = {};

        RM_CTRL(ret.fd, mgpu->sdev, NV2080_GET_BUS_PCI_INFO, bus_info);

        mgpu->mdev = rm_alloc_res(mgpu->ctl_fd, mgpu->sdev, mgpu->mdev_config, NVA081_CLASS, NULL);

        /* The device node owns a back-pointer to our Gpu struct. */
        mgpu->dev->class_info = mgpu->gpu;

        mgpu->gpu->identifier = probed_ids.gpu_ids[i];
        mgpu->gpu->domain = pci_info.domain;
        mgpu->gpu->bus = pci_info.bus;
        mgpu->gpu->slot = pci_info.slot;
        /* 0x10DE is the NVIDIA PCI vendor id; dev_id/sub_dev_id pack the
         * id in their upper 16 bits. */
        mgpu->gpu->vendor_id = 0x10DE;
        mgpu->gpu->device_id = bus_info.dev_id >> 16;
        mgpu->gpu->sub_vendor_id = 0x10DE;
        mgpu->gpu->sub_device_id = bus_info.sub_dev_id >> 16;

        printf(
            "Created gpu: 0x%.8X (0x%.4X, 0x%.4X, 0x%.4X, 0x%.4X)\n",
            probed_ids.gpu_ids[i],
            mgpu->gpu->vendor_id,
            mgpu->gpu->device_id,
            mgpu->gpu->sub_vendor_id,
            mgpu->gpu->sub_device_id
        );

        ret.gpus[i] = mgpu;
    }

    return ret;

free_failure:
    /* NOTE(review): only the NvMdevGpu wrapper is freed here -- the
     * ->gpu allocation appears to leak unless rm_free_tree() releases it
     * via dev->class_info; verify against rm_free_tree(). */
    for (int i = 0; i < 32 && ret.gpus[i] != NULL; ++i) {
        printf(
            "Destroyed gpu: 0x%.8X (0x%.4X, 0x%.4X, 0x%.4X, 0x%.4X)\n",
            ret.gpus[i]->gpu->identifier,
            ret.gpus[i]->gpu->vendor_id,
            ret.gpus[i]->gpu->device_id,
            ret.gpus[i]->gpu->sub_vendor_id,
            ret.gpus[i]->gpu->sub_device_id
        );
        free(ret.gpus[i]);
        ret.gpus[i] = NULL;
    }

    rm_free_tree(ret.fd, ret.res);

    ret.res = NULL;

failure:
    close(ret.fd);
    ret.fd = -1;
    return ret;
}
155 
158 void free_nv_mgr(struct NvMdev *mgr)
159 {
160  if (mgr->fd == -1)
161  return;
162 
163  for (int i = 0; i < 32 && mgr->gpus[i] != NULL; ++i) {
164  printf(
165  "Destroyed gpu: 0x%.8X (0x%.4X, 0x%.4X, 0x%.4X, 0x%.4X)\n",
166  mgr->gpus[i]->gpu->identifier,
167  mgr->gpus[i]->gpu->vendor_id,
168  mgr->gpus[i]->gpu->device_id,
169  mgr->gpus[i]->gpu->sub_vendor_id,
170  mgr->gpus[i]->gpu->sub_device_id
171  );
172  free(mgr->gpus[i]);
173  mgr->gpus[i] = NULL;
174  }
175 
176  rm_free_tree(mgr->fd, mgr->res);
177 
178  mgr->res = NULL;
179 
180  close(mgr->fd);
181  mgr->fd = -1;
182 }
183 
185  struct NvMdev *mgr,
186  struct Gpu* limited,
187  size_t gpu_size,
188  struct MDevRequest* requested,
189  size_t mdev_size
190 )
191 {
192  for (int i = 0; i < 32 && mgr->gpus[i] != NULL; ++i) {
193  struct NvMdevGpu* gpu = mgr->gpus[i];
194  struct Gpu *ggpu = gpu->gpu;
195  uint8_t valid = 0;
196 
197  for (size_t j = 0; j < gpu_size && !valid; ++j) {
198  struct Gpu req_gpu = limited[j];
199 
200  valid =
201  (req_gpu.domain == 0xFFFFFFFF ||
202  req_gpu.domain == ggpu->domain) &&
203  (req_gpu.bus == 0xFFFFFFFF ||
204  req_gpu.bus == ggpu->bus) &&
205  (req_gpu.slot == 0xFFFFFFFF ||
206  req_gpu.slot == ggpu->slot) &&
207  (req_gpu.function == 0xFFFFFFFF ||
208  req_gpu.function == ggpu->function) &&
209  (req_gpu.vendor_id == 0xFFFFFFFF ||
210  req_gpu.vendor_id == ggpu->vendor_id) &&
211  (req_gpu.device_id == 0xFFFFFFFF ||
212  req_gpu.device_id == ggpu->device_id) &&
213  (req_gpu.sub_vendor_id == 0xFFFFFFFF ||
214  req_gpu.sub_vendor_id == ggpu->sub_vendor_id) &&
215  (req_gpu.sub_device_id == 0xFFFFFFFF ||
216  req_gpu.sub_device_id == ggpu->sub_device_id) &&
217  (req_gpu.identifier == 0xFFFFFFFF ||
218  req_gpu.identifier == ggpu->identifier);
219  }
220 
221  if (!valid && limited != NULL && gpu_size > 0)
222  continue;
223 
224  for (size_t j = 0; j < mdev_size; ++j) {
225  struct MDevRequest request = requested[j];
226  struct RmMdevConfig mdev = {};
227 
228  mdev.discard = j == 0;
229  mdev.mdev_type = request.num;
230  strcpy(mdev.name, request.name);
231  strcpy(mdev.class, request.gpu_class);
232  memcpy(mdev.sign, SIGN, 128);
233  strcpy(mdev.pact, "NVIDIA-vComputeServer,9.0;Quadro-Virtual-DWS,5.0");
234  mdev.max_instances = request.max_inst;
235 
236  if (request.disp != NULL) {
237  mdev.num_heads = request.disp->num_heads;
238  mdev.max_res_x = request.disp->max_res_x;
239  mdev.max_res_y = request.disp->max_res_y;
240  mdev.max_pixel = request.disp->max_res_x * request.disp->max_res_y;
241  mdev.frl_config = request.disp->frl_config;
242  mdev.frl_enable = request.disp->frl_enable;
243  }
244 
245  mdev.cuda = 1;
246  mdev.ecc_support = request.ecc_support;
247  mdev.gpu_instance_size = 0;
248  mdev.multi_mdev = request.multi_mdev;
249  mdev.enc_cap = request.enc_cap;
250  mdev.v_dev_id =
251  request.v_dev_id == 0xFFFFFFFFFFFFFFFF ?
252  (ggpu->device_id << 16 || ggpu->sub_device_id) :
253  request.v_dev_id;
254  mdev.p_dev_id =
255  request.p_dev_id == 0xFFFFFFFFFFFFFFFF ?
256  ggpu->device_id : request.p_dev_id;
257  mdev.fb_len = (uint64_t) request.fb_len * 1024 * 1024;
258  mdev.map_video = request.map_vid_size * 1024 * 1024;
259  mdev.fb_res = (uint64_t) request.fb_res * 1024 * 1024;
260  mdev.bar1_len = request.bar1_len;
261 
262  RM_CTRL(gpu->ctl_fd, gpu->mdev, NVA081_ADD_MDEV, mdev);
263  }
264  }
265 }
266 
267 void register_nv_mgr_mdevs(struct NvMdev *mgr)
268 {
269  for (int i = 0; i < 32 && mgr->gpus[i] != NULL; ++i) {
270  struct NvMdevGpu* gpu = mgr->gpus[i];
271 
272  rm_ctrl_res(
273  gpu->ctl_fd,
274  gpu->root,
275  gpu->mdev_config,
277  NULL,
278  0
279  );
280  }
281 }
void * rm_ctrl_res(int fd, uint32_t client, uint32_t device, uint32_t command, void *data, uint32_t size)
Control resource command.
Definition: api.c:164
void rm_free_tree(int fd, struct NvResource *root)
Frees a resource tree for the system.
Definition: api.c:138
#define RM_CTRL(fd, res, cmd, data)
Controls a RM Resource.
Definition: api.h:133
struct NvResource * rm_alloc_res(int fd, struct NvResource *parent, uint32_t object, uint32_t rm_class, void *data)
Allocates a Node for a resource.
Definition: api.c:65
int nv_open_dev(uint16_t minor)
Opens a traditional NVIDIA device.
Definition: device.c:65
void register_nv_mgr_mdevs(struct NvMdev *mgr)
Registers mdevs on the OS.
Definition: manager.c:267
void create_nv_mgr_mdevs(struct NvMdev *mgr, struct Gpu *limited, size_t gpu_size, struct MDevRequest *requested, size_t mdev_size)
Creates necessary mediated devices on GPUs.
Definition: manager.c:184
struct NvMdev create_nv_mgr()
Creates a NVIDIA manager object.
Definition: manager.c:49
void free_nv_mgr(struct NvMdev *mgr)
Deletes a NVIDIA manager object.
Definition: manager.c:158
static const char SIGN[128]
Hardcoded signature.
Definition: manager.c:30
static uint32_t compose_manager_id(uint32_t client, uint32_t gpu_id, uint32_t handle)
Inline function to create device ids.
Definition: manager.c:42
#define NV0000_GET_PCI_INFO
Command for rm control res to get pci info for the gpu id.
Definition: nv0000.h:41
#define NV0000_GET_GPU_INFO
Command to get the GPU Info.
Definition: nv0000.h:78
#define NV0000_ATTACH_IDS
Command to attach a gpu id to the driver.
Definition: nv0000.h:55
#define NV0000_GET_PROBED_IDS
Command for rm control res to get a list of all probed ids.
Definition: nv0000.h:29
#define NV0080_CLASS
Command to allocate a Nv0080 device.
Definition: nv0080.h:29
#define NV2080_CLASS
Command to allocate a Nv2080 device.
Definition: nv2080.h:29
#define NV2080_GET_BUS_PCI_INFO
Command to get the BUS PCI info.
Definition: nv2080.h:32
#define NVA081_REG_MDEV
Command for rm control res to register mdevs.
Definition: nvA081.h:35
#define NVA081_CLASS
Command to allocate a NvA081 device.
Definition: nvA081.h:29
#define NVA081_ADD_MDEV
Command for rm control res to add a mdev to the list of mdevs.
Definition: nvA081.h:32
Gets the BUS PCI info.
Definition: nv2080.h:38
uint32_t sub_dev_id
Sub device ID.
Definition: nv2080.h:40
uint32_t dev_id
Device ID.
Definition: nv2080.h:39
GPU Base Structure.
Definition: mdev.h:33
uint32_t identifier
Identifier for the GPU.
Definition: mdev.h:42
uint32_t device_id
Device id for the PCI device.
Definition: mdev.h:39
uint32_t slot
Slot for the PCI device.
Definition: mdev.h:36
uint32_t sub_vendor_id
Sub vendor id for the PCI device.
Definition: mdev.h:40
uint32_t bus
Bus for the PCI device.
Definition: mdev.h:35
uint32_t sub_device_id
Sub device id for the PCI device.
Definition: mdev.h:41
uint32_t vendor_id
Vendor id for the PCI device.
Definition: mdev.h:38
uint32_t function
Function for the PCI device.
Definition: mdev.h:37
uint32_t domain
Domain for the PCI device.
Definition: mdev.h:34
Mediated Device Request Structure.
Definition: mdev.h:62
uint32_t map_vid_size
Mappable video size (IN MEGABYTES).
Definition: mdev.h:74
const char * name
Name of the GPU.
Definition: mdev.h:66
uint8_t ecc_support
If the Mediated GPU has ECC supported.
Definition: mdev.h:70
uint32_t bar1_len
Bar 1 Length.
Definition: mdev.h:77
uint8_t multi_mdev
If multiple mdevs supported.
Definition: mdev.h:71
uint32_t max_inst
Max number of mediated GPUs.
Definition: mdev.h:68
uint64_t v_dev_id
Virtual device id.
Definition: mdev.h:64
uint32_t fb_res
Frame buffer reserved (IN MEGABYTES).
Definition: mdev.h:73
uint64_t p_dev_id
Physical device id.
Definition: mdev.h:65
uint32_t fb_len
Frame buffer length (IN MEGABYTES).
Definition: mdev.h:72
uint32_t num
Number of the mdev.
Definition: mdev.h:63
uint32_t enc_cap
Definition: mdev.h:75
const char * gpu_class
GPU Class structure.
Definition: mdev.h:67
struct VirtDisplay * disp
Virtual display structure.
Definition: mdev.h:69
Attaches the following ids to the GPU driver.
Definition: nv0000.h:61
uint32_t gpu_ids[32]
List of GPU Ids to attach.
Definition: nv0000.h:62
Gets GPU specific information.
Definition: nv0000.h:84
uint32_t gpu_id
GPU Id to get the information from.
Definition: nv0000.h:85
uint32_t dev_inst
Device instance for the GPU.
Definition: nv0000.h:87
Gets the PCI info for a given gpu.
Definition: nv0000.h:47
uint16_t bus
Bus for the GPU.
Definition: nv0000.h:50
uint32_t gpu_id
GPU Id to get PCI info from.
Definition: nv0000.h:48
uint32_t domain
Domain for the GPU.
Definition: nv0000.h:49
uint16_t slot
Slot for the GPU.
Definition: nv0000.h:51
Gets a list of all probed Ids.
Definition: nv0000.h:35
uint32_t gpu_ids[32]
GPU IDs available.
Definition: nv0000.h:36
Allocates a Device.
Definition: nv0080.h:35
uint32_t hClientShare
Client share flag.
Definition: nv0080.h:37
uint32_t deviceId
Device ID.
Definition: nv0080.h:36
Control Mechanism for the NVIDIA GPU.
Definition: resources.h:56
struct NvResource * sdev
Subdevice.
Definition: resources.h:66
uint32_t device
Device id for controlling the physical gpu.
Definition: resources.h:62
uint32_t sub_device
Sub device id.
Definition: resources.h:63
struct Gpu * gpu
GPU structure corresponding to the GPU.
Definition: resources.h:60
int ctl_fd
Control Nvidia control file description.
Definition: resources.h:57
uint32_t mdev_config
Configurator for mdev devices.
Definition: resources.h:64
uint32_t root
Initial client.
Definition: resources.h:61
int dev_fd
Device Nvidia file descriptor.
Definition: resources.h:58
struct NvResource * mdev
Mdev device.
Definition: resources.h:67
struct NvResource * dev
Device.
Definition: resources.h:65
Structure for managing the mediated stack.
Definition: resources.h:74
int fd
Control file descriptor.
Definition: resources.h:75
struct NvMdevGpu * gpus[32]
Available GPUs.
Definition: resources.h:76
struct NvResource * res
Resource tree.
Definition: resources.h:77
void * class_info
Class info for the resource.
Definition: resources.h:47
uint32_t object
Object of the resource.
Definition: resources.h:45
Creates an MDev Config to be sent into the RM core.
Definition: nvA081.h:42
uint64_t p_dev_id
Definition: nvA081.h:61
uint32_t gpu_instance_size
Definition: nvA081.h:57
uint32_t max_res_y
Definition: nvA081.h:52
uint64_t bar1_len
Definition: nvA081.h:65
uint64_t fb_res
Definition: nvA081.h:64
char sign[128]
Definition: nvA081.h:47
uint64_t fb_len
Definition: nvA081.h:62
uint32_t max_instances
Definition: nvA081.h:49
uint32_t max_res_x
Definition: nvA081.h:51
uint64_t enc_cap
Definition: nvA081.h:59
uint32_t ecc_support
Definition: nvA081.h:56
uint64_t map_video
Definition: nvA081.h:63
uint64_t discard
Definition: nvA081.h:43
char pact[132]
Definition: nvA081.h:48
char class[32]
Definition: nvA081.h:46
char name[32]
Definition: nvA081.h:45
uint32_t cuda
Definition: nvA081.h:55
uint32_t mdev_type
Definition: nvA081.h:44
uint32_t multi_mdev
Definition: nvA081.h:58
uint32_t frl_config
Definition: nvA081.h:54
uint32_t frl_enable
Definition: nvA081.h:66
uint32_t num_heads
Definition: nvA081.h:50
uint32_t max_pixel
Definition: nvA081.h:53
uint64_t v_dev_id
Definition: nvA081.h:60
uint32_t frl_config
Frame rate limiter.
Definition: mdev.h:53
uint32_t max_res_y
Max resolution Y.
Definition: mdev.h:52
uint32_t frl_enable
If we use the frame rate limiter.
Definition: mdev.h:54
uint32_t max_res_x
Max resolution X.
Definition: mdev.h:51
uint32_t num_heads
Number of monitor heads.
Definition: mdev.h:50
#define calloc(x, y)
Definition: toml.c:53
#define free(x)
Definition: toml.c:52