PCIe/NVMe Driver by midnightveil · Pull Request #283 · au-ts/sddf · GitHub

PCIe/NVMe Driver #283

Draft
midnightveil wants to merge 60 commits into base: main
Commits (60)
36c0298
init
Ivan-Velickovic Sep 13, 2024
6d3e306
Basic PCIE bus enumeration
midnightveil Sep 17, 2024
955f35a
Use a pcie_header struct
midnightveil Sep 17, 2024
6ab1913
Handle JH7110/PLDA pcie errata
midnightveil Sep 17, 2024
c506105
type 0 pcie header decoding
midnightveil Sep 17, 2024
e44d2b0
Remove leftover crud from previous example
midnightveil Sep 20, 2024
3060828
print 64 bit addresses if we have them
midnightveil Sep 20, 2024
5f855a9
pcie print size
midnightveil Sep 20, 2024
dc8722a
basic nvme regs read
midnightveil Sep 20, 2024
4db2533
add -Werror for compiling pcie driver
Ivan-Velickovic Sep 23, 2024
0d09482
curr
midnightveil Sep 23, 2024
943ad67
dodgy ready admin controller?
midnightveil Sep 23, 2024
f0cbd80
Basic working NVMe queue ready
midnightveil Sep 27, 2024
7df95e9
add a page size define
midnightveil Sep 27, 2024
a6161e0
try to get an actual command
midnightveil Sep 27, 2024
50de472
actually get admin queue commands working
midnightveil Sep 30, 2024
14297c1
move nvme stuff (mostly) out of the pcie file
midnightveil Sep 30, 2024
7f5899c
weird broken bug
midnightveil Sep 30, 2024
a33ad2f
fixed bug :)
midnightveil Oct 1, 2024
b580119
working nvme queue abstraction?
midnightveil Oct 2, 2024
c770276
Properly bracketise macro :)
midnightveil Oct 4, 2024
68657bf
create I/O completion queue
midnightveil Oct 4, 2024
dcc9703
broken i/o submission queue
midnightveil Oct 4, 2024
fcfb83d
add poll command helper - still broken i/o submission queue
midnightveil Oct 7, 2024
38f944f
figured out the issue... ish... but why??
midnightveil Oct 7, 2024
d9b25d1
fix MPS; it's a log2-value not just a shifted by 2^12 value
midnightveil Oct 7, 2024
aeeead8
Move debug code to separate header (for now?)
midnightveil Oct 7, 2024
9c8fea8
yes this is a whole another specification
midnightveil Oct 7, 2024
1335d3f
try to read data
midnightveil Oct 7, 2024
ba692ec
i spent way too long trying to figure out why this was trapping...
midnightveil Oct 7, 2024
c3d49b1
use PRP entries as this is what they are
midnightveil Oct 8, 2024
ae0ed8a
it works!!!
midnightveil Oct 8, 2024
9be3a6e
fix queue lengths for I/O
midnightveil Oct 8, 2024
d45dc1e
read & write works
midnightveil Oct 8, 2024
8ae78a2
Get a PCIe interrupt
midnightveil Oct 9, 2024
c28b168
debug register fix
midnightveil Oct 14, 2024
5762e34
some extra asserts
midnightveil Oct 14, 2024
a974e18
dodgy stuff
midnightveil Oct 15, 2024
88cec0f
try to setup qemu
midnightveil Oct 15, 2024
1f64a05
qemu reading 0xfffff from nvme
midnightveil Oct 15, 2024
0ed4446
get back to original state on star64 (still not working tho)
midnightveil Oct 15, 2024
7cf9509
nvme regs on qemu have to be above 0x4000_0000
midnightveil Oct 15, 2024
c993734
remove unaligned reads & writes to doorbells
midnightveil Oct 15, 2024
28aa488
controller caps broken in qemu
midnightveil Oct 15, 2024
53e53ba
qemu IRQ!
midnightveil Oct 15, 2024
4aada70
qemu interrupt-y style. I have no idea how this works (it only interr…
midnightveil Oct 15, 2024
e0be776
ifdef the QEMU-specific behaviour
midnightveil Oct 18, 2024
e516801
check LE cpu
midnightveil Oct 18, 2024
216c402
interrupts work in QEMU, at least
midnightveil Oct 18, 2024
0f68bdc
yes QEMU on riscv is broken in multiple ways
midnightveil Oct 18, 2024
9f9fcc2
Document all the weird behaviours?!??!
midnightveil Oct 18, 2024
5533994
add TODO
midnightveil Oct 18, 2024
296db6f
fix IRQ comment
midnightveil Oct 28, 2024
b74618b
set microkit sdk env var in zig build
midnightveil Oct 28, 2024
b051451
mostly-broken rockpro
midnightveil Oct 29, 2024
2f3213c
nvme boots ish on rockpro
midnightveil Oct 29, 2024
f49734b
imx8mm pcie evk worky
midnightveil Nov 5, 2024
20a63ec
working nvme on the imx8
midnightveil Nov 7, 2024
a06f903
qemu riscv pci size
midnightveil Nov 8, 2024
da57ff0
config size of qemu aarch64
midnightveil Nov 12, 2024
35 changes: 35 additions & 0 deletions build.zig
@@ -46,6 +46,10 @@ const DriverClass = struct {
const Gpu = enum {
virtio,
};

const Pcie = enum {
starfive,
};
};

const util_src = [_][]const u8{
@@ -248,7 +252,32 @@ fn addNetworkDriver(
});
driver.addIncludePath(net_config_include);
driver.addIncludePath(b.path(b.fmt("drivers/network/{s}/", .{ @tagName(class) })));
driver.linkLibrary(util);

return driver;
}

fn addPcieDriver(
b: *std.Build,
util: *std.Build.Step.Compile,
class: DriverClass.Pcie,
target: std.Build.ResolvedTarget,
optimize: std.builtin.OptimizeMode,
) *std.Build.Step.Compile {
const driver = addPd(b, .{
.name = b.fmt("driver_pcie_{s}.elf", .{@tagName(class)}),
.target = target,
.optimize = optimize,
.strip = false,
});
const source = b.fmt("drivers/pcie/{s}/pcie.c", .{@tagName(class)});
driver.addCSourceFile(.{ .file = b.path(source), .flags = &.{"-Werror"} });
driver.addIncludePath(b.path(b.fmt("drivers/pcie/{s}/", .{@tagName(class)})));
driver.addIncludePath(b.path("include"));

driver.addCSourceFile(.{ .file = b.path("drivers/nvme/nvme.c"), .flags = &.{"-Werror"} });
driver.addIncludePath(b.path("drivers/nvme/"));

driver.linkLibrary(util);

return driver;
@@ -534,4 +563,10 @@ pub fn build(b: *std.Build) void {
net_copy.linkLibrary(util);
net_copy.linkLibrary(util_putchar_debug);
b.installArtifact(net_copy);

inline for (std.meta.fields(DriverClass.Pcie)) |class| {
const driver = addPcieDriver(b, util, @enumFromInt(class.value), target, optimize);
driver.linkLibrary(util_putchar_debug);
b.installArtifact(driver);
}
}
280 changes: 280 additions & 0 deletions drivers/nvme/nvme.c
@@ -0,0 +1,280 @@
#include <sddf/util/printf.h>

#include "nvme.h"
#include "nvme_queue.h"

#define DEBUG_DRIVER
#ifdef DEBUG_DRIVER
#include "nvme_debug.h"
#endif

#if __BYTE_ORDER__ != __ORDER_LITTLE_ENDIAN__
#error "code assumes little endian CPU as NVMe/PCIe is little endian"
#endif

volatile nvme_controller_t *nvme_controller;
nvme_submission_queue_entry_t *nvme_asq_region;
nvme_completion_queue_entry_t *nvme_acq_region;
uintptr_t nvme_asq_region_paddr;
uintptr_t nvme_acq_region_paddr;
nvme_submission_queue_entry_t *nvme_io_sq_region;
nvme_completion_queue_entry_t *nvme_io_cq_region;
uintptr_t nvme_io_sq_region_paddr;
uintptr_t nvme_io_cq_region_paddr;
#define NVME_ADMIN_QUEUE_SIZE 0x1000
#define NVME_IO_QUEUE_SIZE 0x1000

static nvme_queue_info_t admin_queue;
static nvme_queue_info_t io_queue;

uintptr_t data_region_paddr;
volatile uint8_t *data_region;

#define NVME_ASQ_CAPACITY (NVME_ADMIN_QUEUE_SIZE / sizeof(nvme_submission_queue_entry_t))
#define NVME_ACQ_CAPACITY (NVME_ADMIN_QUEUE_SIZE / sizeof(nvme_completion_queue_entry_t))
_Static_assert(NVME_ASQ_CAPACITY <= 0x1000, "capacity of ASQ must be <=4096 (entries)");
_Static_assert(NVME_ACQ_CAPACITY <= 0x1000, "capacity of ACQ must be <=4096 (entries)");
#define NVME_IO_SQ_CAPACITY (NVME_IO_QUEUE_SIZE / sizeof(nvme_submission_queue_entry_t))
#define NVME_IO_CQ_CAPACITY (NVME_IO_QUEUE_SIZE / sizeof(nvme_completion_queue_entry_t))
// §3.3.3.1
_Static_assert(NVME_IO_SQ_CAPACITY <= 0x10000, "capacity of IO SQ must be <=65536 (entries)");
_Static_assert(NVME_IO_CQ_CAPACITY <= 0x10000, "capacity of IO CQ must be <=65536 (entries)");

void nvme_irq_mask(void)
{
/* [NVMe-Transport-PCIe-1.1] 3.5.1.1 Differences between Pin Based and MSI Interrupts
> Pin-based and single MSI only use one interrupt vector.
> Multiple MSI may use up to 32 interrupt vectors.

[NVMe-2.1] 3.1.4.10 Admin Completion Queue Base Address
> This queue is always associated with interrupt vector 0.
*/

/* For now -- we mask out every interrupt vector */
nvme_controller->intms = 0xffffffff;
}

void nvme_irq_unmask(void)
{
/* [NVMe-Transport-PCIe-1.1] 3.5.1.1 Differences between Pin Based and MSI Interrupts
> Pin-based and single MSI only use one interrupt vector.
> Multiple MSI may use up to 32 interrupt vectors.

[NVMe-2.1] 3.1.4.10 Admin Completion Queue Base Address
> This queue is always associated with interrupt vector 0.
*/

/* For now -- we clear the mask on every vector; vector 0 is the only one in use */
nvme_controller->intmc = 0xffffffff;
}

/* [NVMe-2.1] 3.5.1 Memory-based Controller Initialization (PCIe) */
void nvme_controller_init()
{
LOG_NVME("CAP: %016lx\n", nvme_controller->cap);
// TODO: alignment 32-bit.
// LOG_NVME("VS: major: %u, minor: %u, tertiary: %u\n", nvme_controller->vs.mjr, nvme_controller->vs.mnr,
// nvme_controller->vs.ter);
LOG_NVME("CC: %08x\n", nvme_controller->cc);

nvme_controller->cc &= ~NVME_CC_EN;

// 1. Wait for CSTS.RDY to become '0' (i.e. not ready)
int i = 100;
while (nvme_controller->csts & NVME_CSTS_RDY && i != 0) i--;
if (i == 0) {
sddf_dprintf("timed out\n");
return;
}
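/* Sketch (an assumption, not part of this driver): a more faithful wait
   would honour CAP.TO -- the worst-case ready-transition time in 500 ms
   units, held in CAP bits 31:24 -- instead of a fixed spin count, e.g.:

   uint32_t timeout_500ms = (nvme_controller->cap >> 24) & 0xff;
*/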

// 2. Configure Admin Queue(s);
nvme_queues_init(&admin_queue, /* queue ID */ 0, nvme_controller, nvme_asq_region, NVME_ASQ_CAPACITY, nvme_acq_region,
NVME_ACQ_CAPACITY);
nvme_irq_mask();
assert(nvme_asq_region_paddr != 0x0);
assert(nvme_acq_region_paddr != 0x0);
nvme_controller->asq = nvme_asq_region_paddr;
nvme_controller->acq = nvme_acq_region_paddr;
nvme_controller->aqa &= ~(NVME_AQA_ACQS_MASK | NVME_AQA_ASQS_MASK);
nvme_controller->aqa |= ((NVME_ASQ_CAPACITY - 1) << NVME_AQA_ASQS_SHIFT)
| ((NVME_ACQ_CAPACITY - 1) << NVME_AQA_ACQS_SHIFT);
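/* Worked example (assuming the spec-default 64-byte SQ / 16-byte CQ entries):
   with 0x1000-byte regions, NVME_ASQ_CAPACITY = 4096/64 = 64 and
   NVME_ACQ_CAPACITY = 4096/16 = 256, so AQA receives the zero-based
   sizes 63 (ASQS) and 255 (ACQS). */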

// 3. Initialise Command Support Sets.
nvme_controller->cc &= ~(NVME_CC_CSS_MASK);
if (nvme_controller->cap & NVME_CAP_NOIOCSS) {
nvme_controller->cc |= 0b111 << NVME_CC_CSS_SHIFT;
} else if (nvme_controller->cap & NVME_CAP_IOCSS) {
nvme_controller->cc |= 0b110 << NVME_CC_CSS_SHIFT;
} else if (nvme_controller->cap & NVME_CAP_NCSS) {
nvme_controller->cc |= 0b000 << NVME_CC_CSS_SHIFT;
}

#if defined(CONFIG_PLAT_QEMU_RISCV_VIRT) || defined(CONFIG_PLAT_QEMU_ARM_VIRT)
/*
QEMU deviates from the NVMe specification:
https://gitlab.com/qemu-project/qemu/-/issues/1691
*/
nvme_controller->cc &= ~(NVME_CC_CSS_MASK);
nvme_controller->cc |= 0b000 << NVME_CC_CSS_SHIFT;
#endif

// 4a. Arbitration Mechanism (TODO)
// 4b. Memory Page Size
// TODO: Check CAP.MPSMAX/CAP.MPSMIN fields
nvme_controller->cc &= ~NVME_CC_MPS_MASK;
/* n.b. page size = 2 ^ (12 + MPS) */
uint8_t page_size_log2 = 12; /* all architectures we care about have page size 2^12. */
nvme_controller->cc |= ((page_size_log2 - 12) << NVME_CC_MPS_SHIFT) & NVME_CC_MPS_MASK;
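/* Worked example (illustrative): page size = 2^(12 + MPS), so a 4 KiB page
   gives MPS = 0 as written above, while a hypothetical 2 MiB page would need
   MPS = 9 -- in both cases subject to the CAP.MPSMAX/CAP.MPSMIN bounds
   flagged in the TODO. */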

// TODO: See initialisation note under §4.2.4; fine since it is already configured that way.

// 5. Enable the controller
nvme_controller->cc |= NVME_CC_EN;

// 6. Wait for ready
LOG_NVME("waiting ready...\n");
while (!(nvme_controller->csts & NVME_CSTS_RDY));
LOG_NVME("\tdone\n");

// 7. Send the Identify Controller command (Identify with CNS = 01h); §5.1.13
// TODO: What do we actually need this for????
// sudo nvme admin-passthru /dev/nvme0 --opcode=0x06 --cdw10=0x0001 --data-len=4096 -r -s
nvme_completion_queue_entry_t entry;
entry = nvme_queue_submit_and_consume_poll(&admin_queue, &(nvme_submission_queue_entry_t){
.cdw0 = /* CID */ (0b1111 << 16) | /* PSDT */ 0 | /* FUSE */ 0 | /* OPC */ 0x6,
.cdw10 = /* CNTID[31:16] */ 0x0 | /* CNS */ 0x01,
.prp2 = 0,
.prp1 = data_region_paddr, /* TEMP */
});

assert((entry.phase_tag_and_status & _MASK(1, 15)) == 0x0); // §4.2.3 Status Field
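/* Sketch (hypothetical, not wired in): the Identify Controller data now sits
   at data_region via prp1. In that layout, bytes 4..23 hold the ASCII serial
   number and bytes 24..63 the model number, so a debug dump could look like:

   for (int i = 24; i < 64; i++) sddf_dprintf("%c", data_region[i]);
   sddf_dprintf("\n");
*/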

// 8. The host determines any I/O Command Set specific configuration information
// TODO: Why???

// 9. Determine the number of I/O Submission Queues and I/O Completion Queues
// supported using the Set Features command with the Number of Queues feature identifier.
// After determining the number of I/O Queues, the NVMe Transport specific interrupt registers
// (e.g., MSI and/or MSI-X registers) should be configured
// TODO: interrupts, and don't ignore the reported queue count; we always use exactly one I/O queue for now.
uint16_t io_queue_id = 1;
assert(nvme_io_sq_region != 0x0);
assert(nvme_io_cq_region != 0x0);
assert(nvme_io_sq_region_paddr != 0x0);
assert(nvme_io_cq_region_paddr != 0x0);
nvme_queues_init(&io_queue, io_queue_id, nvme_controller, nvme_io_sq_region, NVME_IO_SQ_CAPACITY, nvme_io_cq_region,
NVME_IO_CQ_CAPACITY);

// §3.3.1.1 Queue Setup & Initialization
// => Configures the size of the I/O Submission Queues (CC.IOSQES) and I/O Completion Queues (CC.IOCQES)
// nvme_controller->cc &= ~(NVME_CC_IOCQES_MASK | NVME_CC_IOSQES_MASK);
/* n.b. CQ/SQ entry sizes are specified as 2^n; i.e. 2^4 = 16 and 2^6 = 64. */
nvme_controller->cc |= (4 << NVME_CC_IOCQES_SHIFT) | (6 << NVME_CC_IOSQES_SHIFT);
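/* Sanity check (a sketch, assuming the spec-default entry sizes): the 2^4 and
   2^6 encodings written above must match the structs this driver uses. */
_Static_assert(sizeof(nvme_completion_queue_entry_t) == 16, "CQ entries are 2^4 bytes");
_Static_assert(sizeof(nvme_submission_queue_entry_t) == 64, "SQ entries are 2^6 bytes");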

// 10. Allocate the appropriate number of I/O Completion Queues [...]
// The I/O Completion Queues are allocated using the Create I/O Completion Queue command.
// §5.2.1
entry = nvme_queue_submit_and_consume_poll(&admin_queue, &(nvme_submission_queue_entry_t){
.cdw0 = /* CID */ (0b1010 << 16) | /* PSDT */ 0 | /* FUSE */ 0 | /* OPC */ 0x5,
.cdw10 = /* QSIZE */ ((NVME_IO_CQ_CAPACITY - 1U) << 16) | /* QID */ io_queue_id,
.cdw11 = /* IV */ (0x0 << 16) | /* IEN */ 1 << 1 | /* PC */ 0x1,
.prp2 = 0,
.prp1 = nvme_io_cq_region_paddr,
});

assert((entry.phase_tag_and_status & _MASK(1, 15)) == 0x0); // §4.2.3 Status Field

// 11. Allocate the appropriate number of I/O Submission Queues [...]
// The I/O Submission Queues are allocated using the Create I/O Submission Queue command.
// §5.2.2
entry = nvme_queue_submit_and_consume_poll(&admin_queue, &(nvme_submission_queue_entry_t){
.cdw0 = /* CID */ (0b1110 << 16) | /* PSDT */ 0 | /* FUSE */ 0 | /* OPC */ 0x1,
.cdw10 = /* QSIZE */ ((NVME_IO_SQ_CAPACITY - 1U) << 16) | /* QID */ io_queue_id,
.cdw11 = /* CQID */ (io_queue_id << 16) | /* QPRIO */ (0b00 << 1) | /* PC */ 0b1,
.cdw12 = 0,
.prp2 = 0,
.prp1 = nvme_io_sq_region_paddr,
});

assert((entry.phase_tag_and_status & _MASK(1, 15)) == 0x0); // §4.2.3 Status Field

// 12. To enable asynchronous notification of optional events, the host should issue a Set Features
// command specifying the events to enable. To enable asynchronous notification of events, the host
// should submit an appropriate number of Asynchronous Event Request commands. This step may
// be done at any point after the controller signals that the controller is ready (i.e., CSTS.RDY is set to ‘1’).
// TODO: ???

nvme_irq_unmask();
}

void nvme_continue(int z);
void nvme_init()
{
LOG_NVME("Starting NVME initialisation... (%s)\n", microkit_name);

// We should do a Function Level Reset as defined by [PCIe-2.0] spec §6.6.2

// https://github.com/bootreer/vroom/blob/d8bbe9db2b1cfdfc38eec31f3b48f5eb167879a9/src/nvme.rs#L220

nvme_controller_init();
LOG_NVME("NVME initialised\n");

/* TODO: Don't send via this */
nvme_continue(0);
}

#define NUMBER_BLOCKS 1
void nvme_continue(int z)
{
if (z == 0) {
/* [NVMe-CommandSet-1.1] 3.3.4 Read command */
nvme_queue_submit(&io_queue, &(nvme_submission_queue_entry_t){
.cdw0 = /* CID */ (0b1011 << 16) | /* PSDT */ 0 | /* FUSE */ 0 | /* OPC */ 0x2,
.nsid = 0x1, // TODO: Why is NSID 1 now?
.cdw10 = /* SLBA[31:00] */ 0x0,
.cdw11 = /* SLBA[63:32] */ 0x0,
.cdw12 = /* LR */ (0b1U << 31) | /* others */ 0 | /* NLB */ (NUMBER_BLOCKS - 1),
.prp2 = 0x0,
.prp1 = data_region_paddr,
});
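/* n.b. NLB is zero-based, so NUMBER_BLOCKS - 1 = 0 requests exactly one
   logical block; with a hypothetical 512-byte LBA format that is the first
   512 bytes of data_region. */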
} else if (z == 1) {
sddf_dprintf("doing nothing :P -- should get another IRQ \n");
/*
So this works fine on QEMU AArch64...
but on QEMU RISCV level interrupts only get triggered once
... this caused issues in linux
... https://www.mail-archive.com/qemu-devel@nongnu.org/msg931360.html
*/
} else if (z == 2) {
nvme_completion_queue_entry_t cq_entry;
int ret = nvme_queue_consume(&io_queue, &cq_entry);
assert(ret == 0);
assert((cq_entry.phase_tag_and_status & _MASK(1, 15)) == 0x0); // §4.2.3 Status Field

for (int i = 0; i < 8; i++) {
LOG_NVME("Data [%02x]: %02x\n", i, data_region[i]);
}

for (int i = 0; i < 4096; i++) {
data_region[i] = data_region[i] ^ 0xbb;
}
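/* Flipping every byte above makes the Write observable: a later read-back
   of LBA 0 should return the XOR-ed pattern rather than the original. */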

/* [NVMe-CommandSet-1.1] Write command */
nvme_queue_submit(&io_queue, &(nvme_submission_queue_entry_t){
.cdw0 = /* CID */ (0b1101 << 16) | /* PSDT */ 0 | /* FUSE */ 0 | /* OPC */ 0x1,
.nsid = 0x1, // TODO: Why is NSID 1 now?
.cdw10 = /* SLBA[31:00] */ 0x0,
.cdw11 = /* SLBA[63:32] */ 0x0,
.cdw12 = /* LR */ (0b1U << 31) | /* others */ 0 | /* NLB */ (NUMBER_BLOCKS - 1),
.prp2 = 0x0,
.prp1 = data_region_paddr,
});
} else if (z == 3) {
nvme_completion_queue_entry_t cq_entry;
int ret = nvme_queue_consume(&io_queue, &cq_entry);
assert(ret == 0);
assert((cq_entry.phase_tag_and_status & _MASK(1, 15)) == 0x0); // §4.2.3 Status Field

LOG_NVME("Got response for write!\n");
}
}