BE ENGINEERING INSIGHTS: Doing File I/O From A Device Driver
By Dmitriy Budko

Many BeOS developers ask whether it is possible to do file or /dev/* I/O from a kernel driver. This is a very reasonable question. On other OSes it's complicated: one has to use special, unfamiliar functions such as the Windows 95/98 IFSMgr_Ring0_FileIO set (OpenCreateFile(), ReadFile(), WriteAbsoluteDisk(), etc.) or the Windows NT ZwCreateFile(), ZwReadFile(), etc. A small example from the Microsoft Windows NT DDK:

ntStatus = ZwCreateFile( &NtFileHandle,
  SYNCHRONIZE | FILE_READ_DATA,
  &ObjectAttributes,
  &IoStatus,
  NULL,   // alloc size = none
  FILE_ATTRIBUTE_NORMAL,
  FILE_SHARE_READ,
  FILE_OPEN,
  FILE_SYNCHRONOUS_IO_NONALERT,
  NULL, // eabuffer
  0 );  // ealength

Can you understand without extensive comments what is going on here?

Or from the NT DDK documentation:

NTSTATUS ZwReadFile(
  IN HANDLE FileHandle,
  IN HANDLE Event OPTIONAL,
  IN PIO_APC_ROUTINE ApcRoutine OPTIONAL,
  IN PVOID ApcContext OPTIONAL,
  OUT PIO_STATUS_BLOCK IoStatusBlock,
  OUT PVOID Buffer,
  IN ULONG Length,
  IN PLARGE_INTEGER ByteOffset OPTIONAL,
  IN PULONG Key OPTIONAL
  );

Under BeOS it's much easier: a driver can call the standard POSIX low-level I/O functions: open(), close(), read(), write(), etc.

Here is a simple driver that uses these functions and provides very simple encryption capabilities. It publishes a "secure" device as /dev/misc/cryptodevice. Programs can read from and write to it as if it were a normal file, but the data is scrambled and stored in the normal file /boot/home/cryptod_storage. The source code, makefile, installation script, and PPC and x86 BeIDE projects are at

ftp://ftp.be.com/pub/samples/drivers/cryptodevice.zip

#include <OS.h>
#include <KernelExport.h>
#include <Drivers.h>
#include <fcntl.h>
#include <string.h>
#include <unistd.h>

/* Descriptor of the backing-store file; assigned by init_driver(). */
int fh;

/* Regular file in which the scrambled data is kept. */
const char *file_name = "/boot/home/cryptod_storage";

/* Key bytes that encrypt() XORs over the data. */
const char *key_string = "VERY lousy encryption";

/*
 * XOR len bytes of buf, in place, with the repeating key_string.
 *
 * pos is the file offset of buf[0].  The key byte used for a data byte is
 * chosen from that byte's offset (truncated to unsigned) modulo the key
 * length, so the result does not depend on how a transfer is split up.
 */
static void
encrypt(uchar* buf, size_t len, off_t pos)
{
  const size_t  klen = strlen(key_string);
  uchar*  p = buf;
  size_t  n;

  for (n = 0; n < len; n++, p++)
    *p ^= key_string[((unsigned)(pos + n)) % klen];
}

/*
 * Undo encrypt() on len bytes of buf, in place.  pos is the file offset
 * of buf[0] and must match the offset used when the data was encrypted.
 */
static void
decrypt(uchar* buf, size_t len, off_t pos)
{
  /* encrypt() is a plain XOR, so applying it a second time restores
     the original bytes. */
  encrypt(buf, len, pos);
}

/*
 * Open hook.  Only logs the call: no per-open state is created and the
 * cookie is left untouched.  Always returns B_OK.
 */
static status_t
cryptod_open (const char *name, uint32 flags, void **cookie)
{
  (void)name;
  (void)flags;
  (void)cookie;

  dprintf("cryptod: open()\n");

  return B_OK;
}

/*
 * Close hook.  Only logs the call; the shared backing file stays open.
 * Always returns B_OK.
 */
static status_t
cryptod_close (void *cookie)
{
  (void)cookie;

  dprintf("cryptod: close()\n");

  return B_OK;
}

/*
 * Free hook.  Nothing was allocated per open, so this only logs the
 * call.  Always returns B_OK.
 */
static status_t
cryptod_free (void *cookie)
{
  (void)cookie;

  dprintf("cryptod: free()\n");

  return B_OK;
}

/*
 * Read hook: fetch up to *len bytes at offset pos from the backing file
 * into buf and unscramble them in place.
 *
 * On return *len holds the number of bytes actually read (0 on failure).
 * Returns B_OK, B_DEV_SEEK_ERROR if the seek fails, or B_DEV_READ_ERROR
 * if the read fails.
 *
 * NOTE(review): fh is a single descriptor shared by every opener and the
 * lseek()/read() pair is not atomic; confirm that requests cannot
 * interleave.
 */
static status_t
cryptod_read (void *cookie, off_t pos, void *buf, size_t *len)
{
  ssize_t  got;

  (void)cookie;

  /* Widen the size_t explicitly so the argument matches the format. */
  dprintf("cryptod: read(%Ld, %lu)\n", pos, (unsigned long)*len);

  if (lseek(fh, pos, SEEK_SET) == -1)
  {
    *len = 0;
    return B_DEV_SEEK_ERROR;
  }

  /* Keep the signed result so the error test does not depend on how -1
     converts to the unsigned size_t. */
  got = read(fh, buf, *len);
  if (got < 0)
  {
    *len = 0;
    return B_DEV_READ_ERROR;
  }

  /* Only the bytes that were really read are unscrambled. */
  *len = (size_t)got;
  decrypt((uchar*)buf, *len, pos);
  return B_OK;
}

static status_t
cryptod_write (void *cookie, off_t pos, const void *buf,
  size_t *len)
{
  dprintf("cryptod: write(%Ld, %Ld)\n", pos, *len);

  if(-1 == lseek(fh, pos, SEEK_SET))
  {
    *len = 0;
    return B_DEV_SEEK_ERROR;
  }

  encrypt((uchar*)buf, *len, pos);

  if (-1 == (*len = write(fh, buf, *len)))
  {
    *len = 0;
    return B_DEV_WRITE_ERROR;
  }
  return B_OK;
}

/*
 * Scatter-read hook: fill the count buffers described by vec with data
 * read from offset pos of the backing file, then unscramble the data in
 * place.
 *
 * On return *len holds the total number of bytes read (0 on failure).
 * Returns B_OK, B_DEV_SEEK_ERROR if the seek fails, or B_DEV_READ_ERROR
 * if the read fails.
 *
 * NOTE(review): fh is a single descriptor shared by every opener and the
 * lseek()/readv() pair is not atomic; confirm that requests cannot
 * interleave.
 */
static status_t
cryptod_readv (void *cookie, off_t pos, const iovec *vec,
  size_t count, size_t *len)
{
  ssize_t  got;
  size_t  left;
  size_t  i;
  off_t  cur_pos;

  (void)cookie;

  /* Widen the size_t values explicitly so they match the format. */
  dprintf("cryptod: readv(%Ld, %lu, %lu)\n", pos,
    (unsigned long)count, (unsigned long)*len);

  if (lseek(fh, pos, SEEK_SET) == -1)
  {
    *len = 0;
    return B_DEV_SEEK_ERROR;
  }

  got = readv(fh, vec, count);
  if (got < 0)
  {
    *len = 0;
    return B_DEV_READ_ERROR;
  }
  *len = (size_t)got;

  /* A short read fills the buffers only partially.  Unscramble exactly
     the bytes that were read and leave the rest untouched. */
  left = *len;
  cur_pos = pos;
  for (i = 0; i < count && left > 0; i++)
  {
    size_t  n = vec[i].iov_len;

    if (n > left)
      n = left;

    decrypt((uchar*)vec[i].iov_base, n, cur_pos);
    cur_pos += n;
    left -= n;
  }
  return B_OK;
}

/*
 * Gather-write hook: scramble the data in the count buffers described by
 * vec and store it at offset pos of the backing file.
 *
 * The buffers are scrambled in place BEFORE writev() so that only
 * encrypted bytes reach the file, and unscrambled again afterwards so
 * the caller gets its data back unchanged, whether or not the write
 * succeeded.
 *
 * On return *len holds the total number of bytes written (0 on failure).
 * Returns B_OK, B_DEV_SEEK_ERROR if the seek fails, or B_DEV_WRITE_ERROR
 * if the write fails.
 *
 * NOTE(review): the buffers are modified for the duration of the call,
 * which assumes the iov_base memory is writable and not read concurrently
 * by the caller; confirm.  Also, fh is shared by every opener and the
 * lseek()/writev() pair is not atomic.
 */
static status_t
cryptod_writev (void *cookie, off_t pos, const iovec *vec,
  size_t count, size_t *len)
{
  ssize_t  put;
  size_t  i;
  off_t  cur_pos;

  (void)cookie;

  /* Widen the size_t values explicitly so they match the format. */
  dprintf("cryptod: writev(%Ld, %lu, %lu)\n", pos,
    (unsigned long)count, (unsigned long)*len);

  if (lseek(fh, pos, SEEK_SET) == -1)
  {
    *len = 0;
    return B_DEV_SEEK_ERROR;
  }

  /* Scramble first: the data must be encrypted when it hits the file. */
  for (cur_pos = pos, i = 0; i < count; i++)
  {
    encrypt((uchar*)vec[i].iov_base, vec[i].iov_len, cur_pos);
    cur_pos += vec[i].iov_len;
  }

  put = writev(fh, vec, count);

  /* The XOR scrambling is its own inverse; run it again over the same
     offsets to restore the caller's buffers. */
  for (cur_pos = pos, i = 0; i < count; i++)
  {
    decrypt((uchar*)vec[i].iov_base, vec[i].iov_len, cur_pos);
    cur_pos += vec[i].iov_len;
  }

  if (put < 0)
  {
    *len = 0;
    return B_DEV_WRITE_ERROR;
  }

  *len = (size_t)put;
  return B_OK;
}

/*
 * Control (ioctl) hook.  No request is implemented, so every call is
 * rejected with B_DEV_INVALID_IOCTL.
 */
static status_t
cryptod_control(void *cookie, uint32 msg, void *buf,
  size_t len)
{
  (void)cookie;
  (void)msg;
  (void)buf;
  (void)len;

  return B_DEV_INVALID_IOCTL;
}

/*
 * Hook table returned by find_device().  The initializer is positional,
 * so the order of the entries below must not be changed.
 */
static device_hooks cryptod_device = {
  cryptod_open,
  cryptod_close,
  cryptod_free,
  cryptod_control,
  cryptod_read,
  cryptod_write,
  NULL,      /* select: not provided */
  NULL,      /* deselect: not provided */
  cryptod_readv,
  cryptod_writev
};

/* NULL-terminated list of the device names handed out by
   publish_devices(). */
static char *cryptod_name[] = { "misc/cryptodevice", NULL };

/*
 * Driver initialization: open the backing-store file named by file_name,
 * creating it if necessary, and keep its descriptor in fh for the I/O
 * hooks.
 *
 * Returns B_OK on success, B_ERROR if the file cannot be opened.
 */
status_t
init_driver()
{
  dprintf("cryptod: init_driver(), %s, %s\n",
    __DATE__, __TIME__);

  /* O_CREAT requires a mode argument; without one the permission bits of
     a newly created file are indeterminate.  Owner read/write only, since
     this file backs the "secure" device. */
  fh = open(file_name, O_RDWR | O_CREAT, 0600);
  if (fh == -1)
    return B_ERROR;

  return B_OK;
}

/*
 * Driver teardown: log the call and close the backing-store descriptor
 * held in fh.  The result of close() is not examined.
 */
void
uninit_driver()
{
  dprintf("cryptod: uninit_driver()\n");

  (void)close(fh);
}

/*
 * Return the NULL-terminated list of device names this driver provides.
 */
const char **
publish_devices()
{
  /* The array decays to a pointer to its first element, which is the
     same address as the array itself. */
  return (const char **)cryptod_name;
}

device_hooks *
find_device(const char *name)
{
  return &cryptod_device;
}

The driver just passes all read/write requests to the file system. Everything should be obvious to any C/DOS/POSIX programmer except two functions: readv() and writev(). They are common extensions to POSIX and are used to read/write a contiguous portion of a file from/to many buffers in one system (or file system) call. In many cases these functions provide better performance than multiple calls to read()/write().

And from Linux man pages, with a few changes:

#include <sys/uio.h>

int readv(
  int fd, const struct iovec *vector, size_t count);

int writev(
  int fd, const struct iovec *vector, size_t count);

struct iovec {
  __ptr_t iov_base; /* Starting address. */
  size_t iov_len; /* Length in bytes. */
  };

Description

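readv reads data from file descriptor fd, and puts the result in the buffers described by vector. The number of buffers is specified by count. The buffers are filled in the order specified. Operates just like read except that data is put in vector instead of a contiguous buffer.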

writev writes data to file descriptor fd, taking it from the buffers described by vector. The number of buffers is specified by count. The buffers are used in the order specified. Operates just like write except that data is taken from vector instead of a contiguous buffer.

Return Value

On success readv returns the number of bytes read. On success writev returns the number of bytes written. On error, -1 is returned, and errno is set appropriately.

If you want to initialize and mount a file system on the encrypted device then you will have to:

  1. Use a raw device or partition like /dev/disk/ide/ata/1/master/0/0_1 as the backing storage for the data.
  2. Change the published device name to /dev/disk/foo/bar.
  3. Implement ioctl() handlers for the standard requests for a mass storage device. See old RAMDrive as an example:

ftp://ftp.be.com/pub/samples/drivers/obsolete/ramdrive.zip

Unfortunately, in current versions of the BeOS you cannot mount a file system over such a device if it uses a file on a file system as the backing storage. A deadlock will occur. This will be fixed in a future version of the BeOS.

Copyright ©1999 Be, Inc. Be is a registered trademark, and BeOS, BeBox, BeWare, GeekPort, the Be logo and the BeOS logo are trademarks of Be, Inc. All other trademarks mentioned are the property of their respective owners.