From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from localhost (localhost [127.0.0.1]) by turing.freelists.org (Avenir Technologies Mail Multiplex) with ESMTP id 7035F2633D for ; Thu, 28 Jun 2018 11:21:33 -0400 (EDT) Received: from turing.freelists.org ([127.0.0.1]) by localhost (turing.freelists.org [127.0.0.1]) (amavisd-new, port 10024) with ESMTP id GX_UgUKuDw_y for ; Thu, 28 Jun 2018 11:21:33 -0400 (EDT) Received: from smtpng1.m.smailru.net (smtpng1.m.smailru.net [94.100.181.251]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by turing.freelists.org (Avenir Technologies Mail Multiplex) with ESMTPS id 4BD7E26337 for ; Thu, 28 Jun 2018 11:21:31 -0400 (EDT) From: Kirill Shcherbatov Subject: [tarantool-patches] [PATCH v1 1/1] sql: drop useless code from os_unix.c Date: Thu, 28 Jun 2018 18:21:19 +0300 Message-Id: <443107fc62c650cea3007bf46975e5965518b02a.1530199154.git.kshcherbatov@tarantool.org> Sender: tarantool-patches-bounce@freelists.org Errors-to: tarantool-patches-bounce@freelists.org Reply-To: tarantool-patches@freelists.org List-help: List-unsubscribe: List-software: Ecartis version 1.0.0 List-Id: tarantool-patches List-subscribe: List-owner: List-post: List-archive: To: tarantool-patches@freelists.org Cc: v.shpilevoy@tarantool.org, Kirill Shcherbatov Resolves #3284. --- https://github.com/tarantool/tarantool/tree/kshch/gh-3284-no-os-unix https://github.com/tarantool/tarantool/issues/3284 src/box/sql/os.c | 168 -- src/box/sql/os_unix.c | 6195 ++++++++++------------------------------------- src/box/sql/sqliteInt.h | 1 - 3 files changed, 1342 insertions(+), 5022 deletions(-) diff --git a/src/box/sql/os.c b/src/box/sql/os.c index 38d3585..5a04edb 100644 --- a/src/box/sql/os.c +++ b/src/box/sql/os.c @@ -123,123 +123,12 @@ sqlite3OsWrite(sqlite3_file * id, const void *pBuf, int amt, i64 offset) return id->pMethods->xWrite(id, pBuf, amt, offset); } -int -sqlite3OsTruncate(sqlite3_file * id, i64 size) -{ - return id->pMethods->xTruncate(id, size); -} - -int -sqlite3OsSync(sqlite3_file * id, int flags) -{ - DO_OS_MALLOC_TEST(id); - return id->pMethods->xSync(id, flags); -} - -int -sqlite3OsFileSize(sqlite3_file * id, i64 * pSize) -{ - DO_OS_MALLOC_TEST(id); - return id->pMethods->xFileSize(id, pSize); -} - -int -sqlite3OsLock(sqlite3_file * id, int lockType) -{ - DO_OS_MALLOC_TEST(id); - return id->pMethods->xLock(id, lockType); -} - -int -sqlite3OsUnlock(sqlite3_file * id, int lockType) -{ - return id->pMethods->xUnlock(id, lockType); -} - -int -sqlite3OsCheckReservedLock(sqlite3_file * id, int *pResOut) -{ - DO_OS_MALLOC_TEST(id); - return id->pMethods->xCheckReservedLock(id, pResOut); -} - -/* - * Use sqlite3OsFileControl() when we are doing something that might fail - * and we need to know about the failures. Use sqlite3OsFileControlHint() - * when simply tossing information over the wall to the VFS and we do not - * really care if the VFS receives and understands the information since it - * is only a hint and can be safely ignored. The sqlite3OsFileControlHint() - * routine has no return value since the return value would be meaningless. - */ -int -sqlite3OsFileControl(sqlite3_file * id, int op, void *pArg) -{ -#ifdef SQLITE_TEST - if (op != SQLITE_FCNTL_COMMIT_PHASETWO) { - /* Faults are not injected into COMMIT_PHASETWO because, assuming SQLite - * is using a regular VFS, it is called after the corresponding - * transaction has been committed. Injecting a fault at this point - * confuses the test scripts - the COMMIT comand returns SQLITE_NOMEM - * but the transaction is committed anyway. - * - * The core must call OsFileControl() though, not OsFileControlHint(), - * as if a custom VFS (e.g. zipvfs) returns an error here, it probably - * means the commit really has failed and an error should be returned - * to the user. - */ - DO_OS_MALLOC_TEST(id); - } -#endif - return id->pMethods->xFileControl(id, op, pArg); -} - void sqlite3OsFileControlHint(sqlite3_file * id, int op, void *pArg) { (void)id->pMethods->xFileControl(id, op, pArg); } -int -sqlite3OsSectorSize(sqlite3_file * id) -{ - int (*xSectorSize) (sqlite3_file *) = id->pMethods->xSectorSize; - return (xSectorSize ? xSectorSize(id) : SQLITE_DEFAULT_SECTOR_SIZE); -} - -int -sqlite3OsDeviceCharacteristics(sqlite3_file * id) -{ - return id->pMethods->xDeviceCharacteristics(id); -} - -int -sqlite3OsShmLock(sqlite3_file * id, int offset, int n, int flags) -{ - return id->pMethods->xShmLock(id, offset, n, flags); -} - -void -sqlite3OsShmBarrier(sqlite3_file * id) -{ - id->pMethods->xShmBarrier(id); -} - -int -sqlite3OsShmUnmap(sqlite3_file * id, int deleteFlag) -{ - return id->pMethods->xShmUnmap(id, deleteFlag); -} - -int -sqlite3OsShmMap(sqlite3_file * id, /* Database file handle */ - int iPage, int pgsz, int bExtend, /* True to extend file if necessary */ - void volatile **pp /* OUT: Pointer to mapping */ - ) -{ - DO_OS_MALLOC_TEST(id); - return id->pMethods->xShmMap(id, iPage, pgsz, bExtend, pp); -} - #if SQLITE_MAX_MMAP_SIZE>0 /* The real implementation of xFetch and xUnfetch */ int @@ -296,53 +185,6 @@ sqlite3OsOpen(sqlite3_vfs * pVfs, } int -sqlite3OsDelete(sqlite3_vfs * pVfs, const char *zPath, int dirSync) -{ - DO_OS_MALLOC_TEST(0); - assert(dirSync == 0 || dirSync == 1); - return pVfs->xDelete(pVfs, zPath, dirSync); -} - -int -sqlite3OsAccess(sqlite3_vfs * pVfs, const char *zPath, int flags, int *pResOut) -{ - DO_OS_MALLOC_TEST(0); - return pVfs->xAccess(pVfs, zPath, flags, pResOut); -} - -int -sqlite3OsFullPathname(sqlite3_vfs * pVfs, - const char *zPath, int nPathOut, char *zPathOut) -{ - DO_OS_MALLOC_TEST(0); - zPathOut[0] = 0; - return pVfs->xFullPathname(pVfs, zPath, nPathOut, zPathOut); -} - -#ifndef SQLITE_OMIT_LOAD_EXTENSION -void * -sqlite3OsDlOpen(sqlite3_vfs * pVfs, const char *zPath) -{ - return pVfs->xDlOpen(pVfs, zPath); -} - -void -sqlite3OsDlError(sqlite3_vfs * pVfs, int nByte, char *zBufOut) -{ - pVfs->xDlError(pVfs, nByte, zBufOut); -} - -void (*sqlite3OsDlSym(sqlite3_vfs * pVfs, void *pHdle, const char *zSym)) (void) { - return pVfs->xDlSym(pVfs, pHdle, zSym); -} - -void -sqlite3OsDlClose(sqlite3_vfs * pVfs, void *pHandle) -{ - pVfs->xDlClose(pVfs, pHandle); -} -#endif /* SQLITE_OMIT_LOAD_EXTENSION */ -int sqlite3OsRandomness(sqlite3_vfs * pVfs, int nByte, char *zBufOut) { return pVfs->xRandomness(pVfs, nByte, zBufOut); @@ -503,13 +345,3 @@ sqlite3_vfs_register(sqlite3_vfs * pVfs, int makeDflt) assert(vfsList); return SQLITE_OK; } - -/* - * Unregister a VFS so that it is no longer accessible. - */ -int -sqlite3_vfs_unregister(sqlite3_vfs * pVfs) -{ - vfsUnlink(pVfs); - return SQLITE_OK; -} diff --git a/src/box/sql/os_unix.c b/src/box/sql/os_unix.c index 9cb8a54..fb990f6 100644 --- a/src/box/sql/os_unix.c +++ b/src/box/sql/os_unix.c @@ -29,80 +29,7 @@ * SUCH DAMAGE. */ -/* - * - * This file contains the VFS implementation for unix-like operating systems. - * - * There are actually several different VFS implementations in this file. - * The differences are in the way that file locking is done. The default - * implementation uses Posix Advisory Locks. Alternative implementations - * use flock(), dot-files, various proprietary locking schemas, or simply - * skip locking all together. - * - * This source file is organized into divisions where the logic for various - * subfunctions is contained within the appropriate division. PLEASE - * KEEP THE STRUCTURE OF THIS FILE INTACT. New code should be placed - * in the correct division and should be clearly labeled. - * - * The layout of divisions is as follows: - * - * * General-purpose declarations and utility functions. - * * Various locking primitive implementations (all except proxy locking): - * + for Posix Advisory Locks - * + for no-op locks - * + for dot-file locks - * + for flock() locking - * + for AFP filesystem locks (MacOSX only) - * * sqlite3_file methods not associated with locking. - * * Definitions of sqlite3_io_methods objects for all locking - * methods plus "finder" functions for each locking method. - * * sqlite3_vfs method implementations. - * * Locking primitives for the proxy uber-locking-method. (MacOSX only) - * * Definitions of sqlite3_vfs objects for all locking methods - * plus implementations of sqlite3_os_init() and sqlite3_os_end(). - */ #include "sqliteInt.h" - -/* - * There are various methods for file locking used for concurrency - * control: - * - * 1. POSIX locking (the default), - * 2. No locking, - * 3. Dot-file locking, - * 4. flock() locking, - * 5. AFP locking (OSX only), - * 6. proxy locking. (OSX only) - * - * Styles 4, 5, and 7 are only available of SQLITE_ENABLE_LOCKING_STYLE - * is defined to 1. The SQLITE_ENABLE_LOCKING_STYLE also enables automatic - * selection of the appropriate locking style based on the filesystem - * where the database is located. - */ -#if !defined(SQLITE_ENABLE_LOCKING_STYLE) -#if defined(__APPLE__) -#define SQLITE_ENABLE_LOCKING_STYLE 1 -#else -#define SQLITE_ENABLE_LOCKING_STYLE 0 -#endif -#endif - -/* Use pread() and pwrite() if they are available */ -#if defined(__APPLE__) -#define HAVE_PREAD 1 -#define HAVE_PWRITE 1 -#endif -#if defined(HAVE_PREAD64) && defined(HAVE_PWRITE64) -#undef USE_PREAD -#define USE_PREAD64 1 -#elif defined(HAVE_PREAD) && defined(HAVE_PWRITE) -#undef USE_PREAD64 -#define USE_PREAD 1 -#endif - -/* - * standard include files. - */ #include #include #include @@ -110,28 +37,8 @@ #include #include #include -#if SQLITE_MAX_MMAP_SIZE>0 #include -#endif - -#if SQLITE_ENABLE_LOCKING_STYLE -#include -#include -#include -#endif /* SQLITE_ENABLE_LOCKING_STYLE */ - -#if defined(__APPLE__) || SQLITE_ENABLE_LOCKING_STYLE -#include -#endif - -#ifdef HAVE_UTIME -#include -#endif -/* - * Allowed values of unixFile.fsFlags - */ -#define SQLITE_FSFLAGS_IS_MSDOS 0x1 /* * Default permissions when creating a new file @@ -152,16 +59,6 @@ */ #define MAX_PATHNAME 512 -/* - * Maximum supported symbolic links - */ -#define SQLITE_MAX_SYMLINKS 100 - -/* - * Only set the lastErrno if the error code is a real error and not - * a normal expected return code of SQLITE_BUSY or SQLITE_OK - */ -#define IS_LOCK_ERROR(x) ((x != SQLITE_OK) && (x != SQLITE_BUSY)) /* Forward references */ typedef struct unixShm unixShm; /* Connection shared memory */ @@ -194,44 +91,14 @@ struct unixFile { unsigned char eFileLock; /* The type of lock held on this fd */ unsigned short int ctrlFlags; /* Behavioral bits. UNIXFILE_* flags */ int lastErrno; /* The unix errno from last I/O error */ - void *lockingContext; /* Locking style specific state */ UnixUnusedFd *pUnused; /* Pre-allocated UnixUnusedFd */ const char *zPath; /* Name of the file */ - unixShm *pShm; /* Shared memory segment information */ int szChunk; /* Configured by FCNTL_CHUNK_SIZE */ -#if SQLITE_MAX_MMAP_SIZE>0 int nFetchOut; /* Number of outstanding xFetch refs */ sqlite3_int64 mmapSize; /* Usable size of mapping at pMapRegion */ sqlite3_int64 mmapSizeActual; /* Actual size of mapping at pMapRegion */ sqlite3_int64 mmapSizeMax; /* Configured FCNTL_MMAP_SIZE value */ void *pMapRegion; /* Memory mapped region */ -#endif -#if SQLITE_ENABLE_LOCKING_STYLE - int openFlags; /* The flags specified at open() */ -#endif -#if SQLITE_ENABLE_LOCKING_STYLE || defined(__APPLE__) - unsigned fsFlags; /* cached details from statfs() */ -#endif -#ifdef SQLITE_DEBUG - /* The next group of variables are used to track whether or not the - * transaction counter in bytes 24-27 of database files are updated - * whenever any part of the database changes. An assertion fault will - * occur if a file is updated without also updating the transaction - * counter. This test is made to avoid new problems similar to the - * one described by ticket #3584. - */ - unsigned char transCntrChng; /* True if the transaction counter changed */ - unsigned char dbUpdate; /* True if any part of database file changed */ - unsigned char inNormalWrite; /* True if in a normal write operation */ - -#endif - -#ifdef SQLITE_TEST - /* In test mode, increase the size of this structure a bit so that - * it is larger than the struct CrashFile defined in test6.c. - */ - char aPadding[32]; -#endif }; /* This variable holds the process id (pid) from when the xRandomness() @@ -245,11 +112,7 @@ static pid_t randomnessPid = 0; */ #define UNIXFILE_EXCL 0x01 /* Connections from one process only */ #define UNIXFILE_RDONLY 0x02 /* Connection is read only */ -#ifndef SQLITE_DISABLE_DIRSYNC #define UNIXFILE_DIRSYNC 0x08 /* Directory sync needed */ -#else -#define UNIXFILE_DIRSYNC 0x00 -#endif #define UNIXFILE_DELETE 0x20 /* Delete on close */ #define UNIXFILE_URI 0x40 /* Filename might have query parameters */ #define UNIXFILE_NOLOCK 0x80 /* Do no file locking */ @@ -262,305 +125,11 @@ static pid_t randomnessPid = 0; /* * Define various macros that are missing from some systems. */ -#ifndef O_LARGEFILE -#define O_LARGEFILE 0 -#endif -#ifdef SQLITE_DISABLE_LFS -#undef O_LARGEFILE -#define O_LARGEFILE 0 -#endif -#ifndef O_NOFOLLOW -#define O_NOFOLLOW 0 -#endif #ifndef O_BINARY #define O_BINARY 0 #endif /* - * HAVE_MREMAP defaults to true on Linux and false everywhere else. - */ -#if !defined(HAVE_MREMAP) -#if defined(__linux__) && defined(_GNU_SOURCE) -#define HAVE_MREMAP 1 -#else -#define HAVE_MREMAP 0 -#endif -#endif - - - -/* - * Different Unix systems declare open() in different ways. Same use - * open(const char*,int,mode_t). Others use open(const char*,int,...). - * The difference is important when using a pointer to the function. - * - * The safest way to deal with the problem is to always use this wrapper - * which always has the same well-defined interface. - */ -static int -posixOpen(const char *zFile, int flags, int mode) -{ - return open(zFile, flags, mode); -} - -/* Forward reference */ -static int openDirectory(const char *, int *); -#if SQLITE_MAX_MMAP_SIZE > 0 -static int unixGetpagesize(void); -#endif - -/* - * Many system calls are accessed through pointer-to-functions so that - * they may be overridden at runtime to facilitate fault injection during - * testing and sandboxing. The following array holds the names and pointers - * to all overrideable system calls. - */ -static struct unix_syscall { - const char *zName; /* Name of the system call */ - sqlite3_syscall_ptr pCurrent; /* Current value of the system call */ - sqlite3_syscall_ptr pDefault; /* Default value */ -} aSyscall[] = { - { - "open", (sqlite3_syscall_ptr) posixOpen, 0}, -#define osOpen ((int(*)(const char*,int,int))aSyscall[0].pCurrent) - { - "close", (sqlite3_syscall_ptr) close, 0}, -#define osClose ((int(*)(int))aSyscall[1].pCurrent) - { - "access", (sqlite3_syscall_ptr) access, 0}, -#define osAccess ((int(*)(const char*,int))aSyscall[2].pCurrent) - { - "getcwd", (sqlite3_syscall_ptr) getcwd, 0}, -#define osGetcwd ((char*(*)(char*,size_t))aSyscall[3].pCurrent) - { - "stat", (sqlite3_syscall_ptr) stat, 0}, -#define osStat ((int(*)(const char*,struct stat*))aSyscall[4].pCurrent) - { - "fstat", (sqlite3_syscall_ptr) fstat, 0}, -#define osFstat ((int(*)(int,struct stat*))aSyscall[5].pCurrent) - { - "ftruncate", (sqlite3_syscall_ptr) ftruncate, 0}, -#define osFtruncate ((int(*)(int,off_t))aSyscall[6].pCurrent) - { - "fcntl", (sqlite3_syscall_ptr) fcntl, 0}, -#define osFcntl ((int(*)(int,int,...))aSyscall[7].pCurrent) - { - "read", (sqlite3_syscall_ptr) read, 0}, -#define osRead ((ssize_t(*)(int,void*,size_t))aSyscall[8].pCurrent) -#if defined(USE_PREAD) || SQLITE_ENABLE_LOCKING_STYLE - { - "pread", (sqlite3_syscall_ptr) pread, 0}, -#else - { - "pread", (sqlite3_syscall_ptr) 0, 0}, -#endif -#define osPread ((ssize_t(*)(int,void*,size_t,off_t))aSyscall[9].pCurrent) -#if defined(USE_PREAD64) - { - "pread64", (sqlite3_syscall_ptr) pread64, 0}, -#else - { - "pread64", (sqlite3_syscall_ptr) 0, 0}, -#endif -#define osPread64 ((ssize_t(*)(int,void*,size_t,off64_t))aSyscall[10].pCurrent) - { - "write", (sqlite3_syscall_ptr) write, 0}, -#define osWrite ((ssize_t(*)(int,const void*,size_t))aSyscall[11].pCurrent) -#if defined(USE_PREAD) || SQLITE_ENABLE_LOCKING_STYLE - { - "pwrite", (sqlite3_syscall_ptr) pwrite, 0}, -#else - { - "pwrite", (sqlite3_syscall_ptr) 0, 0}, -#endif -#define osPwrite ((ssize_t(*)(int,const void*,size_t,off_t))\ - aSyscall[12].pCurrent) -#if defined(USE_PREAD64) - { - "pwrite64", (sqlite3_syscall_ptr) pwrite64, 0}, -#else - { - "pwrite64", (sqlite3_syscall_ptr) 0, 0}, -#endif -#define osPwrite64 ((ssize_t(*)(int,const void*,size_t,off64_t))\ - aSyscall[13].pCurrent) - { - "fchmod", (sqlite3_syscall_ptr) fchmod, 0}, -#define osFchmod ((int(*)(int,mode_t))aSyscall[14].pCurrent) -#if defined(HAVE_POSIX_FALLOCATE) && HAVE_POSIX_FALLOCATE - { - "fallocate", (sqlite3_syscall_ptr) posix_fallocate, 0}, -#else - { - "fallocate", (sqlite3_syscall_ptr) 0, 0}, -#endif -#define osFallocate ((int(*)(int,off_t,off_t))aSyscall[15].pCurrent) - { - "unlink", (sqlite3_syscall_ptr) unlink, 0}, -#define osUnlink ((int(*)(const char*))aSyscall[16].pCurrent) - { - "openDirectory", (sqlite3_syscall_ptr) openDirectory, 0}, -#define osOpenDirectory ((int(*)(const char*,int*))aSyscall[17].pCurrent) - { - "mkdir", (sqlite3_syscall_ptr) mkdir, 0}, -#define osMkdir ((int(*)(const char*,mode_t))aSyscall[18].pCurrent) - { - "rmdir", (sqlite3_syscall_ptr) rmdir, 0}, -#define osRmdir ((int(*)(const char*))aSyscall[19].pCurrent) -#if defined(HAVE_FCHOWN) - { - "fchown", (sqlite3_syscall_ptr) fchown, 0}, -#else - { - "fchown", (sqlite3_syscall_ptr) 0, 0}, -#endif -#define osFchown ((int(*)(int,uid_t,gid_t))aSyscall[20].pCurrent) - { - "geteuid", (sqlite3_syscall_ptr) geteuid, 0}, -#define osGeteuid ((uid_t(*)(void))aSyscall[21].pCurrent) - -#if SQLITE_MAX_MMAP_SIZE>0 - { - "mmap", (sqlite3_syscall_ptr) mmap, 0}, -#else - { - "mmap", (sqlite3_syscall_ptr) 0, 0}, -#endif -#define osMmap ((void*(*)(void*,size_t,int,int,int,off_t))aSyscall[22].pCurrent) -#if SQLITE_MAX_MMAP_SIZE>0 - { - "munmap", (sqlite3_syscall_ptr) munmap, 0}, -#else - { - "munmap", (sqlite3_syscall_ptr) 0, 0}, -#endif -#define osMunmap ((void*(*)(void*,size_t))aSyscall[23].pCurrent) -#if HAVE_MREMAP && (SQLITE_MAX_MMAP_SIZE>0) - { - "mremap", (sqlite3_syscall_ptr) mremap, 0}, -#else - { - "mremap", (sqlite3_syscall_ptr) 0, 0}, -#endif -#define osMremap ((void*(*)(void*,size_t,size_t,int,...))aSyscall[24].pCurrent) -#if SQLITE_MAX_MMAP_SIZE>0 - { - "getpagesize", (sqlite3_syscall_ptr) unixGetpagesize, 0}, -#else - { - "getpagesize", (sqlite3_syscall_ptr) 0, 0}, -#endif -#define osGetpagesize ((int(*)(void))aSyscall[25].pCurrent) -#if defined(HAVE_READLINK) - { - "readlink", (sqlite3_syscall_ptr) readlink, 0}, -#else - { - "readlink", (sqlite3_syscall_ptr) 0, 0}, -#endif -#define osReadlink ((ssize_t(*)(const char*,char*,size_t))aSyscall[26].pCurrent) -#if defined(HAVE_LSTAT) - { - "lstat", (sqlite3_syscall_ptr) lstat, 0}, -#else - { - "lstat", (sqlite3_syscall_ptr) 0, 0}, -#endif -#define osLstat ((int(*)(const char*,struct stat*))aSyscall[27].pCurrent) -}; /* End of the overrideable system calls */ - - -/* - * This is the xSetSystemCall() method of sqlite3_vfs for all of the - * "unix" VFSes. Return SQLITE_OK opon successfully updating the - * system call pointer, or SQLITE_NOTFOUND if there is no configurable - * system call named zName. - */ -static int -unixSetSystemCall(sqlite3_vfs * pNotUsed, /* The VFS pointer. Not used */ - const char *zName, /* Name of system call to override */ - sqlite3_syscall_ptr pNewFunc /* Pointer to new system call value */ - ) -{ - unsigned int i; - int rc = SQLITE_NOTFOUND; - - UNUSED_PARAMETER(pNotUsed); - if (zName == 0) { - /* If no zName is given, restore all system calls to their default - * settings and return NULL - */ - rc = SQLITE_OK; - for (i = 0; i < sizeof(aSyscall) / sizeof(aSyscall[0]); i++) { - if (aSyscall[i].pDefault) { - aSyscall[i].pCurrent = aSyscall[i].pDefault; - } - } - } else { - /* If zName is specified, operate on only the one system call - * specified. - */ - for (i = 0; i < sizeof(aSyscall) / sizeof(aSyscall[0]); i++) { - if (strcmp(zName, aSyscall[i].zName) == 0) { - if (aSyscall[i].pDefault == 0) { - aSyscall[i].pDefault = - aSyscall[i].pCurrent; - } - rc = SQLITE_OK; - if (pNewFunc == 0) - pNewFunc = aSyscall[i].pDefault; - aSyscall[i].pCurrent = pNewFunc; - break; - } - } - } - return rc; -} - -/* - * Return the value of a system call. Return NULL if zName is not a - * recognized system call name. NULL is also returned if the system call - * is currently undefined. - */ -static sqlite3_syscall_ptr -unixGetSystemCall(sqlite3_vfs * pNotUsed, const char *zName) -{ - unsigned int i; - - UNUSED_PARAMETER(pNotUsed); - for (i = 0; i < sizeof(aSyscall) / sizeof(aSyscall[0]); i++) { - if (strcmp(zName, aSyscall[i].zName) == 0) - return aSyscall[i].pCurrent; - } - return 0; -} - -/* - * Return the name of the first system call after zName. If zName==NULL - * then return the name of the first system call. Return NULL if zName - * is the last system call or if zName is not the name of a valid - * system call. - */ -static const char * -unixNextSystemCall(sqlite3_vfs * p, const char *zName) -{ - int i = -1; - - UNUSED_PARAMETER(p); - if (zName) { - for (i = 0; i < ArraySize(aSyscall) - 1; i++) { - if (strcmp(zName, aSyscall[i].zName) == 0) - break; - } - } - for (i++; i < ArraySize(aSyscall); i++) { - if (aSyscall[i].pCurrent != 0) - return aSyscall[i].zName; - } - return 0; -} - -/* * Do not accept any file descriptor less than this value, in order to avoid * opening database file using file descriptors that are commonly used for * standard input, output, and error. @@ -592,11 +161,7 @@ robust_open(const char *z, int f, mode_t m) int fd; mode_t m2 = m ? m : SQLITE_DEFAULT_FILE_PERMISSIONS; while (1) { -#if defined(O_CLOEXEC) - fd = osOpen(z, f | O_CLOEXEC, m2); -#else - fd = osOpen(z, f, m2); -#endif + fd = open(z, f | O_CLOEXEC, m2); if (fd < 0) { if (errno == EINTR) continue; @@ -604,95 +169,26 @@ robust_open(const char *z, int f, mode_t m) } if (fd >= SQLITE_MINIMUM_FILE_DESCRIPTOR) break; - osClose(fd); + close(fd); sqlite3_log(SQLITE_WARNING, "attempt to open \"%s\" as file descriptor %d", z, fd); fd = -1; - if (osOpen("/dev/null", f, m) < 0) + if (open("/dev/null", f, m) < 0) break; } if (fd >= 0) { if (m != 0) { struct stat statbuf; - if (osFstat(fd, &statbuf) == 0 - && statbuf.st_size == 0 - && (statbuf.st_mode & 0777) != m) { - osFchmod(fd, m); - } + if (fstat(fd, &statbuf) == 0 && + statbuf.st_size == 0 && + (statbuf.st_mode & 0777) != m) + fchmod(fd, m); } -#if defined(FD_CLOEXEC) && (!defined(O_CLOEXEC) || O_CLOEXEC==0) - osFcntl(fd, F_SETFD, osFcntl(fd, F_GETFD, 0) | FD_CLOEXEC); -#endif } return fd; } -#ifdef SQLITE_LOCK_TRACE -/* - * Print out information about all locking operations. - * - * This routine is used for troubleshooting locks on multithreaded - * platforms. Enable by compiling with the -DSQLITE_LOCK_TRACE - * command-line option on the compiler. This code is normally - * turned off. - */ -static int -lockTrace(int fd, int op, struct flock *p) -{ - char *zOpName, *zType; - int s; - int savedErrno; - if (op == F_GETLK) { - zOpName = "GETLK"; - } else if (op == F_SETLK) { - zOpName = "SETLK"; - } else { - s = osFcntl(fd, op, p); - sqlite3DebugPrintf("fcntl unknown %d %d %d\n", fd, op, s); - return s; - } - if (p->l_type == F_RDLCK) { - zType = "RDLCK"; - } else if (p->l_type == F_WRLCK) { - zType = "WRLCK"; - } else if (p->l_type == F_UNLCK) { - zType = "UNLCK"; - } else { - assert(0); - } - assert(p->l_whence == SEEK_SET); - s = osFcntl(fd, op, p); - savedErrno = errno; - sqlite3DebugPrintf("fcntl %d %s %s %d %d %d %d\n", - fd, zOpName, zType, (int)p->l_start, - (int)p->l_len, (int)p->l_pid, s); - if (s == (-1) && op == F_SETLK - && (p->l_type == F_RDLCK || p->l_type == F_WRLCK)) { - struct flock l2; - l2 = *p; - osFcntl(fd, F_GETLK, &l2); - if (l2.l_type == F_RDLCK) { - zType = "RDLCK"; - } else if (l2.l_type == F_WRLCK) { - zType = "WRLCK"; - } else if (l2.l_type == F_UNLCK) { - zType = "UNLCK"; - } else { - assert(0); - } - sqlite3DebugPrintf("fcntl-failure-reason: %s %d %d %d\n", - zType, (int)l2.l_start, (int)l2.l_len, - (int)l2.l_pid); - } - errno = savedErrno; - return s; -} - -#undef osFcntl -#define osFcntl lockTrace -#endif /* SQLITE_LOCK_TRACE */ - /* * Retry ftruncate() calls that fail due to EINTR * @@ -704,7 +200,7 @@ robust_ftruncate(int h, sqlite3_int64 sz) { int rc; do { - rc = osFtruncate(h, sz); + rc = ftruncate(h, sz); } while (rc < 0 && errno == EINTR); return rc; } @@ -861,9 +357,6 @@ struct unixInodeInfo { UnixUnusedFd *pUnused; /* Unused file descriptors to close */ unixInodeInfo *pNext; /* List of all unixInodeInfo objects */ unixInodeInfo *pPrev; /* .... doubly linked */ -#if SQLITE_ENABLE_LOCKING_STYLE - unsigned long long sharedByte; /* for AFP simulated shared lock */ -#endif }; /* @@ -925,7 +418,7 @@ unixLogErrorAtLine(int errcode, /* SQLite error code */ static void robust_close(unixFile * pFile, int h, int lineno) { - if (osClose(h)) { + if (close(h) != 0) { unixLogErrorAtLine(SQLITE_IOERR_CLOSE, "close", pFile ? pFile->zPath : 0, lineno); } @@ -1008,38 +501,11 @@ findInodeInfo(unixFile * pFile, /* Unix file with file desc used in the key */ * create a unique name for the file. */ fd = pFile->h; - rc = osFstat(fd, &statbuf); + rc = fstat(fd, &statbuf); if (rc != 0) { storeLastErrno(pFile, errno); return SQLITE_IOERR; } -#ifdef __APPLE__ - /* On OS X on an msdos filesystem, the inode number is reported - * incorrectly for zero-size files. See ticket #3260. To work - * around this problem (we consider it a bug in OS X, not SQLite) - * we always increase the file size to 1 by writing a single byte - * prior to accessing the inode number. The one byte written is - * an ASCII 'S' character which also happens to be the first byte - * in the header of every SQLite database. In this way, if there - * is a race condition such that another thread has already populated - * the first page of the database, no damage is done. - */ - if (statbuf.st_size == 0 - && (pFile->fsFlags & SQLITE_FSFLAGS_IS_MSDOS) != 0) { - do { - rc = osWrite(fd, "S", 1); - } while (rc < 0 && errno == EINTR); - if (rc != 1) { - storeLastErrno(pFile, errno); - return SQLITE_IOERR; - } - rc = osFstat(fd, &statbuf); - if (rc != 0) { - storeLastErrno(pFile, errno); - return SQLITE_IOERR; - } - } -#endif memset(&fileId, 0, sizeof(fileId)); fileId.dev = statbuf.st_dev; @@ -1075,9 +541,9 @@ static int fileHasMoved(unixFile * pFile) { struct stat buf; - return pFile->pInode != 0 && - (osStat(pFile->zPath, &buf) != 0 - || (u64) buf.st_ino != pFile->pInode->fileId.ino); + return pFile->pInode != NULL && (stat(pFile->zPath, &buf) != 0 || + (u64) buf.st_ino != + pFile->pInode->fileId.ino); } /* @@ -1099,7 +565,7 @@ verifyDbFile(unixFile * pFile) if (pFile->ctrlFlags & UNIXFILE_NOLOCK) return; - rc = osFstat(pFile->h, &buf); + rc = fstat(pFile->h, &buf); if (rc != 0) { sqlite3_log(SQLITE_WARNING, "cannot fstat db file %s", pFile->zPath); @@ -1123,34 +589,6 @@ verifyDbFile(unixFile * pFile) } /* - * This routine checks if there is a RESERVED lock held on the specified - * file by this or any other process. If such a lock is held, set *pResOut - * to a non-zero value otherwise *pResOut is set to zero. The return value - * is set to SQLITE_OK unless an I/O error occurs during lock checking. - */ -static int -unixCheckReservedLock(sqlite3_file * id, int *pResOut) -{ - int rc = SQLITE_OK; - int reserved = 0; - unixFile *pFile = (unixFile *) id; - - SimulateIOError(return SQLITE_IOERR_CHECKRESERVEDLOCK; - ); - - assert(pFile); - assert(pFile->eFileLock <= SHARED_LOCK); - - /* Check if a thread in this process holds such a lock */ - if (pFile->pInode->eFileLock > SHARED_LOCK) { - reserved = 1; - } - - *pResOut = reserved; - return rc; -} - -/* * Attempt to set a system-lock on the file pFile. The lock is * described by pLock. * @@ -1184,7 +622,7 @@ unixFileLock(unixFile * pFile, struct flock *pLock) lock.l_start = SHARED_FIRST; lock.l_len = SHARED_SIZE; lock.l_type = F_WRLCK; - rc = osFcntl(pFile->h, F_SETLK, &lock); + rc = fcntl(pFile->h, F_SETLK, &lock); if (rc < 0) return rc; pInode->bProcessLock = 1; @@ -1193,7 +631,7 @@ unixFileLock(unixFile * pFile, struct flock *pLock) rc = 0; } } else { - rc = osFcntl(pFile->h, F_SETLK, pLock); + rc = fcntl(pFile->h, F_SETLK, pLock); } return rc; } @@ -1295,9 +733,8 @@ unixLock(sqlite3_file * id, int eFileLock) * has a SHARED or RESERVED lock, then increment reference counts and * return SQLITE_OK. */ - if (eFileLock == SHARED_LOCK && - (pInode->eFileLock == SHARED_LOCK - || pInode->eFileLock == RESERVED_LOCK)) { + if (eFileLock == SHARED_LOCK && (pInode->eFileLock == SHARED_LOCK || + pInode->eFileLock == RESERVED_LOCK)) { assert(eFileLock == SHARED_LOCK); assert(pFile->eFileLock == 0); assert(pInode->nShared > 0); @@ -1399,20 +836,6 @@ unixLock(sqlite3_file * id, int eFileLock) } } -#ifdef SQLITE_DEBUG - /* Set up the transaction-counter change checking flags when - * transitioning from a SHARED to a RESERVED lock. The change - * from SHARED to RESERVED marks the beginning of a normal - * write operation. - */ - if (rc == SQLITE_OK - && pFile->eFileLock <= SHARED_LOCK && eFileLock == RESERVED_LOCK) { - pFile->transCntrChng = 0; - pFile->dbUpdate = 0; - pFile->inNormalWrite = 1; - } -#endif - if (rc == SQLITE_OK) { pFile->eFileLock = eFileLock; pInode->eFileLock = eFileLock; @@ -1472,18 +895,6 @@ posixUnlock(sqlite3_file * id, int eFileLock, int handleNFSUnlock) if (pFile->eFileLock > SHARED_LOCK) { assert(pInode->eFileLock == pFile->eFileLock); -#ifdef SQLITE_DEBUG - /* When reducing a lock such that other processes can start - * reading the database file again, make sure that the - * transaction counter was updated if any part of the database - * file changed. If the transaction counter is not updated, - * other connections to the same file might not realize that - * the file has changed and hence might not know to flush their - * cache. The use of a stale cache can lead to database corruption. - */ - pFile->inNormalWrite = 0; -#endif - /* downgrading to a shared lock on NFS involves clearing the write lock * before establishing the readlock - to avoid a race condition we downgrade * the lock in 2 blocks, so that part of the range will be covered by a @@ -1494,50 +905,8 @@ posixUnlock(sqlite3_file * id, int eFileLock, int handleNFSUnlock) * 4: [RRRR.] */ if (eFileLock == SHARED_LOCK) { -#if !defined(__APPLE__) || !SQLITE_ENABLE_LOCKING_STYLE (void)handleNFSUnlock; assert(handleNFSUnlock == 0); -#endif -#if defined(__APPLE__) && SQLITE_ENABLE_LOCKING_STYLE - if (handleNFSUnlock) { - int tErrno; /* Error code from system call errors */ - off_t divSize = SHARED_SIZE - 1; - - lock.l_type = F_UNLCK; - lock.l_whence = SEEK_SET; - lock.l_start = SHARED_FIRST; - lock.l_len = divSize; - if (unixFileLock(pFile, &lock) == (-1)) { - tErrno = errno; - rc = SQLITE_IOERR_UNLOCK; - storeLastErrno(pFile, tErrno); - goto end_unlock; - } - lock.l_type = F_RDLCK; - lock.l_whence = SEEK_SET; - lock.l_start = SHARED_FIRST; - lock.l_len = divSize; - if (unixFileLock(pFile, &lock) == (-1)) { - tErrno = errno; - rc = sqliteErrorFromPosixError(tErrno, - SQLITE_IOERR_RDLOCK); - if (IS_LOCK_ERROR(rc)) { - storeLastErrno(pFile, tErrno); - } - goto end_unlock; - } - lock.l_type = F_UNLCK; - lock.l_whence = SEEK_SET; - lock.l_start = SHARED_FIRST + divSize; - lock.l_len = SHARED_SIZE - divSize; - if (unixFileLock(pFile, &lock) == (-1)) { - tErrno = errno; - rc = SQLITE_IOERR_UNLOCK; - storeLastErrno(pFile, tErrno); - goto end_unlock; - } - } else -#endif /* defined(__APPLE__) && SQLITE_ENABLE_LOCKING_STYLE */ { lock.l_type = F_RDLCK; lock.l_whence = SEEK_SET; @@ -1617,16 +986,12 @@ posixUnlock(sqlite3_file * id, int eFileLock, int handleNFSUnlock) static int unixUnlock(sqlite3_file * id, int eFileLock) { -#if SQLITE_MAX_MMAP_SIZE>0 assert(eFileLock == SHARED_LOCK || ((unixFile *) id)->nFetchOut == 0); -#endif return posixUnlock(id, eFileLock, 0); } -#if SQLITE_MAX_MMAP_SIZE>0 static int unixMapfile(unixFile * pFd, i64 nByte); static void unixUnmapfile(unixFile * pFd); -#endif /* * This function performs the parts of the "close file" operation @@ -1645,13 +1010,6 @@ closeUnixFile(sqlite3_file * id) robust_close(pFile, pFile->h, __LINE__); pFile->h = -1; } -#ifdef SQLITE_UNLINK_AFTER_CLOSE - if (pFile->ctrlFlags & UNIXFILE_DELETE) { - osUnlink(pFile->zPath); - sqlite3_free(*(char **)&pFile->zPath); - pFile->zPath = 0; - } -#endif OpenCounter(-1); sqlite3_free(pFile->pUnused); memset(pFile, 0, sizeof(unixFile)); @@ -1707,14 +1065,6 @@ unixClose(sqlite3_file * id) */ static int -nolockCheckReservedLock(sqlite3_file * NotUsed, int *pResOut) -{ - UNUSED_PARAMETER(NotUsed); - *pResOut = 0; - return SQLITE_OK; -} - -static int nolockLock(sqlite3_file * NotUsed, int NotUsed2) { UNUSED_PARAMETER2(NotUsed, NotUsed2); @@ -1737,4465 +1087,1647 @@ nolockClose(sqlite3_file * id) return closeUnixFile(id); } -/******************* End of the no-op lock implementation ********************* + +/******************* End of the non-op lock implementation ******************* *****************************************************************************/ /****************************************************************************** - ************************ Begin dot-file Locking ****************************** - * - * The dotfile locking implementation uses the existence of separate lock - * files (really a directory) to control access to the database. This works - * on just about every filesystem imaginable. But there are serious downsides: - * - * (1) There is zero concurrency. A single reader blocks all other - * connections from reading or writing the database. - * - * (2) An application crash or power loss can leave stale lock files - * sitting around that need to be cleared manually. - * - * Nevertheless, a dotlock is an appropriate locking mode for use if no - * other locking strategy is available. + *************** Non-locking sqlite3_file methods ***************************** * - * Dotfile locking works by creating a subdirectory in the same directory as - * the database and with the same name but with a ".lock" extension added. - * The existence of a lock directory implies an EXCLUSIVE lock. All other - * lock types (SHARED, RESERVED, PENDING) are mapped into EXCLUSIVE. - */ - -/* - * The file suffix added to the data base filename in order to create the - * lock directory. + * The next division contains implementations for all methods of the + * sqlite3_file object other than the locking methods. The locking + * methods were defined in divisions above (one locking method per + * division). Those methods that are common to all locking modes + * are gather together into this division. */ -#define DOTLOCK_SUFFIX ".lock" /* - * This routine checks if there is a RESERVED lock held on the specified - * file by this or any other process. If such a lock is held, set *pResOut - * to a non-zero value otherwise *pResOut is set to zero. The return value - * is set to SQLITE_OK unless an I/O error occurs during lock checking. + * Seek to the offset passed as the second argument, then read cnt + * bytes into pBuf. Return the number of bytes actually read. * - * In dotfile locking, either a lock exists or it does not. So in this - * variation of CheckReservedLock(), *pResOut is set to true if any lock - * is held on the file and false if the file is unlocked. + * NB: If you define USE_PREAD or USE_PREAD64, then it might also + * be necessary to define _XOPEN_SOURCE to be 500. This varies from + * one system to another. Since SQLite does not define USE_PREAD + * in any form by default, we will not attempt to define _XOPEN_SOURCE. + * See tickets #2741 and #2681. + * + * To avoid stomping the errno value on a failed read the lastErrno value + * is set before returning. */ static int -dotlockCheckReservedLock(sqlite3_file * id, int *pResOut) +seekAndRead(unixFile * id, sqlite3_int64 offset, void *pBuf, int cnt) { - int rc = SQLITE_OK; - int reserved = 0; - unixFile *pFile = (unixFile *) id; - - SimulateIOError(return SQLITE_IOERR_CHECKRESERVEDLOCK; - ); - - assert(pFile); - reserved = osAccess((const char *)pFile->lockingContext, 0) == 0; - *pResOut = reserved; - return rc; -} + int got; + int prior = 0; + i64 newOffset; -/* - * Lock the file with the lock specified by parameter eFileLock - one - * of the following: - * - * (1) SHARED_LOCK - * (2) RESERVED_LOCK - * (3) PENDING_LOCK - * (4) EXCLUSIVE_LOCK - * - * Sometimes when requesting one lock state, additional lock states - * are inserted in between. The locking might fail on one of the later - * transitions leaving the lock state different from what it started but - * still short of its goal. The following chart shows the allowed - * transitions and the inserted intermediate states: - * - * UNLOCKED -> SHARED - * SHARED -> RESERVED - * SHARED -> (PENDING) -> EXCLUSIVE - * RESERVED -> (PENDING) -> EXCLUSIVE - * PENDING -> EXCLUSIVE - * - * This routine will only increase a lock. Use the sqlite3OsUnlock() - * routine to lower a locking level. - * - * With dotfile locking, we really only support state (4): EXCLUSIVE. - * But we track the other locking levels internally. - */ -static int -dotlockLock(sqlite3_file * id, int eFileLock) -{ - unixFile *pFile = (unixFile *) id; - char *zLockFile = (char *)pFile->lockingContext; - int rc; - - /* If we have any lock, then the lock file already exists. All we have - * to do is adjust our internal record of the lock level. - */ - if (pFile->eFileLock > NO_LOCK) { - pFile->eFileLock = eFileLock; - /* Always update the timestamp on the old file */ -#ifdef HAVE_UTIME - utime(zLockFile, NULL); -#else - utimes(zLockFile, NULL); -#endif - return SQLITE_OK; - } - - /* grab an exclusive lock */ - rc = osMkdir(zLockFile, 0777); - if (rc < 0) { - /* failed to open/create the lock directory */ - int tErrno = errno; - if (EEXIST == tErrno) { - rc = SQLITE_BUSY; - } else { - rc = sqliteErrorFromPosixError(tErrno, - SQLITE_IOERR_LOCK); - if (rc != SQLITE_BUSY) { - storeLastErrno(pFile, tErrno); + assert(cnt == (cnt & 0x1ffff)); + assert(id->h > 2); + do { + newOffset = lseek(id->h, offset, SEEK_SET); + SimulateIOError(newOffset = -1); + if (newOffset < 0) { + storeLastErrno((unixFile *) id, errno); + return -1; + } + got = read(id->h, pBuf, cnt); + if (got == cnt) + break; + if (got < 0) { + if (errno == EINTR) { + got = 1; + continue; } + prior = 0; + storeLastErrno((unixFile *) id, errno); + break; + } else if (got > 0) { + cnt -= got; + offset += got; + prior += got; + pBuf = (void *)(got + (char *)pBuf); } - return rc; - } - - /* got it, set the type and return ok */ - pFile->eFileLock = eFileLock; - return rc; + } while (got > 0); + return got + prior; } /* - * Lower the locking level on file descriptor pFile to eFileLock. eFileLock - * must be either NO_LOCK or SHARED_LOCK. - * - * If the locking level of the file descriptor is already at or below - * the requested locking level, this routine is a no-op. - * - * When the locking level reaches NO_LOCK, delete the lock file. + * Read data from a file into a buffer. Return SQLITE_OK if all + * bytes were read successfully and SQLITE_IOERR if anything goes + * wrong. */ static int -dotlockUnlock(sqlite3_file * id, int eFileLock) +unixRead(sqlite3_file * id, void *pBuf, int amt, sqlite3_int64 offset) { unixFile *pFile = (unixFile *) id; - char *zLockFile = (char *)pFile->lockingContext; - int rc; - - assert(pFile); - assert(eFileLock <= SHARED_LOCK); - - /* no-op if possible */ - if (pFile->eFileLock == eFileLock) { - return SQLITE_OK; - } + int got; + assert(id); + assert(offset >= 0); + assert(amt > 0); - /* To downgrade to shared, simply update our internal notion of the - * lock state. No need to mess with the file on disk. +#if SQLITE_MAX_MMAP_SIZE>0 + /* Deal with as much of this read request as possible by transfering + * data from the memory mapping using memcpy(). */ - if (eFileLock == SHARED_LOCK) { - pFile->eFileLock = SHARED_LOCK; - return SQLITE_OK; - } - - /* To fully unlock the database, delete the lock file */ - assert(eFileLock == NO_LOCK); - rc = osRmdir(zLockFile); - if (rc < 0) { - int tErrno = errno; - if (tErrno == ENOENT) { - rc = SQLITE_OK; + if (offset < pFile->mmapSize) { + if (offset + amt <= pFile->mmapSize) { + memcpy(pBuf, &((u8 *) (pFile->pMapRegion))[offset], + amt); + return SQLITE_OK; } else { - rc = SQLITE_IOERR_UNLOCK; - storeLastErrno(pFile, tErrno); + int nCopy = pFile->mmapSize - offset; + memcpy(pBuf, &((u8 *) (pFile->pMapRegion))[offset], + nCopy); + pBuf = &((u8 *) pBuf)[nCopy]; + amt -= nCopy; + offset += nCopy; } - return rc; } - pFile->eFileLock = NO_LOCK; - return SQLITE_OK; +#endif + + got = seekAndRead(pFile, offset, pBuf, amt); + if (got == amt) { + return SQLITE_OK; + } else if (got < 0) { + /* lastErrno set by seekAndRead */ + return SQLITE_IOERR_READ; + } else { + storeLastErrno(pFile, 0); /* not a system error */ + /* Unread parts of the buffer must be zero-filled */ + memset(&((char *)pBuf)[got], 0, amt - got); + return SQLITE_IOERR_SHORT_READ; + } } /* - * Close a file. Make sure the lock has been released before closing. + * Attempt to seek the file-descriptor passed as the first argument to + * absolute offset iOff, then attempt to write nBuf bytes of data from + * pBuf to it. If an error occurs, return -1 and set *piErrno. Otherwise, + * return the actual number of bytes written (which may be less than + * nBuf). */ static int -dotlockClose(sqlite3_file * id) +seekAndWriteFd(int fd, /* File descriptor to write to */ + i64 iOff, /* File offset to begin writing at */ + const void *pBuf, /* Copy data from this buffer to the file */ + int nBuf, /* Size of buffer pBuf in bytes */ + int *piErrno /* OUT: Error number if error occurs */ + ) { - unixFile *pFile = (unixFile *) id; - assert(id != 0); - dotlockUnlock(id, NO_LOCK); - sqlite3_free(pFile->lockingContext); - return closeUnixFile(id); -} + int rc = 0; /* Value returned by system call */ -/****************** End of the dot-file lock implementation ******************* - *****************************************************************************/ + assert(nBuf == (nBuf & 0x1ffff)); + assert(fd > 2); + assert(piErrno != 0); + nBuf &= 0x1ffff; + do { + i64 iSeek = lseek(fd, iOff, SEEK_SET); + SimulateIOError(iSeek = -1); + if (iSeek < 0) { + rc = -1; + break; + } + rc = write(fd, pBuf, nBuf); + } while (rc < 0 && errno == EINTR); -/****************************************************************************** - ************************* Begin flock Locking ******************************** - * - * Use the flock() system call to do file locking. - * - * flock() locking is like dot-file locking in that the various - * fine-grain locking levels supported by SQLite are collapsed into - * a single exclusive lock. In other words, SHARED, RESERVED, and - * PENDING locks are the same thing as an EXCLUSIVE lock. SQLite - * still works when you do this, but concurrency is reduced since - * only a single process can be reading the database at a time. - * - * Omit this section if SQLITE_ENABLE_LOCKING_STYLE is turned off - */ -#if SQLITE_ENABLE_LOCKING_STYLE + if (rc < 0) + *piErrno = errno; + return rc; +} /* - * Retry flock() calls that fail with EINTR + * Seek to the offset in id->offset then read cnt bytes into pBuf. + * Return the number of bytes actually read. Update the offset. + * + * To avoid stomping the errno value on a failed write the lastErrno value + * is set before returning. */ -#ifdef EINTR static int -robust_flock(int fd, int op) +seekAndWrite(unixFile * id, i64 offset, const void *pBuf, int cnt) { - int rc; - do { - rc = flock(fd, op); - } while (rc < 0 && errno == EINTR); - return rc; + return seekAndWriteFd(id->h, offset, pBuf, cnt, &id->lastErrno); } -#else -#define robust_flock(a,b) flock(a,b) -#endif /* - * This routine checks if there is a RESERVED lock held on the specified - * file by this or any other process. If such a lock is held, set *pResOut - * to a non-zero value otherwise *pResOut is set to zero. The return value - * is set to SQLITE_OK unless an I/O error occurs during lock checking. + * Write data from a buffer into a file. Return SQLITE_OK on success + * or some other error code on failure. */ static int -flockCheckReservedLock(sqlite3_file * id, int *pResOut) +unixWrite(sqlite3_file * id, const void *pBuf, int amt, sqlite3_int64 offset) { - int rc = SQLITE_OK; - int reserved = 0; unixFile *pFile = (unixFile *) id; + int wrote = 0; + assert(id); + assert(amt > 0); - SimulateIOError(return SQLITE_IOERR_CHECKRESERVEDLOCK; - ); - - assert(pFile); - - /* Check if a thread in this process holds such a lock */ - if (pFile->eFileLock > SHARED_LOCK) { - reserved = 1; + while ((wrote = seekAndWrite(pFile, offset, pBuf, amt)) < amt + && wrote > 0) { + amt -= wrote; + offset += wrote; + pBuf = &((char *)pBuf)[wrote]; } + SimulateIOError((wrote = (-1), amt = 1)); + SimulateDiskfullError((wrote = 0, amt = 1)); - /* Otherwise see if some other process holds it. */ - if (!reserved) { - /* attempt to get the lock */ - int lrc = robust_flock(pFile->h, LOCK_EX | LOCK_NB); - if (!lrc) { - /* got the lock, unlock it */ - lrc = robust_flock(pFile->h, LOCK_UN); - if (lrc) { - int tErrno = errno; - /* unlock failed with an error */ - lrc = SQLITE_IOERR_UNLOCK; - storeLastErrno(pFile, tErrno); - rc = lrc; - } + if (amt > wrote) { + if (wrote < 0 && pFile->lastErrno != ENOSPC) { + /* lastErrno set by seekAndWrite */ + return SQLITE_IOERR_WRITE; } else { - int tErrno = errno; - reserved = 1; - /* someone else might have it reserved */ - lrc = - sqliteErrorFromPosixError(tErrno, - SQLITE_IOERR_LOCK); - if (IS_LOCK_ERROR(lrc)) { - storeLastErrno(pFile, tErrno); - rc = lrc; - } + storeLastErrno(pFile, 0); /* not a system error */ + return SQLITE_FULL; } } -#ifdef SQLITE_IGNORE_FLOCK_LOCK_ERRORS - if ((rc & SQLITE_IOERR) == SQLITE_IOERR) { - rc = SQLITE_OK; - reserved = 1; - } -#endif /* SQLITE_IGNORE_FLOCK_LOCK_ERRORS */ - *pResOut = reserved; - return rc; + return SQLITE_OK; } +#ifdef SQLITE_TEST /* - * Lock the file with the lock specified by parameter eFileLock - one - * of the following: - * - * (1) SHARED_LOCK - * (2) RESERVED_LOCK - * (3) PENDING_LOCK - * (4) EXCLUSIVE_LOCK - * - * Sometimes when requesting one lock state, additional lock states - * are inserted in between. The locking might fail on one of the later - * transitions leaving the lock state different from what it started but - * still short of its goal. The following chart shows the allowed - * transitions and the inserted intermediate states: - * - * UNLOCKED -> SHARED - * SHARED -> RESERVED - * SHARED -> (PENDING) -> EXCLUSIVE - * RESERVED -> (PENDING) -> EXCLUSIVE - * PENDING -> EXCLUSIVE + * Count the number of fullsyncs and normal syncs. This is used to test + * that syncs and fullsyncs are occurring at the right times. + */ +int sqlite3_sync_count = 0; +int sqlite3_fullsync_count = 0; +#endif + + +/* + * The fsync() system call does not work as advertised on many + * unix systems. The following procedure is an attempt to make + * it work better. * - * flock() only really support EXCLUSIVE locks. We track intermediate - * lock states in the sqlite3_file structure, but all locks SHARED or - * above are really EXCLUSIVE locks and exclude all other processes from - * access the file. + * The SQLITE_NO_SYNC macro disables all fsync()s. This is useful + * for testing when we want to run through the test suite quickly. + * You are strongly advised *not* to deploy with SQLITE_NO_SYNC + * enabled, however, since with SQLITE_NO_SYNC enabled, an OS crash + * or power failure will likely corrupt the database file. * - * This routine will only increase a lock. Use the sqlite3OsUnlock() - * routine to lower a locking level. + * SQLite sets the dataOnly flag if the size of the file is unchanged. + * The idea behind dataOnly is that it should only write the file content + * to disk, not the inode. We only set dataOnly if the file size is + * unchanged since the file size is part of the inode. However, + * Ted Ts'o tells us that fdatasync() will also write the inode if the + * file size has changed. The only real difference between fdatasync() + * and fsync(), Ted tells us, is that fdatasync() will not flush the + * inode if the mtime or owner or other inode attributes have changed. + * We only care about the file size, not the other file attributes, so + * as far as SQLite is concerned, an fdatasync() is always adequate. + * So, we always use fdatasync() if it is available, regardless of + * the value of the dataOnly flag. */ static int -flockLock(sqlite3_file * id, int eFileLock) +full_fsync(int fd, int fullSync, int dataOnly) { - int rc = SQLITE_OK; - unixFile *pFile = (unixFile *) id; - - assert(pFile); + UNUSED_PARAMETER(fd); + UNUSED_PARAMETER(fullSync); + UNUSED_PARAMETER(dataOnly); - /* if we already have a lock, it is exclusive. - * Just adjust level and punt on outta here. + /* Record the number of times that we do a normal fsync() and + * FULLSYNC. This is used during testing to verify that this procedure + * gets called with the correct arguments. */ - if (pFile->eFileLock > NO_LOCK) { - pFile->eFileLock = eFileLock; - return SQLITE_OK; - } - - /* grab an exclusive lock */ - - if (robust_flock(pFile->h, LOCK_EX | LOCK_NB)) { - int tErrno = errno; - /* didn't get, must be busy */ - rc = sqliteErrorFromPosixError(tErrno, SQLITE_IOERR_LOCK); - if (IS_LOCK_ERROR(rc)) { - storeLastErrno(pFile, tErrno); - } - } else { - /* got it, set the type and return ok */ - pFile->eFileLock = eFileLock; - } +#ifdef SQLITE_TEST + if (fullSync) + sqlite3_fullsync_count++; + sqlite3_sync_count++; +#endif -#ifdef SQLITE_IGNORE_FLOCK_LOCK_ERRORS - if ((rc & SQLITE_IOERR) == SQLITE_IOERR) { - rc = SQLITE_BUSY; - } -#endif /* SQLITE_IGNORE_FLOCK_LOCK_ERRORS */ - return rc; + struct stat buf; + return fstat(fd, &buf); } /* - * Lower the locking level on file descriptor pFile to eFileLock. eFileLock - * must be either NO_LOCK or SHARED_LOCK. + * Open a file descriptor to the directory containing file zFilename. + * If successful, *pFd is set to the opened file descriptor and + * SQLITE_OK is returned. If an error occurs, either SQLITE_NOMEM + * or SQLITE_CANTOPEN is returned and *pFd is set to an undefined + * value. * - * If the locking level of the file descriptor is already at or below - * the requested locking level, this routine is a no-op. + * The directory file descriptor is used for only one thing - to + * fsync() a directory to make sure file creation and deletion events + * are flushed to disk. Such fsyncs are not needed on newer + * journaling filesystems, but are required on older filesystems. + * + * This routine can be overridden using the xSetSysCall interface. + * The ability to override this routine was added in support of the + * chromium sandbox. Opening a directory is a security risk (we are + * told) so making it overrideable allows the chromium sandbox to + * replace this routine with a harmless no-op. To make this routine + * a no-op, replace it with a stub that returns SQLITE_OK but leaves + * *pFd set to a negative number. + * + * If SQLITE_OK is returned, the caller is responsible for closing + * the file descriptor *pFd using close(). */ static int -flockUnlock(sqlite3_file * id, int eFileLock) +openDirectory(const char *zFilename, int *pFd) { - unixFile *pFile = (unixFile *) id; - - assert(pFile); - assert(eFileLock <= SHARED_LOCK); - - /* no-op if possible */ - if (pFile->eFileLock == eFileLock) { - return SQLITE_OK; - } + int ii; + int fd; + char zDirname[MAX_PATHNAME + 1]; - /* shared can just be set because we always have an exclusive */ - if (eFileLock == SHARED_LOCK) { - pFile->eFileLock = eFileLock; - return SQLITE_OK; + sqlite3_snprintf(MAX_PATHNAME, zDirname, "%s", zFilename); + for (ii = (int)strlen(zDirname); ii > 0 && zDirname[ii] != '/'; ii--) ; + if (ii > 0) { + zDirname[ii] = '\0'; + } else { + if (zDirname[0] != '/') + zDirname[0] = '.'; + zDirname[1] = 0; } + fd = robust_open(zDirname, O_RDONLY | O_BINARY, 0); - /* no, really, unlock. */ - if (robust_flock(pFile->h, LOCK_UN)) { -#ifdef SQLITE_IGNORE_FLOCK_LOCK_ERRORS - return SQLITE_OK; -#endif /* SQLITE_IGNORE_FLOCK_LOCK_ERRORS */ - return SQLITE_IOERR_UNLOCK; - } else { - pFile->eFileLock = NO_LOCK; + *pFd = fd; + if (fd >= 0) return SQLITE_OK; - } + return unixLogError(SQLITE_CANTOPEN_BKPT, "openDirectory", zDirname); } /* - * Close a file. + * Make sure all writes to a particular file are committed to disk. + * + * If dataOnly==0 then both the file itself and its metadata (file + * size, access time, etc) are synced. If dataOnly!=0 then only the + * file data is synced. + * + * Under Unix, also make sure that the directory entry for the file + * has been created by fsync-ing the directory that contains the file. + * If we do not do this and we encounter a power failure, the directory + * entry for the journal might not exist after we reboot. The next + * SQLite to access the file will not know that the journal exists (because + * the directory entry for the journal was never created) and the transaction + * will not roll back - possibly leading to database corruption. */ static int -flockClose(sqlite3_file * id) +unixSync(sqlite3_file * id, int flags) { - assert(id != 0); - flockUnlock(id, NO_LOCK); - return closeUnixFile(id); -} - -#endif /* SQLITE_ENABLE_LOCKING_STYLE */ - -/******************* End of the flock lock implementation ********************* - *****************************************************************************/ + int rc; + unixFile *pFile = (unixFile *) id; + int isDataOnly = (flags & SQLITE_SYNC_DATAONLY); + int isFullsync = (flags & 0x0F) == SQLITE_SYNC_FULL; -/****************************************************************************** - ************************** Begin AFP Locking ********************************* - * - * AFP is the Apple Filing Protocol. AFP is a network filesystem found - * on Apple Macintosh computers - both OS9 and OSX. - * - * Third-party implementations of AFP are available. But this code here - * only works on OSX. - */ + /* Check that one of SQLITE_SYNC_NORMAL or FULL was passed */ + assert((flags & 0x0F) == SQLITE_SYNC_NORMAL + || (flags & 0x0F) == SQLITE_SYNC_FULL); -#if defined(__APPLE__) && SQLITE_ENABLE_LOCKING_STYLE -/* - * The afpLockingContext structure contains all afp lock specific state - */ -typedef struct afpLockingContext afpLockingContext; -struct afpLockingContext { - int reserved; - const char *dbPath; /* Name of the open file */ -}; + /* Unix cannot, but some systems may return SQLITE_FULL from here. This + * line is to test that doing so does not cause any problems. + */ + SimulateDiskfullError(return SQLITE_FULL); -struct ByteRangeLockPB2 { - unsigned long long offset; /* offset to first byte to lock */ - unsigned long long length; /* nbr of bytes to lock */ - unsigned long long retRangeStart; /* nbr of 1st byte locked if successful */ - unsigned char unLockFlag; /* 1 = unlock, 0 = lock */ - unsigned char startEndFlag; /* 1=rel to end of fork, 0=rel to start */ - int fd; /* file desc to assoc this lock with */ -}; + assert(pFile); + rc = full_fsync(pFile->h, isFullsync, isDataOnly); + SimulateIOError(rc = 1); + if (rc) { + storeLastErrno(pFile, errno); + return unixLogError(SQLITE_IOERR_FSYNC, "full_fsync", + pFile->zPath); + } -#define afpfsByteRangeLock2FSCTL _IOWR('z', 23, struct ByteRangeLockPB2) + return rc; +} /* - * This is a utility for setting or clearing a bit-range lock on an - * AFP filesystem. - * - * Return SQLITE_OK on success, SQLITE_BUSY on failure. + * Truncate an open file to a specified size */ static int -afpSetLock(const char *path, /* Name of the file to be locked or unlocked */ - unixFile * pFile, /* Open file descriptor on path */ - unsigned long long offset, /* First byte to be locked */ - unsigned long long length, /* Number of bytes to lock */ - int setLockFlag /* True to set lock. False to clear lock */ - ) +unixTruncate(sqlite3_file * id, i64 nByte) { - struct ByteRangeLockPB2 pb; - int err; - - pb.unLockFlag = setLockFlag ? 0 : 1; - pb.startEndFlag = 0; - pb.offset = offset; - pb.length = length; - pb.fd = pFile->h; + unixFile *pFile = (unixFile *) id; + int rc; + assert(pFile); + SimulateIOError(return SQLITE_IOERR_TRUNCATE); - err = fsctl(path, afpfsByteRangeLock2FSCTL, &pb, 0); - if (err == -1) { - int rc; - int tErrno = errno; + /* If the user has configured a chunk-size for this file, truncate the + * file so that it consists of an integer number of chunks (i.e. the + * actual file size after the operation may be larger than the requested + * size). + */ + if (pFile->szChunk > 0) { + nByte = + ((nByte + pFile->szChunk - + 1) / pFile->szChunk) * pFile->szChunk; + } -#ifdef SQLITE_IGNORE_AFP_LOCK_ERRORS - rc = SQLITE_BUSY; -#else - rc = sqliteErrorFromPosixError(tErrno, - setLockFlag ? SQLITE_IOERR_LOCK : - SQLITE_IOERR_UNLOCK); -#endif /* SQLITE_IGNORE_AFP_LOCK_ERRORS */ - if (IS_LOCK_ERROR(rc)) { - storeLastErrno(pFile, tErrno); - } - return rc; + rc = robust_ftruncate(pFile->h, nByte); + if (rc) { + storeLastErrno(pFile, errno); + return unixLogError(SQLITE_IOERR_TRUNCATE, "ftruncate", + pFile->zPath); } else { + /* If the file was just truncated to a size smaller than the currently + * mapped region, reduce the effective mapping size as well. SQLite will + * use read() and write() to access data beyond this point from now on. + */ + if (nByte < pFile->mmapSize) + pFile->mmapSize = nByte; + return SQLITE_OK; } } /* - * This routine checks if there is a RESERVED lock held on the specified - * file by this or any other process. If such a lock is held, set *pResOut - * to a non-zero value otherwise *pResOut is set to zero. The return value - * is set to SQLITE_OK unless an I/O error occurs during lock checking. + * Determine the current size of a file in bytes */ static int -afpCheckReservedLock(sqlite3_file * id, int *pResOut) -{ - int rc = SQLITE_OK; - int reserved = 0; - unixFile *pFile = (unixFile *) id; - afpLockingContext *context; +unixFileSize(sqlite3_file * id, i64 * pSize) { + int rc; + struct stat buf; + assert(id); + rc = fstat(((unixFile *) id)->h, &buf); + SimulateIOError(rc = 1); + if (rc != 0) { + storeLastErrno((unixFile *) id, errno); + return SQLITE_IOERR_FSTAT; + } + *pSize = buf.st_size; - SimulateIOError(return SQLITE_IOERR_CHECKRESERVEDLOCK; - ); + /* When opening a zero-size database, the findInodeInfo() procedure + * writes a single byte into that file in order to work around a bug + * in the OS-X msdos filesystem. In order to avoid problems with upper + * layers, we need to report this file size as zero even though it is + * really 1. Ticket #3260. + */ + if (*pSize == 1) + *pSize = 0; - assert(pFile); - context = (afpLockingContext *) pFile->lockingContext; - if (context->reserved) { - *pResOut = 1; - return SQLITE_OK; - } + return SQLITE_OK; +} - /* Check if a thread in this process holds such a lock */ - if (pFile->pInode->eFileLock > SHARED_LOCK) { - reserved = 1; - } +/* + * This function is called to handle the SQLITE_FCNTL_SIZE_HINT + * file-control operation. Enlarge the database to nBytes in size + * (rounded up to the next chunk-size). If the database is already + * nBytes or larger, this routine is a no-op. + */ +static int +fcntlSizeHint(unixFile * pFile, i64 nByte) +{ + if (pFile->szChunk > 0) { + i64 nSize; /* Required file size */ + struct stat buf; /* Used to hold return values of fstat() */ - /* Otherwise see if some other process holds it. - */ - if (!reserved) { - /* lock the RESERVED byte */ - int lrc = - afpSetLock(context->dbPath, pFile, RESERVED_BYTE, 1, 1); - if (SQLITE_OK == lrc) { - /* if we succeeded in taking the reserved lock, unlock it to restore - * the original state - */ - lrc = - afpSetLock(context->dbPath, pFile, RESERVED_BYTE, 1, - 0); - } else { - /* if we failed to get the lock then someone else must have it */ - reserved = 1; + if (fstat(pFile->h, &buf)) + return SQLITE_IOERR_FSTAT; + + nSize = + ((nByte + pFile->szChunk - + 1) / pFile->szChunk) * pFile->szChunk; + if (nSize > (i64) buf.st_size) { + int nBlk = buf.st_blksize; /* File-system block size */ + int nWrite = 0; /* Number of bytes written by seekAndWrite */ + i64 iWrite; /* Next offset to write to */ + + iWrite = (buf.st_size / nBlk) * nBlk + nBlk - 1; + assert(iWrite >= buf.st_size); + assert(((iWrite + 1) % nBlk) == 0); + for ( /*no-op */ ; iWrite < nSize + nBlk - 1; + iWrite += nBlk) { + if (iWrite >= nSize) + iWrite = nSize - 1; + nWrite = seekAndWrite(pFile, iWrite, "", 1); + if (nWrite != 1) + return SQLITE_IOERR_WRITE; + } } - if (IS_LOCK_ERROR(lrc)) { - rc = lrc; + } + if (pFile->mmapSizeMax > 0 && nByte > pFile->mmapSize) { + int rc; + if (pFile->szChunk <= 0) { + if (robust_ftruncate(pFile->h, nByte)) { + storeLastErrno(pFile, errno); + return unixLogError(SQLITE_IOERR_TRUNCATE, + "ftruncate", pFile->zPath); + } } + + rc = unixMapfile(pFile, nByte); + return rc; } - *pResOut = reserved; - return rc; + return SQLITE_OK; } +/* Forward declaration */ +static int unixGetTempname(int nBuf, char *zBuf); + /* - * Lock the file with the lock specified by parameter eFileLock - one - * of the following: - * - * (1) SHARED_LOCK - * (2) RESERVED_LOCK - * (3) PENDING_LOCK - * (4) EXCLUSIVE_LOCK - * - * Sometimes when requesting one lock state, additional lock states - * are inserted in between. The locking might fail on one of the later - * transitions leaving the lock state different from what it started but - * still short of its goal. The following chart shows the allowed - * transitions and the inserted intermediate states: - * - * UNLOCKED -> SHARED - * SHARED -> RESERVED - * SHARED -> (PENDING) -> EXCLUSIVE - * RESERVED -> (PENDING) -> EXCLUSIVE - * PENDING -> EXCLUSIVE - * - * This routine will only increase a lock. Use the sqlite3OsUnlock() - * routine to lower a locking level. + * Information and control of an open file handle. */ static int -afpLock(sqlite3_file * id, int eFileLock) +unixFileControl(sqlite3_file * id, int op, void *pArg) { - int rc = SQLITE_OK; unixFile *pFile = (unixFile *) id; - unixInodeInfo *pInode = pFile->pInode; - afpLockingContext *context = - (afpLockingContext *) pFile->lockingContext; - - assert(pFile); - - /* If there is already a lock of this type or more restrictive on the - * unixFile, do nothing. - */ - if (pFile->eFileLock >= eFileLock) { - return SQLITE_OK; - } - - /* Make sure the locking sequence is correct - * (1) We never move from unlocked to anything higher than shared lock. - * (2) SQLite never explicitly requests a pendig lock. - * (3) A shared lock is always held when a reserve lock is requested. - */ - assert(pFile->eFileLock != NO_LOCK || eFileLock == SHARED_LOCK); - assert(eFileLock != PENDING_LOCK); - assert(eFileLock != RESERVED_LOCK || pFile->eFileLock == SHARED_LOCK); - - pInode = pFile->pInode; - - /* If some thread using this PID has a lock via a different unixFile* - * handle that precludes the requested lock, return BUSY. - */ - if ((pFile->eFileLock != pInode->eFileLock && - (pInode->eFileLock >= PENDING_LOCK || eFileLock > SHARED_LOCK)) - ) { - rc = SQLITE_BUSY; - goto afp_end_lock; - } - - /* If a SHARED lock is requested, and some thread using this PID already - * has a SHARED or RESERVED lock, then increment reference counts and - * return SQLITE_OK. - */ - if (eFileLock == SHARED_LOCK && - (pInode->eFileLock == SHARED_LOCK - || pInode->eFileLock == RESERVED_LOCK)) { - assert(eFileLock == SHARED_LOCK); - assert(pFile->eFileLock == 0); - assert(pInode->nShared > 0); - pFile->eFileLock = SHARED_LOCK; - pInode->nShared++; - pInode->nLock++; - goto afp_end_lock; - } - - /* A PENDING lock is needed before acquiring a SHARED lock and before - * acquiring an EXCLUSIVE lock. For the SHARED lock, the PENDING will - * be released. - */ - if (eFileLock == SHARED_LOCK - || (eFileLock == EXCLUSIVE_LOCK && pFile->eFileLock < PENDING_LOCK) - ) { - int failed; - failed = afpSetLock(context->dbPath, pFile, PENDING_BYTE, 1, 1); - if (failed) { - rc = failed; - goto afp_end_lock; + switch (op) { + case SQLITE_FCNTL_LOCKSTATE:{ + *(int *)pArg = pFile->eFileLock; + return SQLITE_OK; } - } - - /* If control gets to this point, then actually go ahead and make - * operating system calls for the specified lock. - */ - if (eFileLock == SHARED_LOCK) { - int lrc1, lrc2, lrc1Errno = 0; - long lk, mask; - - assert(pInode->nShared == 0); - assert(pInode->eFileLock == 0); - - mask = (sizeof(long) == 8) ? LARGEST_INT64 : 0x7fffffff; - /* Now get the read-lock SHARED_LOCK */ - /* note that the quality of the randomness doesn't matter that much */ - lk = random(); - pInode->sharedByte = (lk & mask) % (SHARED_SIZE - 1); - lrc1 = afpSetLock(context->dbPath, pFile, - SHARED_FIRST + pInode->sharedByte, 1, 1); - if (IS_LOCK_ERROR(lrc1)) { - lrc1Errno = pFile->lastErrno; + case SQLITE_FCNTL_LAST_ERRNO:{ + *(int *)pArg = pFile->lastErrno; + return SQLITE_OK; } - /* Drop the temporary PENDING lock */ - lrc2 = afpSetLock(context->dbPath, pFile, PENDING_BYTE, 1, 0); - - if (IS_LOCK_ERROR(lrc1)) { - storeLastErrno(pFile, lrc1Errno); - rc = lrc1; - goto afp_end_lock; - } else if (IS_LOCK_ERROR(lrc2)) { - rc = lrc2; - goto afp_end_lock; - } else if (lrc1 != SQLITE_OK) { - rc = lrc1; - } else { - pFile->eFileLock = SHARED_LOCK; - pInode->nLock++; - pInode->nShared = 1; + case SQLITE_FCNTL_CHUNK_SIZE:{ + pFile->szChunk = *(int *)pArg; + return SQLITE_OK; } - } else if (eFileLock == EXCLUSIVE_LOCK && pInode->nShared > 1) { - /* We are trying for an exclusive lock but another thread in this - * same process is still holding a shared lock. - */ - rc = SQLITE_BUSY; - } else { - /* The request was for a RESERVED or EXCLUSIVE lock. It is - * assumed that there is a SHARED or greater lock on the file - * already. - */ - int failed = 0; - assert(0 != pFile->eFileLock); - if (eFileLock >= RESERVED_LOCK - && pFile->eFileLock < RESERVED_LOCK) { - /* Acquire a RESERVED lock */ - failed = - afpSetLock(context->dbPath, pFile, RESERVED_BYTE, 1, - 1); - if (!failed) { - context->reserved = 1; + case SQLITE_FCNTL_SIZE_HINT:{ + int rc; + SimulateIOErrorBenign(1); + rc = fcntlSizeHint(pFile, *(i64 *) pArg); + SimulateIOErrorBenign(0); + return rc; + } + case SQLITE_FCNTL_VFSNAME:{ + *(char **)pArg = + sqlite3_mprintf("%s", pFile->pVfs->zName); + return SQLITE_OK; + } + case SQLITE_FCNTL_TEMPFILENAME:{ + char *zTFile = + sqlite3_malloc64(pFile->pVfs->mxPathname); + if (zTFile) { + unixGetTempname(pFile->pVfs->mxPathname, + zTFile); + *(char **)pArg = zTFile; } + return SQLITE_OK; } - if (!failed && eFileLock == EXCLUSIVE_LOCK) { - /* Acquire an EXCLUSIVE lock */ - - /* Remove the shared lock before trying the range. we'll need to - * reestablish the shared lock if we can't get the afpUnlock - */ - if (! - (failed = - afpSetLock(context->dbPath, pFile, - SHARED_FIRST + pInode->sharedByte, 1, - 0))) { - int failed2 = SQLITE_OK; - /* now attemmpt to get the exclusive lock range */ - failed = - afpSetLock(context->dbPath, pFile, - SHARED_FIRST, SHARED_SIZE, 1); - if (failed - && (failed2 = - afpSetLock(context->dbPath, pFile, - SHARED_FIRST + - pInode->sharedByte, 1, 1))) { - /* Can't reestablish the shared lock. Sqlite can't deal, this is - * a critical I/O error - */ - rc = ((failed & SQLITE_IOERR) == - SQLITE_IOERR) ? failed2 : - SQLITE_IOERR_LOCK; - goto afp_end_lock; + case SQLITE_FCNTL_HAS_MOVED:{ + *(int *)pArg = fileHasMoved(pFile); + return SQLITE_OK; + } + case SQLITE_FCNTL_MMAP_SIZE:{ + i64 newLimit = *(i64 *) pArg; + int rc = SQLITE_OK; + if (newLimit > sqlite3GlobalConfig.mxMmap) { + newLimit = sqlite3GlobalConfig.mxMmap; + } + *(i64 *) pArg = pFile->mmapSizeMax; + if (newLimit >= 0 && newLimit != pFile->mmapSizeMax + && pFile->nFetchOut == 0) { + pFile->mmapSizeMax = newLimit; + if (pFile->mmapSize > 0) { + unixUnmapfile(pFile); + rc = unixMapfile(pFile, -1); } - } else { - rc = failed; } + return rc; } - if (failed) { - rc = failed; - } - } - - if (rc == SQLITE_OK) { - pFile->eFileLock = eFileLock; - pInode->eFileLock = eFileLock; - } else if (eFileLock == EXCLUSIVE_LOCK) { - pFile->eFileLock = PENDING_LOCK; - pInode->eFileLock = PENDING_LOCK; } - - afp_end_lock: - return rc; + return SQLITE_NOTFOUND; } /* - * Lower the locking level on file descriptor pFile to eFileLock. eFileLock - * must be either NO_LOCK or SHARED_LOCK. + * Return the sector size in bytes of the underlying block device for + * the specified file. This is almost always 512 bytes, but may be + * larger for some devices. * - * If the locking level of the file descriptor is already at or below - * the requested locking level, this routine is a no-op. + * SQLite code assumes this function cannot fail. It also assumes that + * if two files are created in the same file-system directory (i.e. + * a database and its journal file) that the sector size will be the + * same for both. */ static int -afpUnlock(sqlite3_file * id, int eFileLock) +unixSectorSize(sqlite3_file * NotUsed) { - int rc = SQLITE_OK; - unixFile *pFile = (unixFile *) id; - unixInodeInfo *pInode; - afpLockingContext *context = - (afpLockingContext *) pFile->lockingContext; - int skipShared = 0; -#ifdef SQLITE_TEST - int h = pFile->h; -#endif - - assert(pFile); - - assert(eFileLock <= SHARED_LOCK); - if (pFile->eFileLock <= eFileLock) { - return SQLITE_OK; - } - pInode = pFile->pInode; - assert(pInode->nShared != 0); - if (pFile->eFileLock > SHARED_LOCK) { - assert(pInode->eFileLock == pFile->eFileLock); - SimulateIOErrorBenign(1); - SimulateIOError(h = (-1)) - SimulateIOErrorBenign(0); - -#ifdef SQLITE_DEBUG - /* When reducing a lock such that other processes can start - * reading the database file again, make sure that the - * transaction counter was updated if any part of the database - * file changed. If the transaction counter is not updated, - * other connections to the same file might not realize that - * the file has changed and hence might not know to flush their - * cache. The use of a stale cache can lead to database corruption. - */ - assert(pFile->inNormalWrite == 0 - || pFile->dbUpdate == 0 || pFile->transCntrChng == 1); - pFile->inNormalWrite = 0; -#endif - - if (pFile->eFileLock == EXCLUSIVE_LOCK) { - rc = afpSetLock(context->dbPath, pFile, SHARED_FIRST, - SHARED_SIZE, 0); - if (rc == SQLITE_OK - && (eFileLock == SHARED_LOCK - || pInode->nShared > 1)) { - /* only re-establish the shared lock if necessary */ - int sharedLockByte = - SHARED_FIRST + pInode->sharedByte; - rc = afpSetLock(context->dbPath, pFile, - sharedLockByte, 1, 1); - } else { - skipShared = 1; - } - } - if (rc == SQLITE_OK && pFile->eFileLock >= PENDING_LOCK) { - rc = afpSetLock(context->dbPath, pFile, PENDING_BYTE, 1, - 0); - } - if (rc == SQLITE_OK && pFile->eFileLock >= RESERVED_LOCK - && context->reserved) { - rc = afpSetLock(context->dbPath, pFile, RESERVED_BYTE, - 1, 0); - if (!rc) { - context->reserved = 0; - } - } - if (rc == SQLITE_OK - && (eFileLock == SHARED_LOCK || pInode->nShared > 1)) { - pInode->eFileLock = SHARED_LOCK; - } - } - if (rc == SQLITE_OK && eFileLock == NO_LOCK) { - - /* Decrement the shared lock counter. Release the lock using an - * OS call only when all threads in this same process have released - * the lock. - */ - unsigned long long sharedLockByte = - SHARED_FIRST + pInode->sharedByte; - pInode->nShared--; - if (pInode->nShared == 0) { - SimulateIOErrorBenign(1); - SimulateIOError(h = (-1)) - SimulateIOErrorBenign(0); - if (!skipShared) { - rc = afpSetLock(context->dbPath, pFile, - sharedLockByte, 1, 0); - } - if (!rc) { - pInode->eFileLock = NO_LOCK; - pFile->eFileLock = NO_LOCK; - } - } - if (rc == SQLITE_OK) { - pInode->nLock--; - assert(pInode->nLock >= 0); - if (pInode->nLock == 0) { - closePendingFds(pFile); - } - } - } - - if (rc == SQLITE_OK) - pFile->eFileLock = eFileLock; - return rc; + UNUSED_PARAMETER(NotUsed); + return SQLITE_DEFAULT_SECTOR_SIZE; } /* - * Close a file & cleanup AFP specific locking context + * Return the device characteristics for the file. + * + * This VFS is set up to return SQLITE_IOCAP_POWERSAFE_OVERWRITE by default. + * However, that choice is controversial since technically the underlying + * file system does not always provide powersafe overwrites. (In other + * words, after a power-loss event, parts of the file that were never + * written might end up being altered.) However, non-PSOW behavior is very, + * very rare. And asserting PSOW makes a large reduction in the amount + * of required I/O for journaling, since a lot of padding is eliminated. + * Hence, while POWERSAFE_OVERWRITE is on by default, there is a file-control + * available to turn it off and URI query parameter available to turn it off. */ static int -afpClose(sqlite3_file * id) +unixDeviceCharacteristics(sqlite3_file * pNotUsed) { - int rc = SQLITE_OK; - unixFile *pFile = (unixFile *) id; - assert(id != 0); - afpUnlock(id, NO_LOCK); - if (pFile->pInode && pFile->pInode->nLock) { - /* If there are outstanding locks, do not actually close the file just - * yet because that would clear those locks. Instead, add the file - * descriptor to pInode->aPending. It will be automatically closed when - * the last lock is cleared. - */ - setPendingFd(pFile); - } - releaseInodeInfo(pFile); - sqlite3_free(pFile->lockingContext); - rc = closeUnixFile(id); - return rc; + UNUSED_PARAMETER(pNotUsed); + return SQLITE_OK; } -#endif /* defined(__APPLE__) && SQLITE_ENABLE_LOCKING_STYLE */ -/* - * The code above is the AFP lock implementation. The code is specific - * to MacOSX and does not work on other unix platforms. No alternative - * is available. If you don't compile for a mac, then the "unix-afp" - * VFS is not available. - * - ******************** End of the AFP lock implementation ********************** - *****************************************************************************/ - -/****************************************************************************** - ************************** Begin NFS Locking ******************************* - */ +#define unixShmMap 0 +#define unixShmLock 0 +#define unixShmBarrier 0 +#define unixShmUnmap 0 -#if defined(__APPLE__) && SQLITE_ENABLE_LOCKING_STYLE /* - ** Lower the locking level on file descriptor pFile to eFileLock. eFileLock - ** must be either NO_LOCK or SHARED_LOCK. - ** - ** If the locking level of the file descriptor is already at or below - ** the requested locking level, this routine is a no-op. + * If it is currently memory mapped, unmap file pFd. */ -static int -nfsUnlock(sqlite3_file * id, int eFileLock) +static void +unixUnmapfile(unixFile * pFd) { - return posixUnlock(id, eFileLock, 1); + assert(pFd->nFetchOut == 0); + if (pFd->pMapRegion) { + munmap(pFd->pMapRegion, pFd->mmapSizeActual); + pFd->pMapRegion = 0; + pFd->mmapSize = 0; + pFd->mmapSizeActual = 0; + } } -#endif /* defined(__APPLE__) && SQLITE_ENABLE_LOCKING_STYLE */ /* - * The code above is the NFS lock implementation. The code is specific - * to MacOSX and does not work on other unix platforms. No alternative - * is available. + * Attempt to set the size of the memory mapping maintained by file + * descriptor pFd to nNew bytes. Any existing mapping is discarded. * - ******************** End of the NFS lock implementation ********************** - *****************************************************************************/ - -/****************************************************************************** - *************** Non-locking sqlite3_file methods ***************************** + * If successful, this function sets the following variables: * - * The next division contains implementations for all methods of the - * sqlite3_file object other than the locking methods. The locking - * methods were defined in divisions above (one locking method per - * division). Those methods that are common to all locking modes - * are gather together into this division. - */ - -/* - * Seek to the offset passed as the second argument, then read cnt - * bytes into pBuf. Return the number of bytes actually read. - * - * NB: If you define USE_PREAD or USE_PREAD64, then it might also - * be necessary to define _XOPEN_SOURCE to be 500. This varies from - * one system to another. Since SQLite does not define USE_PREAD - * in any form by default, we will not attempt to define _XOPEN_SOURCE. - * See tickets #2741 and #2681. + * unixFile.pMapRegion + * unixFile.mmapSize + * unixFile.mmapSizeActual * - * To avoid stomping the errno value on a failed read the lastErrno value - * is set before returning. + * If unsuccessful, an error message is logged via sqlite3_log() and + * the three variables above are zeroed. In this case SQLite should + * continue accessing the database using the xRead() and xWrite() + * methods. */ -static int -seekAndRead(unixFile * id, sqlite3_int64 offset, void *pBuf, int cnt) +static void +unixRemapfile(unixFile * pFd, /* File descriptor object */ + i64 nNew /* Required mapping size */ + ) { - int got; - int prior = 0; -#if (!defined(USE_PREAD) && !defined(USE_PREAD64)) - i64 newOffset; -#endif - TIMER_START; - assert(cnt == (cnt & 0x1ffff)); - assert(id->h > 2); - do { -#if defined(USE_PREAD) - got = osPread(id->h, pBuf, cnt, offset); - SimulateIOError(got = -1); -#elif defined(USE_PREAD64) - got = osPread64(id->h, pBuf, cnt, offset); - SimulateIOError(got = -1); -#else - newOffset = lseek(id->h, offset, SEEK_SET); - SimulateIOError(newOffset = -1); - if (newOffset < 0) { - storeLastErrno((unixFile *) id, errno); - return -1; - } - got = osRead(id->h, pBuf, cnt); -#endif - if (got == cnt) - break; - if (got < 0) { - if (errno == EINTR) { - got = 1; - continue; + const char *zErr = "mmap"; + int h = pFd->h; /* File descriptor open on db file */ + u8 *pOrig = (u8 *) pFd->pMapRegion; /* Pointer to current file mapping */ + i64 nOrig = pFd->mmapSizeActual; /* Size of pOrig region in bytes */ + u8 *pNew = 0; /* Location of new mapping */ + int flags = PROT_READ; /* Flags to pass to mmap() */ + + assert(pFd->nFetchOut == 0); + assert(nNew > pFd->mmapSize); + assert(nNew <= pFd->mmapSizeMax); + assert(nNew > 0); + assert(pFd->mmapSizeActual >= pFd->mmapSize); + assert(MAP_FAILED != 0); + + if (pOrig) { + i64 nReuse = pFd->mmapSize; + u8 *pReq = &pOrig[nReuse]; + + /* Unmap any pages of the existing mapping that cannot be reused. */ + if (nReuse != nOrig) + munmap(pReq, nOrig - nReuse); + #ifndef __APPLE__ + pNew = mremap(pOrig, nReuse, nNew, MREMAP_MAYMOVE); + zErr = "mremap"; + #else + pNew = mmap(pReq, nNew - nReuse, flags, MAP_SHARED, h, nReuse); + if (pNew != MAP_FAILED) { + if (pNew != pReq) { + munmap(pNew, nNew - nReuse); + pNew = NULL; + } else { + pNew = pOrig; } - prior = 0; - storeLastErrno((unixFile *) id, errno); - break; - } else if (got > 0) { - cnt -= got; - offset += got; - prior += got; - pBuf = (void *)(got + (char *)pBuf); } - } while (got > 0); - TIMER_END; - return got + prior; + #endif + + /* The attempt to extend the existing mapping failed. Free it. */ + if (pNew == MAP_FAILED || pNew == NULL) + munmap(pOrig, nReuse); + } + + /* If pNew is still NULL, try to create an entirely new mapping. */ + if (pNew == NULL) + pNew = mmap(0, nNew, flags, MAP_SHARED, h, 0); + + if (pNew == MAP_FAILED) { + pNew = 0; + nNew = 0; + unixLogError(SQLITE_OK, zErr, pFd->zPath); + + /* If the mmap() above failed, assume that all subsequent mmap() calls + * will probably fail too. Fall back to using xRead/xWrite exclusively + * in this case. + */ + pFd->mmapSizeMax = 0; + } + pFd->pMapRegion = (void *)pNew; + pFd->mmapSize = pFd->mmapSizeActual = nNew; } /* - * Read data from a file into a buffer. Return SQLITE_OK if all - * bytes were read successfully and SQLITE_IOERR if anything goes - * wrong. + * Memory map or remap the file opened by file-descriptor pFd (if the file + * is already mapped, the existing mapping is replaced by the new). Or, if + * there already exists a mapping for this file, and there are still + * outstanding xFetch() references to it, this function is a no-op. + * + * If parameter nByte is non-negative, then it is the requested size of + * the mapping to create. Otherwise, if nByte is less than zero, then the + * requested size is the size of the file on disk. The actual size of the + * created mapping is either the requested size or the value configured + * using SQLITE_FCNTL_MMAP_LIMIT, whichever is smaller. + * + * SQLITE_OK is returned if no error occurs (even if the mapping is not + * recreated as a result of outstanding references) or an SQLite error + * code otherwise. */ static int -unixRead(sqlite3_file * id, void *pBuf, int amt, sqlite3_int64 offset) +unixMapfile(unixFile * pFd, i64 nMap) { - unixFile *pFile = (unixFile *) id; - int got; - assert(id); - assert(offset >= 0); - assert(amt > 0); + assert(nMap >= 0 || pFd->nFetchOut == 0); + assert(nMap > 0 || (pFd->mmapSize == 0 && pFd->pMapRegion == 0)); + if (pFd->nFetchOut > 0) + return SQLITE_OK; -#if SQLITE_MAX_MMAP_SIZE>0 - /* Deal with as much of this read request as possible by transfering - * data from the memory mapping using memcpy(). - */ - if (offset < pFile->mmapSize) { - if (offset + amt <= pFile->mmapSize) { - memcpy(pBuf, &((u8 *) (pFile->pMapRegion))[offset], - amt); - return SQLITE_OK; - } else { - int nCopy = pFile->mmapSize - offset; - memcpy(pBuf, &((u8 *) (pFile->pMapRegion))[offset], - nCopy); - pBuf = &((u8 *) pBuf)[nCopy]; - amt -= nCopy; - offset += nCopy; - } + if (nMap < 0) { + struct stat statbuf; /* Low-level file information */ + if (fstat(pFd->h, &statbuf)) + return SQLITE_IOERR_FSTAT; + nMap = statbuf.st_size; + } + if (nMap > pFd->mmapSizeMax) { + nMap = pFd->mmapSizeMax; } -#endif - got = seekAndRead(pFile, offset, pBuf, amt); - if (got == amt) { - return SQLITE_OK; - } else if (got < 0) { - /* lastErrno set by seekAndRead */ - return SQLITE_IOERR_READ; - } else { - storeLastErrno(pFile, 0); /* not a system error */ - /* Unread parts of the buffer must be zero-filled */ - memset(&((char *)pBuf)[got], 0, amt - got); - return SQLITE_IOERR_SHORT_READ; + assert(nMap > 0 || (pFd->mmapSize == 0 && pFd->pMapRegion == 0)); + if (nMap != pFd->mmapSize) { + unixRemapfile(pFd, nMap); } + + return SQLITE_OK; } /* - * Attempt to seek the file-descriptor passed as the first argument to - * absolute offset iOff, then attempt to write nBuf bytes of data from - * pBuf to it. If an error occurs, return -1 and set *piErrno. Otherwise, - * return the actual number of bytes written (which may be less than - * nBuf). + * If possible, return a pointer to a mapping of file fd starting at offset + * iOff. The mapping must be valid for at least nAmt bytes. + * + * If such a pointer can be obtained, store it in *pp and return SQLITE_OK. + * Or, if one cannot but no error occurs, set *pp to 0 and return SQLITE_OK. + * Finally, if an error does occur, return an SQLite error code. The final + * value of *pp is undefined in this case. + * + * If this function does return a pointer, the caller must eventually + * release the reference by calling unixUnfetch(). */ static int -seekAndWriteFd(int fd, /* File descriptor to write to */ - i64 iOff, /* File offset to begin writing at */ - const void *pBuf, /* Copy data from this buffer to the file */ - int nBuf, /* Size of buffer pBuf in bytes */ - int *piErrno /* OUT: Error number if error occurs */ - ) +unixFetch(sqlite3_file * fd MAYBE_UNUSED, + i64 iOff MAYBE_UNUSED, + int nAmt MAYBE_UNUSED, void **pp) { - int rc = 0; /* Value returned by system call */ - - assert(nBuf == (nBuf & 0x1ffff)); - assert(fd > 2); - assert(piErrno != 0); - nBuf &= 0x1ffff; - TIMER_START; +#if SQLITE_MAX_MMAP_SIZE>0 + unixFile *pFd = (unixFile *) fd; /* The underlying database file */ +#endif + *pp = 0; -#if defined(USE_PREAD) - do { - rc = (int)osPwrite(fd, pBuf, nBuf, iOff); - } while (rc < 0 && errno == EINTR); -#elif defined(USE_PREAD64) - do { - rc = (int)osPwrite64(fd, pBuf, nBuf, iOff); - } while (rc < 0 && errno == EINTR); -#else - do { - i64 iSeek = lseek(fd, iOff, SEEK_SET); - SimulateIOError(iSeek = -1); - if (iSeek < 0) { - rc = -1; - break; +#if SQLITE_MAX_MMAP_SIZE>0 + if (pFd->mmapSizeMax > 0) { + if (pFd->pMapRegion == 0) { + int rc = unixMapfile(pFd, -1); + if (rc != SQLITE_OK) + return rc; } - rc = osWrite(fd, pBuf, nBuf); - } while (rc < 0 && errno == EINTR); + if (pFd->mmapSize >= iOff + nAmt) { + *pp = &((u8 *) pFd->pMapRegion)[iOff]; + pFd->nFetchOut++; + } + } #endif - - TIMER_END; - - if (rc < 0) - *piErrno = errno; - return rc; + return SQLITE_OK; } /* - * Seek to the offset in id->offset then read cnt bytes into pBuf. - * Return the number of bytes actually read. Update the offset. + * If the third argument is non-NULL, then this function releases a + * reference obtained by an earlier call to unixFetch(). The second + * argument passed to this function must be the same as the corresponding + * argument that was passed to the unixFetch() invocation. * - * To avoid stomping the errno value on a failed write the lastErrno value - * is set before returning. + * Or, if the third argument is NULL, then this function is being called + * to inform the VFS layer that, according to POSIX, any existing mapping + * may now be invalid and should be unmapped. */ static int -seekAndWrite(unixFile * id, i64 offset, const void *pBuf, int cnt) +unixUnfetch(sqlite3_file * fd, i64 iOff, void *p) { - return seekAndWriteFd(id->h, offset, pBuf, cnt, &id->lastErrno); -} + unixFile *pFd = (unixFile *) fd; /* The underlying database file */ + UNUSED_PARAMETER(iOff); -/* - * Write data from a buffer into a file. Return SQLITE_OK on success - * or some other error code on failure. - */ -static int -unixWrite(sqlite3_file * id, const void *pBuf, int amt, sqlite3_int64 offset) -{ - unixFile *pFile = (unixFile *) id; - int wrote = 0; - assert(id); - assert(amt > 0); - -#ifdef SQLITE_DEBUG - /* If we are doing a normal write to a database file, - * then record the fact that the database - * has changed. If the transaction counter is modified, record that - * fact too. - */ - if (pFile->inNormalWrite) { - pFile->dbUpdate = 1; /* The database has been modified */ - if (offset <= 24 && offset + amt >= 27) { - int rc; - char oldCntr[4]; - SimulateIOErrorBenign(1); - rc = seekAndRead(pFile, 24, oldCntr, 4); - SimulateIOErrorBenign(0); - if (rc != 4 - || memcmp(oldCntr, &((char *)pBuf)[24 - offset], - 4) != 0) { - pFile->transCntrChng = 1; /* The transaction counter has changed */ - } - } - } -#endif - -#if defined(SQLITE_MMAP_READWRITE) && SQLITE_MAX_MMAP_SIZE>0 - /* Deal with as much of this write request as possible by transfering - * data from the memory mapping using memcpy(). - */ - if (offset < pFile->mmapSize) { - if (offset + amt <= pFile->mmapSize) { - memcpy(&((u8 *) (pFile->pMapRegion))[offset], pBuf, - amt); - return SQLITE_OK; - } else { - int nCopy = pFile->mmapSize - offset; - memcpy(&((u8 *) (pFile->pMapRegion))[offset], pBuf, - nCopy); - pBuf = &((u8 *) pBuf)[nCopy]; - amt -= nCopy; - offset += nCopy; - } - } -#endif - - while ((wrote = seekAndWrite(pFile, offset, pBuf, amt)) < amt - && wrote > 0) { - amt -= wrote; - offset += wrote; - pBuf = &((char *)pBuf)[wrote]; - } - SimulateIOError((wrote = (-1), amt = 1)); - SimulateDiskfullError((wrote = 0, amt = 1)); - - if (amt > wrote) { - if (wrote < 0 && pFile->lastErrno != ENOSPC) { - /* lastErrno set by seekAndWrite */ - return SQLITE_IOERR_WRITE; - } else { - storeLastErrno(pFile, 0); /* not a system error */ - return SQLITE_FULL; - } - } - - return SQLITE_OK; -} - -#ifdef SQLITE_TEST -/* - * Count the number of fullsyncs and normal syncs. This is used to test - * that syncs and fullsyncs are occurring at the right times. - */ -int sqlite3_sync_count = 0; -int sqlite3_fullsync_count = 0; -#endif - -/* - * We do not trust systems to provide a working fdatasync(). Some do. - * Others do no. To be safe, we will stick with the (slightly slower) - * fsync(). If you know that your system does support fdatasync() correctly, - * then simply compile with -Dfdatasync=fdatasync or -DHAVE_FDATASYNC - */ -#if !defined(fdatasync) && !HAVE_FDATASYNC -#define fdatasync fsync -#endif - -/* - * The fsync() system call does not work as advertised on many - * unix systems. The following procedure is an attempt to make - * it work better. - * - * The SQLITE_NO_SYNC macro disables all fsync()s. This is useful - * for testing when we want to run through the test suite quickly. - * You are strongly advised *not* to deploy with SQLITE_NO_SYNC - * enabled, however, since with SQLITE_NO_SYNC enabled, an OS crash - * or power failure will likely corrupt the database file. - * - * SQLite sets the dataOnly flag if the size of the file is unchanged. - * The idea behind dataOnly is that it should only write the file content - * to disk, not the inode. We only set dataOnly if the file size is - * unchanged since the file size is part of the inode. However, - * Ted Ts'o tells us that fdatasync() will also write the inode if the - * file size has changed. The only real difference between fdatasync() - * and fsync(), Ted tells us, is that fdatasync() will not flush the - * inode if the mtime or owner or other inode attributes have changed. - * We only care about the file size, not the other file attributes, so - * as far as SQLite is concerned, an fdatasync() is always adequate. - * So, we always use fdatasync() if it is available, regardless of - * the value of the dataOnly flag. - */ -static int -full_fsync(int fd, int fullSync, int dataOnly) -{ - int rc; - - /* The following "ifdef/elif/else/" block has the same structure as - * the one below. It is replicated here solely to avoid cluttering - * up the real code with the UNUSED_PARAMETER() macros. - */ -#ifdef SQLITE_NO_SYNC - UNUSED_PARAMETER(fd); - UNUSED_PARAMETER(fullSync); - UNUSED_PARAMETER(dataOnly); -#else - UNUSED_PARAMETER(fullSync); - UNUSED_PARAMETER(dataOnly); -#endif - - /* Record the number of times that we do a normal fsync() and - * FULLSYNC. This is used during testing to verify that this procedure - * gets called with the correct arguments. - */ -#ifdef SQLITE_TEST - if (fullSync) - sqlite3_fullsync_count++; - sqlite3_sync_count++; -#endif - - /* If we compiled with the SQLITE_NO_SYNC flag, then syncing is a - * no-op. But go ahead and call fstat() to validate the file - * descriptor as we need a method to provoke a failure during - * coverate testing. - */ -#ifdef SQLITE_NO_SYNC - { - struct stat buf; - rc = osFstat(fd, &buf); - } -#elif defined(__APPLE__) - /* fdatasync() on HFS+ doesn't yet flush the file size if it changed correctly - * so currently we default to the macro that redefines fdatasync to fsync - */ - rc = fsync(fd); -#else - rc = fdatasync(fd); -#endif /* ifdef SQLITE_NO_SYNC */ - - return rc; -} - -/* - * Open a file descriptor to the directory containing file zFilename. - * If successful, *pFd is set to the opened file descriptor and - * SQLITE_OK is returned. If an error occurs, either SQLITE_NOMEM - * or SQLITE_CANTOPEN is returned and *pFd is set to an undefined - * value. - * - * The directory file descriptor is used for only one thing - to - * fsync() a directory to make sure file creation and deletion events - * are flushed to disk. Such fsyncs are not needed on newer - * journaling filesystems, but are required on older filesystems. - * - * This routine can be overridden using the xSetSysCall interface. - * The ability to override this routine was added in support of the - * chromium sandbox. Opening a directory is a security risk (we are - * told) so making it overrideable allows the chromium sandbox to - * replace this routine with a harmless no-op. To make this routine - * a no-op, replace it with a stub that returns SQLITE_OK but leaves - * *pFd set to a negative number. - * - * If SQLITE_OK is returned, the caller is responsible for closing - * the file descriptor *pFd using close(). - */ -static int -openDirectory(const char *zFilename, int *pFd) -{ - int ii; - int fd; - char zDirname[MAX_PATHNAME + 1]; - - sqlite3_snprintf(MAX_PATHNAME, zDirname, "%s", zFilename); - for (ii = (int)strlen(zDirname); ii > 0 && zDirname[ii] != '/'; ii--) ; - if (ii > 0) { - zDirname[ii] = '\0'; - } else { - if (zDirname[0] != '/') - zDirname[0] = '.'; - zDirname[1] = 0; - } - fd = robust_open(zDirname, O_RDONLY | O_BINARY, 0); - - *pFd = fd; - if (fd >= 0) - return SQLITE_OK; - return unixLogError(SQLITE_CANTOPEN_BKPT, "openDirectory", zDirname); -} - -/* - * Make sure all writes to a particular file are committed to disk. - * - * If dataOnly==0 then both the file itself and its metadata (file - * size, access time, etc) are synced. If dataOnly!=0 then only the - * file data is synced. - * - * Under Unix, also make sure that the directory entry for the file - * has been created by fsync-ing the directory that contains the file. - * If we do not do this and we encounter a power failure, the directory - * entry for the journal might not exist after we reboot. The next - * SQLite to access the file will not know that the journal exists (because - * the directory entry for the journal was never created) and the transaction - * will not roll back - possibly leading to database corruption. - */ -static int -unixSync(sqlite3_file * id, int flags) -{ - int rc; - unixFile *pFile = (unixFile *) id; - - int isDataOnly = (flags & SQLITE_SYNC_DATAONLY); - int isFullsync = (flags & 0x0F) == SQLITE_SYNC_FULL; - - /* Check that one of SQLITE_SYNC_NORMAL or FULL was passed */ - assert((flags & 0x0F) == SQLITE_SYNC_NORMAL - || (flags & 0x0F) == SQLITE_SYNC_FULL); - - /* Unix cannot, but some systems may return SQLITE_FULL from here. This - * line is to test that doing so does not cause any problems. - */ - SimulateDiskfullError(return SQLITE_FULL); - - assert(pFile); - rc = full_fsync(pFile->h, isFullsync, isDataOnly); - SimulateIOError(rc = 1); - if (rc) { - storeLastErrno(pFile, errno); - return unixLogError(SQLITE_IOERR_FSYNC, "full_fsync", - pFile->zPath); - } - - return rc; -} - -/* - * Truncate an open file to a specified size - */ -static int -unixTruncate(sqlite3_file * id, i64 nByte) -{ - unixFile *pFile = (unixFile *) id; - int rc; - assert(pFile); - SimulateIOError(return SQLITE_IOERR_TRUNCATE); - - /* If the user has configured a chunk-size for this file, truncate the - * file so that it consists of an integer number of chunks (i.e. the - * actual file size after the operation may be larger than the requested - * size). - */ - if (pFile->szChunk > 0) { - nByte = - ((nByte + pFile->szChunk - - 1) / pFile->szChunk) * pFile->szChunk; - } - - rc = robust_ftruncate(pFile->h, nByte); - if (rc) { - storeLastErrno(pFile, errno); - return unixLogError(SQLITE_IOERR_TRUNCATE, "ftruncate", - pFile->zPath); - } else { -#ifdef SQLITE_DEBUG - /* If we are doing a normal write to a database file (as opposed to - * doing a hot-journal rollback or a write to some file other than a - * normal database file) and we truncate the file to zero length, - * that effectively updates the change counter. - */ - if (pFile->inNormalWrite && nByte == 0) { - pFile->transCntrChng = 1; - } -#endif - -#if SQLITE_MAX_MMAP_SIZE>0 - /* If the file was just truncated to a size smaller than the currently - * mapped region, reduce the effective mapping size as well. SQLite will - * use read() and write() to access data beyond this point from now on. - */ - if (nByte < pFile->mmapSize) { - pFile->mmapSize = nByte; - } -#endif - - return SQLITE_OK; - } -} - -/* - * Determine the current size of a file in bytes - */ -static int -unixFileSize(sqlite3_file * id, i64 * pSize) -{ - int rc; - struct stat buf; - assert(id); - rc = osFstat(((unixFile *) id)->h, &buf); - SimulateIOError(rc = 1); - if (rc != 0) { - storeLastErrno((unixFile *) id, errno); - return SQLITE_IOERR_FSTAT; - } - *pSize = buf.st_size; - - /* When opening a zero-size database, the findInodeInfo() procedure - * writes a single byte into that file in order to work around a bug - * in the OS-X msdos filesystem. In order to avoid problems with upper - * layers, we need to report this file size as zero even though it is - * really 1. Ticket #3260. - */ - if (*pSize == 1) - *pSize = 0; - - return SQLITE_OK; -} - -#if SQLITE_ENABLE_LOCKING_STYLE && defined(__APPLE__) -/* - * Handler for proxy-locking file-control verbs. Defined below in the - * proxying locking division. - */ -static int proxyFileControl(sqlite3_file *, int, void *); -#endif - -/* - * This function is called to handle the SQLITE_FCNTL_SIZE_HINT - * file-control operation. Enlarge the database to nBytes in size - * (rounded up to the next chunk-size). If the database is already - * nBytes or larger, this routine is a no-op. - */ -static int -fcntlSizeHint(unixFile * pFile, i64 nByte) -{ - if (pFile->szChunk > 0) { - i64 nSize; /* Required file size */ - struct stat buf; /* Used to hold return values of fstat() */ - - if (osFstat(pFile->h, &buf)) { - return SQLITE_IOERR_FSTAT; - } - - nSize = - ((nByte + pFile->szChunk - - 1) / pFile->szChunk) * pFile->szChunk; - if (nSize > (i64) buf.st_size) { - -#if defined(HAVE_POSIX_FALLOCATE) && HAVE_POSIX_FALLOCATE - /* The code below is handling the return value of osFallocate() - * correctly. posix_fallocate() is defined to "returns zero on success, - * or an error number on failure". See the manpage for details. - */ - int err; - do { - err = - osFallocate(pFile->h, buf.st_size, - nSize - buf.st_size); - } while (err == EINTR); - if (err) - return SQLITE_IOERR_WRITE; -#else - /* If the OS does not have posix_fallocate(), fake it. Write a - * single byte to the last byte in each block that falls entirely - * within the extended region. Then, if required, a single byte - * at offset (nSize-1), to set the size of the file correctly. - * This is a similar technique to that used by glibc on systems - * that do not have a real fallocate() call. - */ - int nBlk = buf.st_blksize; /* File-system block size */ - int nWrite = 0; /* Number of bytes written by seekAndWrite */ - i64 iWrite; /* Next offset to write to */ - - iWrite = (buf.st_size / nBlk) * nBlk + nBlk - 1; - assert(iWrite >= buf.st_size); - assert(((iWrite + 1) % nBlk) == 0); - for ( /*no-op */ ; iWrite < nSize + nBlk - 1; - iWrite += nBlk) { - if (iWrite >= nSize) - iWrite = nSize - 1; - nWrite = seekAndWrite(pFile, iWrite, "", 1); - if (nWrite != 1) - return SQLITE_IOERR_WRITE; - } -#endif - } - } -#if SQLITE_MAX_MMAP_SIZE>0 - if (pFile->mmapSizeMax > 0 && nByte > pFile->mmapSize) { - int rc; - if (pFile->szChunk <= 0) { - if (robust_ftruncate(pFile->h, nByte)) { - storeLastErrno(pFile, errno); - return unixLogError(SQLITE_IOERR_TRUNCATE, - "ftruncate", pFile->zPath); - } - } - - rc = unixMapfile(pFile, nByte); - return rc; - } -#endif - - return SQLITE_OK; -} - -/* Forward declaration */ -static int unixGetTempname(int nBuf, char *zBuf); - -/* - * Information and control of an open file handle. - */ -static int -unixFileControl(sqlite3_file * id, int op, void *pArg) -{ - unixFile *pFile = (unixFile *) id; - switch (op) { - case SQLITE_FCNTL_LOCKSTATE:{ - *(int *)pArg = pFile->eFileLock; - return SQLITE_OK; - } - case SQLITE_FCNTL_LAST_ERRNO:{ - *(int *)pArg = pFile->lastErrno; - return SQLITE_OK; - } - case SQLITE_FCNTL_CHUNK_SIZE:{ - pFile->szChunk = *(int *)pArg; - return SQLITE_OK; - } - case SQLITE_FCNTL_SIZE_HINT:{ - int rc; - SimulateIOErrorBenign(1); - rc = fcntlSizeHint(pFile, *(i64 *) pArg); - SimulateIOErrorBenign(0); - return rc; - } - case SQLITE_FCNTL_VFSNAME:{ - *(char **)pArg = - sqlite3_mprintf("%s", pFile->pVfs->zName); - return SQLITE_OK; - } - case SQLITE_FCNTL_TEMPFILENAME:{ - char *zTFile = - sqlite3_malloc64(pFile->pVfs->mxPathname); - if (zTFile) { - unixGetTempname(pFile->pVfs->mxPathname, - zTFile); - *(char **)pArg = zTFile; - } - return SQLITE_OK; - } - case SQLITE_FCNTL_HAS_MOVED:{ - *(int *)pArg = fileHasMoved(pFile); - return SQLITE_OK; - } -#if SQLITE_MAX_MMAP_SIZE>0 - case SQLITE_FCNTL_MMAP_SIZE:{ - i64 newLimit = *(i64 *) pArg; - int rc = SQLITE_OK; - if (newLimit > sqlite3GlobalConfig.mxMmap) { - newLimit = sqlite3GlobalConfig.mxMmap; - } - *(i64 *) pArg = pFile->mmapSizeMax; - if (newLimit >= 0 && newLimit != pFile->mmapSizeMax - && pFile->nFetchOut == 0) { - pFile->mmapSizeMax = newLimit; - if (pFile->mmapSize > 0) { - unixUnmapfile(pFile); - rc = unixMapfile(pFile, -1); - } - } - return rc; - } -#endif -#ifdef SQLITE_DEBUG - /* The pager calls this method to signal that it has done - * a rollback and that the database is therefore unchanged and - * it hence it is OK for the transaction change counter to be - * unchanged. - */ - case SQLITE_FCNTL_DB_UNCHANGED:{ - ((unixFile *) id)->dbUpdate = 0; - return SQLITE_OK; - } -#endif -#if SQLITE_ENABLE_LOCKING_STYLE && defined(__APPLE__) - case SQLITE_FCNTL_SET_LOCKPROXYFILE: - case SQLITE_FCNTL_GET_LOCKPROXYFILE:{ - return proxyFileControl(id, op, pArg); - } -#endif /* SQLITE_ENABLE_LOCKING_STYLE && defined(__APPLE__) */ - } - return SQLITE_NOTFOUND; -} - -/* - * Return the sector size in bytes of the underlying block device for - * the specified file. This is almost always 512 bytes, but may be - * larger for some devices. - * - * SQLite code assumes this function cannot fail. It also assumes that - * if two files are created in the same file-system directory (i.e. - * a database and its journal file) that the sector size will be the - * same for both. - */ -static int -unixSectorSize(sqlite3_file * NotUsed) -{ - UNUSED_PARAMETER(NotUsed); - return SQLITE_DEFAULT_SECTOR_SIZE; -} - -/* - * Return the device characteristics for the file. - * - * This VFS is set up to return SQLITE_IOCAP_POWERSAFE_OVERWRITE by default. - * However, that choice is controversial since technically the underlying - * file system does not always provide powersafe overwrites. (In other - * words, after a power-loss event, parts of the file that were never - * written might end up being altered.) However, non-PSOW behavior is very, - * very rare. And asserting PSOW makes a large reduction in the amount - * of required I/O for journaling, since a lot of padding is eliminated. - * Hence, while POWERSAFE_OVERWRITE is on by default, there is a file-control - * available to turn it off and URI query parameter available to turn it off. - */ -static int -unixDeviceCharacteristics(sqlite3_file * pNotUsed) -{ - UNUSED_PARAMETER(pNotUsed); - return SQLITE_OK; -} - -#if SQLITE_MAX_MMAP_SIZE>0 - -/* - * Return the system page size. - * - * This function should not be called directly by other code in this file. - * Instead, it should be called via macro osGetpagesize(). - */ -static int -unixGetpagesize(void) -{ - return (int)sysconf(_SC_PAGESIZE); -} - -#endif /* SQLITE_MAX_MMAP_SIZE>0 */ - -#define unixShmMap 0 -#define unixShmLock 0 -#define unixShmBarrier 0 -#define unixShmUnmap 0 - -#if SQLITE_MAX_MMAP_SIZE>0 -/* - * If it is currently memory mapped, unmap file pFd. - */ -static void -unixUnmapfile(unixFile * pFd) -{ - assert(pFd->nFetchOut == 0); - if (pFd->pMapRegion) { - osMunmap(pFd->pMapRegion, pFd->mmapSizeActual); - pFd->pMapRegion = 0; - pFd->mmapSize = 0; - pFd->mmapSizeActual = 0; - } -} - -/* - * Attempt to set the size of the memory mapping maintained by file - * descriptor pFd to nNew bytes. Any existing mapping is discarded. - * - * If successful, this function sets the following variables: - * - * unixFile.pMapRegion - * unixFile.mmapSize - * unixFile.mmapSizeActual - * - * If unsuccessful, an error message is logged via sqlite3_log() and - * the three variables above are zeroed. In this case SQLite should - * continue accessing the database using the xRead() and xWrite() - * methods. - */ -static void -unixRemapfile(unixFile * pFd, /* File descriptor object */ - i64 nNew /* Required mapping size */ - ) -{ - const char *zErr = "mmap"; - int h = pFd->h; /* File descriptor open on db file */ - u8 *pOrig = (u8 *) pFd->pMapRegion; /* Pointer to current file mapping */ - i64 nOrig = pFd->mmapSizeActual; /* Size of pOrig region in bytes */ - u8 *pNew = 0; /* Location of new mapping */ - int flags = PROT_READ; /* Flags to pass to mmap() */ - - assert(pFd->nFetchOut == 0); - assert(nNew > pFd->mmapSize); - assert(nNew <= pFd->mmapSizeMax); - assert(nNew > 0); - assert(pFd->mmapSizeActual >= pFd->mmapSize); - assert(MAP_FAILED != 0); - -#ifdef SQLITE_MMAP_READWRITE - if ((pFd->ctrlFlags & UNIXFILE_RDONLY) == 0) - flags |= PROT_WRITE; -#endif - - if (pOrig) { -#if HAVE_MREMAP - i64 nReuse = pFd->mmapSize; -#else - const int szSyspage = osGetpagesize(); - i64 nReuse = (pFd->mmapSize & ~(szSyspage - 1)); -#endif - u8 *pReq = &pOrig[nReuse]; - - /* Unmap any pages of the existing mapping that cannot be reused. */ - if (nReuse != nOrig) { - osMunmap(pReq, nOrig - nReuse); - } -#if HAVE_MREMAP - pNew = osMremap(pOrig, nReuse, nNew, MREMAP_MAYMOVE); - zErr = "mremap"; -#else - pNew = - osMmap(pReq, nNew - nReuse, flags, MAP_SHARED, h, nReuse); - if (pNew != MAP_FAILED) { - if (pNew != pReq) { - osMunmap(pNew, nNew - nReuse); - pNew = 0; - } else { - pNew = pOrig; - } - } -#endif - - /* The attempt to extend the existing mapping failed. Free it. */ - if (pNew == MAP_FAILED || pNew == 0) { - osMunmap(pOrig, nReuse); - } - } - - /* If pNew is still NULL, try to create an entirely new mapping. */ - if (pNew == 0) { - pNew = osMmap(0, nNew, flags, MAP_SHARED, h, 0); - } - - if (pNew == MAP_FAILED) { - pNew = 0; - nNew = 0; - unixLogError(SQLITE_OK, zErr, pFd->zPath); - - /* If the mmap() above failed, assume that all subsequent mmap() calls - * will probably fail too. Fall back to using xRead/xWrite exclusively - * in this case. - */ - pFd->mmapSizeMax = 0; - } - pFd->pMapRegion = (void *)pNew; - pFd->mmapSize = pFd->mmapSizeActual = nNew; -} - -/* - * Memory map or remap the file opened by file-descriptor pFd (if the file - * is already mapped, the existing mapping is replaced by the new). Or, if - * there already exists a mapping for this file, and there are still - * outstanding xFetch() references to it, this function is a no-op. - * - * If parameter nByte is non-negative, then it is the requested size of - * the mapping to create. Otherwise, if nByte is less than zero, then the - * requested size is the size of the file on disk. The actual size of the - * created mapping is either the requested size or the value configured - * using SQLITE_FCNTL_MMAP_LIMIT, whichever is smaller. - * - * SQLITE_OK is returned if no error occurs (even if the mapping is not - * recreated as a result of outstanding references) or an SQLite error - * code otherwise. - */ -static int -unixMapfile(unixFile * pFd, i64 nMap) -{ - assert(nMap >= 0 || pFd->nFetchOut == 0); - assert(nMap > 0 || (pFd->mmapSize == 0 && pFd->pMapRegion == 0)); - if (pFd->nFetchOut > 0) - return SQLITE_OK; - - if (nMap < 0) { - struct stat statbuf; /* Low-level file information */ - if (osFstat(pFd->h, &statbuf)) { - return SQLITE_IOERR_FSTAT; - } - nMap = statbuf.st_size; - } - if (nMap > pFd->mmapSizeMax) { - nMap = pFd->mmapSizeMax; - } - - assert(nMap > 0 || (pFd->mmapSize == 0 && pFd->pMapRegion == 0)); - if (nMap != pFd->mmapSize) { - unixRemapfile(pFd, nMap); - } - - return SQLITE_OK; -} -#endif /* SQLITE_MAX_MMAP_SIZE>0 */ - -/* - * If possible, return a pointer to a mapping of file fd starting at offset - * iOff. The mapping must be valid for at least nAmt bytes. - * - * If such a pointer can be obtained, store it in *pp and return SQLITE_OK. - * Or, if one cannot but no error occurs, set *pp to 0 and return SQLITE_OK. - * Finally, if an error does occur, return an SQLite error code. The final - * value of *pp is undefined in this case. - * - * If this function does return a pointer, the caller must eventually - * release the reference by calling unixUnfetch(). - */ -static int -unixFetch(sqlite3_file * fd MAYBE_UNUSED, - i64 iOff MAYBE_UNUSED, - int nAmt MAYBE_UNUSED, void **pp) -{ -#if SQLITE_MAX_MMAP_SIZE>0 - unixFile *pFd = (unixFile *) fd; /* The underlying database file */ -#endif - *pp = 0; - -#if SQLITE_MAX_MMAP_SIZE>0 - if (pFd->mmapSizeMax > 0) { - if (pFd->pMapRegion == 0) { - int rc = unixMapfile(pFd, -1); - if (rc != SQLITE_OK) - return rc; - } - if (pFd->mmapSize >= iOff + nAmt) { - *pp = &((u8 *) pFd->pMapRegion)[iOff]; - pFd->nFetchOut++; - } - } -#endif - return SQLITE_OK; -} - -/* - * If the third argument is non-NULL, then this function releases a - * reference obtained by an earlier call to unixFetch(). The second - * argument passed to this function must be the same as the corresponding - * argument that was passed to the unixFetch() invocation. - * - * Or, if the third argument is NULL, then this function is being called - * to inform the VFS layer that, according to POSIX, any existing mapping - * may now be invalid and should be unmapped. - */ -static int -unixUnfetch(sqlite3_file * fd, i64 iOff, void *p) -{ -#if SQLITE_MAX_MMAP_SIZE>0 - unixFile *pFd = (unixFile *) fd; /* The underlying database file */ - UNUSED_PARAMETER(iOff); - - /* If p==0 (unmap the entire file) then there must be no outstanding - * xFetch references. Or, if p!=0 (meaning it is an xFetch reference), - * then there must be at least one outstanding. - */ - assert((p == 0) == (pFd->nFetchOut == 0)); - - /* If p!=0, it must match the iOff value. */ - assert(p == 0 || p == &((u8 *) pFd->pMapRegion)[iOff]); - - if (p) { - pFd->nFetchOut--; - } else { - unixUnmapfile(pFd); - } - - assert(pFd->nFetchOut >= 0); -#else - UNUSED_PARAMETER(fd); - UNUSED_PARAMETER(p); - UNUSED_PARAMETER(iOff); -#endif - return SQLITE_OK; -} - -/* - * Here ends the implementation of all sqlite3_file methods. - * - ********************* End sqlite3_file Methods ******************************* - *****************************************************************************/ - -/* - * This division contains definitions of sqlite3_io_methods objects that - * implement various file locking strategies. It also contains definitions - * of "finder" functions. A finder-function is used to locate the appropriate - * sqlite3_io_methods object for a particular database file. The pAppData - * field of the sqlite3_vfs VFS objects are initialized to be pointers to - * the correct finder-function for that VFS. - * - * Most finder functions return a pointer to a fixed sqlite3_io_methods - * object. The only interesting finder-function is autolockIoFinder, which - * looks at the filesystem type and tries to guess the best locking - * strategy from that. - * - * For finder-function F, two objects are created: - * - * (1) The real finder-function named "FImpt()". - * - * (2) A constant pointer to this function named just "F". - * - * - * A pointer to the F pointer is used as the pAppData value for VFS - * objects. We have to do this instead of letting pAppData point - * directly at the finder-function since C90 rules prevent a void* - * from be cast into a function pointer. - * - * - * Each instance of this macro generates two objects: - * - * * A constant sqlite3_io_methods object call METHOD that has locking - * methods CLOSE, LOCK, UNLOCK, CKRESLOCK. - * - * * An I/O method finder function called FINDER that returns a pointer - * to the METHOD object in the previous bullet. - */ -#define IOMETHODS(FINDER,METHOD,VERSION,CLOSE,LOCK,UNLOCK,CKLOCK,SHMMAP) \ -static const sqlite3_io_methods METHOD = { \ - VERSION, /* iVersion */ \ - CLOSE, /* xClose */ \ - unixRead, /* xRead */ \ - unixWrite, /* xWrite */ \ - unixTruncate, /* xTruncate */ \ - unixSync, /* xSync */ \ - unixFileSize, /* xFileSize */ \ - LOCK, /* xLock */ \ - UNLOCK, /* xUnlock */ \ - CKLOCK, /* xCheckReservedLock */ \ - unixFileControl, /* xFileControl */ \ - unixSectorSize, /* xSectorSize */ \ - unixDeviceCharacteristics, /* xDeviceCapabilities */ \ - SHMMAP, /* xShmMap */ \ - unixShmLock, /* xShmLock */ \ - unixShmBarrier, /* xShmBarrier */ \ - unixShmUnmap, /* xShmUnmap */ \ - unixFetch, /* xFetch */ \ - unixUnfetch, /* xUnfetch */ \ -}; \ -static const sqlite3_io_methods *FINDER##Impl(const char *z, unixFile *p){ \ - UNUSED_PARAMETER(z); UNUSED_PARAMETER(p); \ - return &METHOD; \ -} \ -static const sqlite3_io_methods *(*const FINDER)(const char*,unixFile *p) \ - = FINDER##Impl; - -/* - * Here are all of the sqlite3_io_methods objects for each of the - * locking strategies. Functions that return pointers to these methods - * are also created. - */ -IOMETHODS(posixIoFinder, /* Finder function name */ - posixIoMethods, /* sqlite3_io_methods object name */ - 3, /* shared memory and mmap are enabled */ - unixClose, /* xClose method */ - unixLock, /* xLock method */ - unixUnlock, /* xUnlock method */ - unixCheckReservedLock, /* xCheckReservedLock method */ - unixShmMap /* xShmMap method */ - ) - IOMETHODS(nolockIoFinder, /* Finder function name */ - nolockIoMethods, /* sqlite3_io_methods object name */ - 3, /* shared memory is disabled */ - nolockClose, /* xClose method */ - nolockLock, /* xLock method */ - nolockUnlock, /* xUnlock method */ - nolockCheckReservedLock, /* xCheckReservedLock method */ - 0 /* xShmMap method */ - ) - IOMETHODS(dotlockIoFinder, /* Finder function name */ - dotlockIoMethods, /* sqlite3_io_methods object name */ - 1, /* shared memory is disabled */ - dotlockClose, /* xClose method */ - dotlockLock, /* xLock method */ - dotlockUnlock, /* xUnlock method */ - dotlockCheckReservedLock, /* xCheckReservedLock method */ - 0 /* xShmMap method */ - ) -#if SQLITE_ENABLE_LOCKING_STYLE - IOMETHODS(flockIoFinder, /* Finder function name */ - flockIoMethods, /* sqlite3_io_methods object name */ - 1, /* shared memory is disabled */ - flockClose, /* xClose method */ - flockLock, /* xLock method */ - flockUnlock, /* xUnlock method */ - flockCheckReservedLock, /* xCheckReservedLock method */ - 0 /* xShmMap method */ - ) -#endif -#if defined(__APPLE__) && SQLITE_ENABLE_LOCKING_STYLE - IOMETHODS(afpIoFinder, /* Finder function name */ - afpIoMethods, /* sqlite3_io_methods object name */ - 1, /* shared memory is disabled */ - afpClose, /* xClose method */ - afpLock, /* xLock method */ - afpUnlock, /* xUnlock method */ - afpCheckReservedLock, /* xCheckReservedLock method */ - 0 /* xShmMap method */ - ) -#endif -/* - * The proxy locking method is a "super-method" in the sense that it - * opens secondary file descriptors for the conch and lock files and - * it uses proxy, dot-file, AFP, and flock() locking methods on those - * secondary files. For this reason, the division that implements - * proxy locking is located much further down in the file. But we need - * to go ahead and define the sqlite3_io_methods and finder function - * for proxy locking here. So we forward declare the I/O methods. - */ -#if defined(__APPLE__) && SQLITE_ENABLE_LOCKING_STYLE -static int -proxyClose(sqlite3_file *); -static int -proxyLock(sqlite3_file *, int); -static int -proxyUnlock(sqlite3_file *, int); -static int -proxyCheckReservedLock(sqlite3_file *, int *); -IOMETHODS(proxyIoFinder, /* Finder function name */ - proxyIoMethods, /* sqlite3_io_methods object name */ - 1, /* shared memory is disabled */ - proxyClose, /* xClose method */ - proxyLock, /* xLock method */ - proxyUnlock, /* xUnlock method */ - proxyCheckReservedLock, /* xCheckReservedLock method */ - 0 /* xShmMap method */ - ) -#endif -/* nfs lockd on OSX 10.3+ doesn't clear write locks when a read lock is set */ -#if defined(__APPLE__) && SQLITE_ENABLE_LOCKING_STYLE - IOMETHODS(nfsIoFinder, /* Finder function name */ - nfsIoMethods, /* sqlite3_io_methods object name */ - 1, /* shared memory is disabled */ - unixClose, /* xClose method */ - unixLock, /* xLock method */ - nfsUnlock, /* xUnlock method */ - unixCheckReservedLock, /* xCheckReservedLock method */ - 0 /* xShmMap method */ - ) -#endif -#if defined(__APPLE__) && SQLITE_ENABLE_LOCKING_STYLE -/* - * This "finder" function attempts to determine the best locking strategy - * for the database file "filePath". It then returns the sqlite3_io_methods - * object that implements that strategy. - * - * This is for MacOSX only. - */ -static const sqlite3_io_methods * -autolockIoFinderImpl(const char *filePath, /* name of the database file */ - unixFile * pNew /* open file object for the database file */ - ) -{ - static const struct Mapping { - const char *zFilesystem; /* Filesystem type name */ - const sqlite3_io_methods *pMethods; /* Appropriate locking method */ - } aMap[] = { - { - "hfs", &posixIoMethods}, { - "ufs", &posixIoMethods}, { - "afpfs", &afpIoMethods}, { - "smbfs", &afpIoMethods}, { - "webdav", &nolockIoMethods}, { - 0, 0} - }; - int i; - struct statfs fsInfo; - struct flock lockInfo; - - if (!filePath) { - /* If filePath==NULL that means we are dealing with a transient file - * that does not need to be locked. - */ - return &nolockIoMethods; - } - if (statfs(filePath, &fsInfo) != -1) { - if (fsInfo.f_flags & MNT_RDONLY) { - return &nolockIoMethods; - } - for (i = 0; aMap[i].zFilesystem; i++) { - if (strcmp(fsInfo.f_fstypename, aMap[i].zFilesystem) == - 0) { - return aMap[i].pMethods; - } - } - } - - /* Default case. Handles, amongst others, "nfs". - * Test byte-range lock using fcntl(). If the call succeeds, - * assume that the file-system supports POSIX style locks. - */ - lockInfo.l_len = 1; - lockInfo.l_start = 0; - lockInfo.l_whence = SEEK_SET; - lockInfo.l_type = F_RDLCK; - if (osFcntl(pNew->h, F_GETLK, &lockInfo) != -1) { - if (strcmp(fsInfo.f_fstypename, "nfs") == 0) { - return &nfsIoMethods; - } else { - return &posixIoMethods; - } - } else { - return &dotlockIoMethods; - } -} - -static const sqlite3_io_methods - * (*const autolockIoFinder)(const char *, unixFile *) = - autolockIoFinderImpl; - -#endif /* defined(__APPLE__) && SQLITE_ENABLE_LOCKING_STYLE */ - -/* - * An abstract type for a pointer to an IO method finder function: - */ -typedef const sqlite3_io_methods *(*finder_type) (const char *, unixFile *); - -/**************************************************************************** - *************************** sqlite3_vfs methods **************************** - * - * This division contains the implementation of methods on the - * sqlite3_vfs object. - */ - -/* - * Initialize the contents of the unixFile structure pointed to by pId. - */ -static int -fillInUnixFile(sqlite3_vfs * pVfs, /* Pointer to vfs object */ - int h, /* Open file descriptor of file being opened */ - sqlite3_file * pId, /* Write to the unixFile structure here */ - const char *zFilename, /* Name of the file being opened */ - int ctrlFlags /* Zero or more UNIXFILE_* values */ - ) -{ - const sqlite3_io_methods *pLockingStyle; - unixFile *pNew = (unixFile *) pId; - int rc = SQLITE_OK; - - assert(pNew->pInode == NULL); - - /* Usually the path zFilename should not be a relative pathname. The - * exception is when opening the proxy "conch" file in builds that - * include the special Apple locking styles. - */ -#if defined(__APPLE__) && SQLITE_ENABLE_LOCKING_STYLE - assert(zFilename == 0 || zFilename[0] == '/' - || pVfs->pAppData == (void *)&autolockIoFinder); -#else - assert(zFilename == 0 || zFilename[0] == '/'); -#endif - - /* No locking occurs in temporary files */ - assert(zFilename != 0 || (ctrlFlags & UNIXFILE_NOLOCK) != 0); - - pNew->h = h; - pNew->pVfs = pVfs; - pNew->zPath = zFilename; - pNew->ctrlFlags = (u8) ctrlFlags; -#if SQLITE_MAX_MMAP_SIZE>0 - pNew->mmapSizeMax = sqlite3GlobalConfig.szMmap; -#endif - if (strcmp(pVfs->zName, "unix-excl") == 0) { - pNew->ctrlFlags |= UNIXFILE_EXCL; - } - if (ctrlFlags & UNIXFILE_NOLOCK) { - pLockingStyle = &nolockIoMethods; - } else { - pLockingStyle = - (**(finder_type *) pVfs->pAppData) (zFilename, pNew); -#if SQLITE_ENABLE_LOCKING_STYLE - /* Cache zFilename in the locking context (AFP and dotlock override) for - * proxyLock activation is possible (remote proxy is based on db name) - * zFilename remains valid until file is closed, to support - */ - pNew->lockingContext = (void *)zFilename; -#endif - } - - if (pLockingStyle == &posixIoMethods -#if defined(__APPLE__) && SQLITE_ENABLE_LOCKING_STYLE - || pLockingStyle == &nfsIoMethods -#endif - ) { - rc = findInodeInfo(pNew, &pNew->pInode); - if (rc != SQLITE_OK) { - /* If an error occurred in findInodeInfo(), close the file descriptor - * immediately. findInodeInfo() may fail - * in two scenarios: - * - * (a) A call to fstat() failed. - * (b) A malloc failed. - * - * Scenario (b) may only occur if the process is holding no other - * file descriptors open on the same file. If there were other file - * descriptors on this file, then no malloc would be required by - * findInodeInfo(). If this is the case, it is quite safe to close - * handle h - as it is guaranteed that no posix locks will be released - * by doing so. - * - * If scenario (a) caused the error then things are not so safe. The - * implicit assumption here is that if fstat() fails, things are in - * such bad shape that dropping a lock or two doesn't matter much. - */ - robust_close(pNew, h, __LINE__); - h = -1; - } - } -#if SQLITE_ENABLE_LOCKING_STYLE && defined(__APPLE__) - else if (pLockingStyle == &afpIoMethods) { - /* AFP locking uses the file path so it needs to be included in - * the afpLockingContext. - */ - afpLockingContext *pCtx; - pNew->lockingContext = pCtx = sqlite3_malloc64(sizeof(*pCtx)); - if (pCtx == 0) { - rc = SQLITE_NOMEM_BKPT; - } else { - /* NB: zFilename exists and remains valid until the file is closed - * according to requirement F11141. So we do not need to make a - * copy of the filename. - */ - pCtx->dbPath = zFilename; - pCtx->reserved = 0; - srandomdev(); - rc = findInodeInfo(pNew, &pNew->pInode); - if (rc != SQLITE_OK) { - sqlite3_free(pNew->lockingContext); - robust_close(pNew, h, __LINE__); - h = -1; - } - } - } -#endif - - else if (pLockingStyle == &dotlockIoMethods) { - /* Dotfile locking uses the file path so it needs to be included in - * the dotlockLockingContext - */ - char *zLockFile; - int nFilename; - assert(zFilename != 0); - nFilename = (int)strlen(zFilename) + 6; - zLockFile = (char *)sqlite3_malloc64(nFilename); - if (zLockFile == 0) { - rc = SQLITE_NOMEM_BKPT; - } else { - sqlite3_snprintf(nFilename, zLockFile, - "%s" DOTLOCK_SUFFIX, zFilename); - } - pNew->lockingContext = zLockFile; - } - storeLastErrno(pNew, 0); - if (rc != SQLITE_OK) { - if (h >= 0) - robust_close(pNew, h, __LINE__); - } else { - pNew->pMethod = pLockingStyle; - OpenCounter(+1); - verifyDbFile(pNew); - } - return rc; -} - -/* - * Return the name of a directory in which to put temporary files. - * If no suitable temporary file directory can be found, return NULL. - */ -static const char * -unixTempFileDir(void) -{ - static const char *azDirs[] = { - 0, - 0, - "/var/tmp", - "/usr/tmp", - "/tmp", - "." - }; - unsigned int i = 0; - struct stat buf; - const char *zDir = sqlite3_temp_directory; - - if (!azDirs[0]) - azDirs[0] = getenv("SQLITE_TMPDIR"); - if (!azDirs[1]) - azDirs[1] = getenv("TMPDIR"); - while (1) { - if (zDir != 0 && osStat(zDir, &buf) == 0 && S_ISDIR(buf.st_mode) - && osAccess(zDir, 03) == 0) { - return zDir; - } - if (i >= sizeof(azDirs) / sizeof(azDirs[0])) - break; - zDir = azDirs[i++]; - } - return 0; -} - -/* - * Create a temporary file name in zBuf. zBuf must be allocated - * by the calling process and must be big enough to hold at least - * pVfs->mxPathname bytes. - */ -static int -unixGetTempname(int nBuf, char *zBuf) -{ - const char *zDir; - int iLimit = 0; - - /* It's odd to simulate an io-error here, but really this is just - * using the io-error infrastructure to test that SQLite handles this - * function failing. - */ - zBuf[0] = 0; - SimulateIOError(return SQLITE_IOERR); - - zDir = unixTempFileDir(); - if (zDir == 0) - return SQLITE_IOERR_GETTEMPPATH; - do { - u64 r; - sqlite3_randomness(sizeof(r), &r); - assert(nBuf > 2); - zBuf[nBuf - 2] = 0; - sqlite3_snprintf(nBuf, zBuf, - "%s/" SQLITE_TEMP_FILE_PREFIX "%llx%c", zDir, - r, 0); - if (zBuf[nBuf - 2] != 0 || (iLimit++) > 10) - return SQLITE_ERROR; - } while (osAccess(zBuf, 0) == 0); - return SQLITE_OK; -} - -#if SQLITE_ENABLE_LOCKING_STYLE && defined(__APPLE__) -/* - * Routine to transform a unixFile into a proxy-locking unixFile. - * Implementation in the proxy-lock division, but used by unixOpen() - * if SQLITE_PREFER_PROXY_LOCKING is defined. - */ -static int proxyTransformUnixFile(unixFile *, const char *); -#endif - -/* - * Search for an unused file descriptor that was opened on the database - * file (not a journal or master-journal file) identified by pathname - * zPath with SQLITE_OPEN_XXX flags matching those passed as the second - * argument to this function. - * - * Such a file descriptor may exist if a database connection was closed - * but the associated file descriptor could not be closed because some - * other file descriptor open on the same file is holding a file-lock. - * Refer to comments in the unixClose() function and the lengthy comment - * describing "Posix Advisory Locking" at the start of this file for - * further details. Also, ticket #4018. - * - * If a suitable file descriptor is found, then it is returned. If no - * such file descriptor is located, -1 is returned. - */ -static UnixUnusedFd * -findReusableFd(const char *zPath, int flags) -{ - UnixUnusedFd *pUnused = 0; - - struct stat sStat; /* Results of stat() call */ - - /* A stat() call may fail for various reasons. If this happens, it is - * almost certain that an open() call on the same path will also fail. - * For this reason, if an error occurs in the stat() call here, it is - * ignored and -1 is returned. The caller will try to open a new file - * descriptor on the same path, fail, and return an error to SQLite. - * - * Even if a subsequent open() call does succeed, the consequences of - * not searching for a reusable file descriptor are not dire. - */ - if (0 == osStat(zPath, &sStat)) { - unixInodeInfo *pInode; - - pInode = inodeList; - while (pInode && (pInode->fileId.dev != sStat.st_dev - || pInode->fileId.ino != - (u64) sStat.st_ino)) { - pInode = pInode->pNext; - } - if (pInode) { - UnixUnusedFd **pp; - for (pp = &pInode->pUnused; - *pp && (*pp)->flags != flags; - pp = &((*pp)->pNext)) ; - pUnused = *pp; - if (pUnused) { - *pp = pUnused->pNext; - } - } - } - return pUnused; -} - -/* - * Find the mode, uid and gid of file zFile. - */ -static int -getFileMode(const char *zFile, /* File name */ - mode_t * pMode, /* OUT: Permissions of zFile */ - uid_t * pUid, /* OUT: uid of zFile. */ - gid_t * pGid /* OUT: gid of zFile. */ - ) -{ - struct stat sStat; /* Output of stat() on database file */ - int rc = SQLITE_OK; - if (0 == osStat(zFile, &sStat)) { - *pMode = sStat.st_mode & 0777; - *pUid = sStat.st_uid; - *pGid = sStat.st_gid; - } else { - rc = SQLITE_IOERR_FSTAT; - } - return rc; -} - -/* - * This function is called by unixOpen() to determine the unix permissions - * to create new files with. If no error occurs, then SQLITE_OK is returned - * and a value suitable for passing as the third argument to open(2) is - * written to *pMode. If an IO error occurs, an SQLite error code is - * returned and the value of *pMode is not modified. - * - * In most cases, this routine sets *pMode to 0, which will become - * an indication to robust_open() to create the file using - * SQLITE_DEFAULT_FILE_PERMISSIONS adjusted by the umask. - * But if the file being opened is a regular journal file, then - * this function queries the file-system for the permissions on the - * corresponding database file and sets *pMode to this value. Whenever - * possible, journal files are created using the same permissions - * as the associated database file. - * - * If the SQLITE_ENABLE_8_3_NAMES option is enabled, then the - * original filename is unavailable. But 8_3_NAMES is only used for - * FAT filesystems and permissions do not matter there, so just use - * the default permissions. - */ -static int -findCreateFileMode(const char *zPath, /* Path of file (possibly) being created */ - int flags, /* Flags passed as 4th argument to xOpen() */ - mode_t * pMode, /* OUT: Permissions to open file with */ - uid_t * pUid, /* OUT: uid to set on the file */ - gid_t * pGid /* OUT: gid to set on the file */ - ) -{ - int rc = SQLITE_OK; /* Return Code */ - *pMode = 0; - *pUid = 0; - *pGid = 0; - if (flags & SQLITE_OPEN_DELETEONCLOSE) { - *pMode = 0600; - } else if (flags & SQLITE_OPEN_URI) { - /* If this is a main database file and the file was opened using a URI - * filename, check for the "modeof" parameter. If present, interpret - * its value as a filename and try to copy the mode, uid and gid from - * that file. - */ - const char *z = sqlite3_uri_parameter(zPath, "modeof"); - if (z) { - rc = getFileMode(z, pMode, pUid, pGid); - } - } - return rc; -} - -/* - * Open the file zPath. - * - * Previously, the SQLite OS layer used three functions in place of this - * one: - * - * sqlite3OsOpenReadWrite(); - * sqlite3OsOpenReadOnly(); - * sqlite3OsOpenExclusive(); - * - * These calls correspond to the following combinations of flags: - * - * ReadWrite() -> (READWRITE | CREATE) - * ReadOnly() -> (READONLY) - * OpenExclusive() -> (READWRITE | CREATE | EXCLUSIVE) - * - * The old OpenExclusive() accepted a boolean argument - "delFlag". If - * true, the file was configured to be automatically deleted when the - * file handle closed. To achieve the same effect using this new - * interface, add the DELETEONCLOSE flag to those specified above for - * OpenExclusive(). - */ -static int -unixOpen(sqlite3_vfs * pVfs, /* The VFS for which this is the xOpen method */ - const char *zPath, /* Pathname of file to be opened */ - sqlite3_file * pFile, /* The file descriptor to be filled in */ - int flags, /* Input flags to control the opening */ - int *pOutFlags /* Output flags returned to SQLite core */ - ) -{ - unixFile *p = (unixFile *) pFile; - int fd = -1; /* File descriptor returned by open() */ - int openFlags = 0; /* Flags to pass to open() */ - int eType = flags & 0xFFFFFF00; /* Type of file to open */ - int noLock; /* True to omit locking primitives */ - int rc; /* Function Return Code */ - int ctrlFlags = 0; /* UNIXFILE_* flags */ - - int isExclusive = (flags & SQLITE_OPEN_EXCLUSIVE); - int isDelete = (flags & SQLITE_OPEN_DELETEONCLOSE); - int isCreate = (flags & SQLITE_OPEN_CREATE); - int isReadonly = (flags & SQLITE_OPEN_READONLY); - int isReadWrite = (flags & SQLITE_OPEN_READWRITE); -#if SQLITE_ENABLE_LOCKING_STYLE - int isAutoProxy = (flags & SQLITE_OPEN_AUTOPROXY); -#endif -#if defined(__APPLE__) || SQLITE_ENABLE_LOCKING_STYLE - struct statfs fsInfo; -#endif - - /* If creating a master or main-file journal, this function will open - * a file-descriptor on the directory too. The first time unixSync() - * is called the directory file descriptor will be fsync()ed and close()d. - */ - int syncDir = isCreate; - - /* If argument zPath is a NULL pointer, this function is required to open - * a temporary file. Use this buffer to store the file name in. - */ - char zTmpname[MAX_PATHNAME + 2]; - const char *zName = zPath; - - /* Check the following statements are true: - * - * (a) Exactly one of the READWRITE and READONLY flags must be set, and - * (b) if CREATE is set, then READWRITE must also be set, and - * (c) if EXCLUSIVE is set, then CREATE must also be set. - * (d) if DELETEONCLOSE is set, then CREATE must also be set. - */ - assert((isReadonly == 0 || isReadWrite == 0) - && (isReadWrite || isReadonly)); - assert(isCreate == 0 || isReadWrite); - assert(isExclusive == 0 || isCreate); - assert(isDelete == 0 || isCreate); - - /* Detect a pid change and reset the PRNG. There is a race condition - * here such that two or more threads all trying to open databases at - * the same instant might all reset the PRNG. But multiple resets - * are harmless. - */ - if (randomnessPid != getpid()) { - randomnessPid = getpid(); - sqlite3_randomness(0, 0); - } - - memset(p, 0, sizeof(unixFile)); - - if (eType == SQLITE_OPEN_MAIN_DB) { - UnixUnusedFd *pUnused; - pUnused = findReusableFd(zName, flags); - if (pUnused) { - fd = pUnused->fd; - } else { - pUnused = sqlite3_malloc64(sizeof(*pUnused)); - if (!pUnused) { - return SQLITE_NOMEM_BKPT; - } - } - p->pUnused = pUnused; - - /* Database filenames are double-zero terminated if they are not - * URIs with parameters. Hence, they can always be passed into - * sqlite3_uri_parameter(). - */ - assert((flags & SQLITE_OPEN_URI) - || zName[strlen(zName) + 1] == 0); - - } else if (!zName) { - /* If zName is NULL, the upper layer is requesting a temp file. */ - assert(isDelete); - rc = unixGetTempname(pVfs->mxPathname, zTmpname); - if (rc != SQLITE_OK) { - return rc; - } - zName = zTmpname; - - /* Generated temporary filenames are always double-zero terminated - * for use by sqlite3_uri_parameter(). - */ - assert(zName[strlen(zName) + 1] == 0); - } - - /* Determine the value of the flags parameter passed to POSIX function - * open(). These must be calculated even if open() is not called, as - * they may be stored as part of the file handle and used by the - * 'conch file' locking functions later on. - */ - if (isReadonly) - openFlags |= O_RDONLY; - if (isReadWrite) - openFlags |= O_RDWR; - if (isCreate) - openFlags |= O_CREAT; - if (isExclusive) - openFlags |= (O_EXCL | O_NOFOLLOW); - openFlags |= (O_LARGEFILE | O_BINARY); - - if (fd < 0) { - mode_t openMode; /* Permissions to create file with */ - uid_t uid; /* Userid for the file */ - gid_t gid; /* Groupid for the file */ - rc = findCreateFileMode(zName, flags, &openMode, &uid, &gid); - if (rc != SQLITE_OK) { - assert(!p->pUnused); - return rc; - } - fd = robust_open(zName, openFlags, openMode); - assert(!isExclusive || (openFlags & O_CREAT) != 0); - if (fd < 0 && errno != EISDIR && isReadWrite) { - /* Failed to open the file for read/write access. Try read-only. */ - flags &= ~(SQLITE_OPEN_READWRITE | SQLITE_OPEN_CREATE); - openFlags &= ~(O_RDWR | O_CREAT); - flags |= SQLITE_OPEN_READONLY; - openFlags |= O_RDONLY; - isReadonly = 1; - fd = robust_open(zName, openFlags, openMode); - } - if (fd < 0) { - rc = unixLogError(SQLITE_CANTOPEN_BKPT, "open", zName); - goto open_finished; - } - - } - assert(fd >= 0); - if (pOutFlags) { - *pOutFlags = flags; - } - - if (p->pUnused) { - p->pUnused->fd = fd; - p->pUnused->flags = flags; - } - - if (isDelete) { - -#if defined(SQLITE_UNLINK_AFTER_CLOSE) - zPath = sqlite3_mprintf("%s", zName); - if (zPath == 0) { - robust_close(p, fd, __LINE__); - return SQLITE_NOMEM_BKPT; - } -#else - osUnlink(zName); -#endif - } -#if SQLITE_ENABLE_LOCKING_STYLE - else { - p->openFlags = openFlags; - } -#endif - -#if defined(__APPLE__) || SQLITE_ENABLE_LOCKING_STYLE - if (fstatfs(fd, &fsInfo) == -1) { - storeLastErrno(p, errno); - robust_close(p, fd, __LINE__); - return SQLITE_IOERR_ACCESS; - } - if (0 == strncmp("msdos", fsInfo.f_fstypename, 5)) { - ((unixFile *) pFile)->fsFlags |= SQLITE_FSFLAGS_IS_MSDOS; - } - if (0 == strncmp("exfat", fsInfo.f_fstypename, 5)) { - ((unixFile *) pFile)->fsFlags |= SQLITE_FSFLAGS_IS_MSDOS; - } -#endif - - /* Set up appropriate ctrlFlags */ - if (isDelete) - ctrlFlags |= UNIXFILE_DELETE; - if (isReadonly) - ctrlFlags |= UNIXFILE_RDONLY; - noLock = eType != SQLITE_OPEN_MAIN_DB; - if (noLock) - ctrlFlags |= UNIXFILE_NOLOCK; - if (syncDir) - ctrlFlags |= UNIXFILE_DIRSYNC; - if (flags & SQLITE_OPEN_URI) - ctrlFlags |= UNIXFILE_URI; - -#if SQLITE_ENABLE_LOCKING_STYLE -#if SQLITE_PREFER_PROXY_LOCKING - isAutoProxy = 1; -#endif - if (isAutoProxy && (zPath != NULL) && (!noLock) && pVfs->xOpen) { - char *envforce = getenv("SQLITE_FORCE_PROXY_LOCKING"); - int useProxy = 0; - - /* SQLITE_FORCE_PROXY_LOCKING==1 means force always use proxy, 0 means - * never use proxy, NULL means use proxy for non-local files only. - */ - if (envforce != NULL) { - useProxy = atoi(envforce) > 0; - } else { - useProxy = !(fsInfo.f_flags & MNT_LOCAL); - } - if (useProxy) { - rc = fillInUnixFile(pVfs, fd, pFile, zPath, ctrlFlags); - if (rc == SQLITE_OK) { - rc = proxyTransformUnixFile((unixFile *) pFile, - ":auto:"); - if (rc != SQLITE_OK) { - /* Use unixClose to clean up the resources added in fillInUnixFile - * and clear all the structure's references. Specifically, - * pFile->pMethods will be NULL so sqlite3OsClose will be a no-op - */ - unixClose(pFile); - return rc; - } - } - goto open_finished; - } - } -#endif - - rc = fillInUnixFile(pVfs, fd, pFile, zPath, ctrlFlags); - - open_finished: - if (rc != SQLITE_OK) { - sqlite3_free(p->pUnused); - } - return rc; -} - -/* - * Delete the file at zPath. If the dirSync argument is true, fsync() - * the directory after deleting the file. - */ -static int -unixDelete(sqlite3_vfs * NotUsed, /* VFS containing this as the xDelete method */ - const char *zPath, /* Name of file to be deleted */ - int dirSync /* If true, fsync() directory after deleting file */ - ) -{ - int rc = SQLITE_OK; - UNUSED_PARAMETER(NotUsed); - SimulateIOError(return SQLITE_IOERR_DELETE); - if (osUnlink(zPath) == (-1)) { - if (errno == ENOENT) { - rc = SQLITE_IOERR_DELETE_NOENT; - } else { - rc = unixLogError(SQLITE_IOERR_DELETE, "unlink", zPath); - } - return rc; - } -#ifndef SQLITE_DISABLE_DIRSYNC - if ((dirSync & 1) != 0) { - int fd; - rc = osOpenDirectory(zPath, &fd); - if (rc == SQLITE_OK) { - if (full_fsync(fd, 0, 0)) { - rc = unixLogError(SQLITE_IOERR_DIR_FSYNC, - "fsync", zPath); - } - robust_close(0, fd, __LINE__); - } else { - assert(rc == SQLITE_CANTOPEN); - rc = SQLITE_OK; - } - } -#endif - return rc; -} - -/* - * Test the existence of or access permissions of file zPath. The - * test performed depends on the value of flags: - * - * SQLITE_ACCESS_EXISTS: Return 1 if the file exists - * SQLITE_ACCESS_READWRITE: Return 1 if the file is read and writable. - * SQLITE_ACCESS_READONLY: Return 1 if the file is readable. - * - * Otherwise return 0. - */ -static int -unixAccess(sqlite3_vfs * NotUsed, /* The VFS containing this xAccess method */ - const char *zPath, /* Path of the file to examine */ - int flags, /* What do we want to learn about the zPath file? */ - int *pResOut /* Write result boolean here */ - ) -{ - UNUSED_PARAMETER(NotUsed); - SimulateIOError(return SQLITE_IOERR_ACCESS; - ); - assert(pResOut != 0); - - /* The spec says there are three possible values for flags. But only - * two of them are actually used - */ - assert(flags == SQLITE_ACCESS_EXISTS - || flags == SQLITE_ACCESS_READWRITE); - - if (flags == SQLITE_ACCESS_EXISTS) { - struct stat buf; - *pResOut = (0 == osStat(zPath, &buf) && buf.st_size > 0); - } else { - *pResOut = osAccess(zPath, W_OK | R_OK) == 0; - } - return SQLITE_OK; -} - -/* - * - */ -static int -mkFullPathname(const char *zPath, /* Input path */ - char *zOut, /* Output buffer */ - int nOut /* Allocated size of buffer zOut */ - ) -{ - int nPath = sqlite3Strlen30(zPath); - int iOff = 0; - if (zPath[0] != '/') { - if (osGetcwd(zOut, nOut - 2) == 0) { - return unixLogError(SQLITE_CANTOPEN_BKPT, "getcwd", - zPath); - } - iOff = sqlite3Strlen30(zOut); - zOut[iOff++] = '/'; - } - if ((iOff + nPath + 1) > nOut) { - /* SQLite assumes that xFullPathname() nul-terminates the output buffer - * even if it returns an error. - */ - zOut[iOff] = '\0'; - return SQLITE_CANTOPEN_BKPT; - } - sqlite3_snprintf(nOut - iOff, &zOut[iOff], "%s", zPath); - return SQLITE_OK; -} - -/* - * Turn a relative pathname into a full pathname. The relative path - * is stored as a nul-terminated string in the buffer pointed to by - * zPath. - * - * zOut points to a buffer of at least sqlite3_vfs.mxPathname bytes - * (in this case, MAX_PATHNAME bytes). The full-path is written to - * this buffer before returning. - */ -static int -unixFullPathname(sqlite3_vfs * pVfs, /* Pointer to vfs object */ - const char *zPath, /* Possibly relative input path */ - int nOut, /* Size of output buffer in bytes */ - char *zOut /* Output buffer */ - ) -{ - (void)pVfs; -#if !defined(HAVE_READLINK) || !defined(HAVE_LSTAT) - return mkFullPathname(zPath, zOut, nOut); -#else - int rc = SQLITE_OK; - int nByte; - int nLink = 1; /* Number of symbolic links followed so far */ - const char *zIn = zPath; /* Input path for each iteration of loop */ - char *zDel = 0; - - assert(pVfs->mxPathname == MAX_PATHNAME); - UNUSED_PARAMETER(pVfs); - - /* It's odd to simulate an io-error here, but really this is just - * using the io-error infrastructure to test that SQLite handles this - * function failing. This function could fail if, for example, the - * current working directory has been unlinked. - */ - SimulateIOError(return SQLITE_ERROR); - - do { - - /* Call stat() on path zIn. Set bLink to true if the path is a symbolic - * link, or false otherwise. - */ - int bLink = 0; - struct stat buf; - if (osLstat(zIn, &buf) != 0) { - if (errno != ENOENT) { - rc = unixLogError(SQLITE_CANTOPEN_BKPT, "lstat", - zIn); - } - } else { - bLink = S_ISLNK(buf.st_mode); - } - - if (bLink) { - if (zDel == 0) { - zDel = sqlite3_malloc(nOut); - if (zDel == 0) - rc = SQLITE_NOMEM_BKPT; - } else if (++nLink > SQLITE_MAX_SYMLINKS) { - rc = SQLITE_CANTOPEN_BKPT; - } - - if (rc == SQLITE_OK) { - nByte = osReadlink(zIn, zDel, nOut - 1); - if (nByte < 0) { - rc = unixLogError(SQLITE_CANTOPEN_BKPT, - "readlink", zIn); - } else { - if (zDel[0] != '/') { - int n; - for (n = sqlite3Strlen30(zIn); - n > 0 && zIn[n - 1] != '/'; - n--) ; - if (nByte + n + 1 > nOut) { - rc = SQLITE_CANTOPEN_BKPT; - } else { - memmove(&zDel[n], zDel, - nByte + 1); - memcpy(zDel, zIn, n); - nByte += n; - } - } - zDel[nByte] = '\0'; - } - } - - zIn = zDel; - } - - assert(rc != SQLITE_OK || zIn != zOut || zIn[0] == '/'); - if (rc == SQLITE_OK && zIn != zOut) { - rc = mkFullPathname(zIn, zOut, nOut); - } - if (bLink == 0) - break; - zIn = zOut; - } while (rc == SQLITE_OK); - - sqlite3_free(zDel); - return rc; -#endif /* HAVE_READLINK && HAVE_LSTAT */ -} - -#ifndef SQLITE_OMIT_LOAD_EXTENSION -/* - * Interfaces for opening a shared library, finding entry points - * within the shared library, and closing the shared library. - */ -#include -static void * -unixDlOpen(sqlite3_vfs * NotUsed, const char *zFilename) -{ - UNUSED_PARAMETER(NotUsed); - return dlopen(zFilename, RTLD_NOW | RTLD_GLOBAL); -} - -/* - * SQLite calls this function immediately after a call to unixDlSym() or - * unixDlOpen() fails (returns a null pointer). If a more detailed error - * message is available, it is written to zBufOut. If no error message - * is available, zBufOut is left unmodified and SQLite uses a default - * error message. - */ -static void -unixDlError(sqlite3_vfs * NotUsed, int nBuf, char *zBufOut) -{ - const char *zErr; - UNUSED_PARAMETER(NotUsed); - zErr = dlerror(); - if (zErr) { - sqlite3_snprintf(nBuf, zBufOut, "%s", zErr); - } -} - -static - void (*unixDlSym(sqlite3_vfs * NotUsed, void *p, const char *zSym)) (void) { - /* - * GCC with -pedantic-errors says that C90 does not allow a void* to be - * cast into a pointer to a function. And yet the library dlsym() routine - * returns a void* which is really a pointer to a function. So how do we - * use dlsym() with -pedantic-errors? - * - * Variable x below is defined to be a pointer to a function taking - * parameters void* and const char* and returning a pointer to a function. - * We initialize x by assigning it a pointer to the dlsym() function. - * (That assignment requires a cast.) Then we call the function that - * x points to. - * - * This work-around is unlikely to work correctly on any system where - * you really cannot cast a function pointer into void*. But then, on the - * other hand, dlsym() will not work on such a system either, so we have - * not really lost anything. - */ - void (*(*x) (void *, const char *)) (void); - UNUSED_PARAMETER(NotUsed); - x = (void (*(*)(void *, const char *))(void))dlsym; - return (*x) (p, zSym); -} - -static void -unixDlClose(sqlite3_vfs * NotUsed, void *pHandle) -{ - UNUSED_PARAMETER(NotUsed); - dlclose(pHandle); -} -#else /* if SQLITE_OMIT_LOAD_EXTENSION is defined: */ -#define unixDlOpen 0 -#define unixDlError 0 -#define unixDlSym 0 -#define unixDlClose 0 -#endif - -/* - * Write nBuf bytes of random data to the supplied buffer zBuf. - */ -static int -unixRandomness(sqlite3_vfs * NotUsed, int nBuf, char *zBuf) -{ - UNUSED_PARAMETER(NotUsed); - assert((size_t) nBuf >= (sizeof(time_t) + sizeof(int))); - - /* We have to initialize zBuf to prevent valgrind from reporting - * errors. The reports issued by valgrind are incorrect - we would - * prefer that the randomness be increased by making use of the - * uninitialized space in zBuf - but valgrind errors tend to worry - * some users. Rather than argue, it seems easier just to initialize - * the whole array and silence valgrind, even if that means less randomness - * in the random seed. - * - * When testing, initializing zBuf[] to zero is all we do. That means - * that we always use the same random number sequence. This makes the - * tests repeatable. - */ - memset(zBuf, 0, nBuf); - randomnessPid = getpid(); -#if !defined(SQLITE_TEST) && !defined(SQLITE_OMIT_RANDOMNESS) - { - int fd, got; - fd = robust_open("/dev/urandom", O_RDONLY, 0); - if (fd < 0) { - time_t t; - time(&t); - memcpy(zBuf, &t, sizeof(t)); - memcpy(&zBuf[sizeof(t)], &randomnessPid, - sizeof(randomnessPid)); - assert(sizeof(t) + sizeof(randomnessPid) <= - (size_t) nBuf); - nBuf = sizeof(t) + sizeof(randomnessPid); - } else { - do { - got = osRead(fd, zBuf, nBuf); - } while (got < 0 && errno == EINTR); - robust_close(0, fd, __LINE__); - } - } -#endif - return nBuf; -} - -/* - * Sleep for a little while. Return the amount of time slept. - * The argument is the number of microseconds we want to sleep. - * The return value is the number of microseconds of sleep actually - * requested from the underlying operating system, a number which - * might be greater than or equal to the argument, but not less - * than the argument. - */ -static int -unixSleep(sqlite3_vfs * NotUsed, int microseconds) -{ -#if defined(HAVE_USLEEP) && HAVE_USLEEP - usleep(microseconds); - UNUSED_PARAMETER(NotUsed); - return microseconds; -#else - int seconds = (microseconds + 999999) / 1000000; - sleep(seconds); - UNUSED_PARAMETER(NotUsed); - return seconds * 1000000; -#endif -} - -/* - * The following variable, if set to a non-zero value, is interpreted as - * the number of seconds since 1970 and is used to set the result of - * sqlite3OsCurrentTime() during testing. - */ -#ifdef SQLITE_TEST -int sqlite3_current_time = 0; /* Fake system time in seconds since 1970. */ -#endif - -/* - * Find the current time (in Universal Coordinated Time). Write into *piNow - * the current time and date as a Julian Day number times 86_400_000. In - * other words, write into *piNow the number of milliseconds since the Julian - * epoch of noon in Greenwich on November 24, 4714 B.C according to the - * proleptic Gregorian calendar. - * - * On success, return SQLITE_OK. Return SQLITE_ERROR if the time and date - * cannot be found. - */ -static int -unixCurrentTimeInt64(sqlite3_vfs * NotUsed, sqlite3_int64 * piNow) -{ - static const sqlite3_int64 unixEpoch = - 24405875 * (sqlite3_int64) 8640000; - int rc = SQLITE_OK; -#if defined(NO_GETTOD) - time_t t; - time(&t); - *piNow = ((sqlite3_int64) t) * 1000 + unixEpoch; -#else - struct timeval sNow; - (void)gettimeofday(&sNow, 0); /* Cannot fail given valid arguments */ - *piNow = - unixEpoch + 1000 * (sqlite3_int64) sNow.tv_sec + - sNow.tv_usec / 1000; -#endif + /* If p==0 (unmap the entire file) then there must be no outstanding + * xFetch references. Or, if p!=0 (meaning it is an xFetch reference), + * then there must be at least one outstanding. + */ + assert((p == 0) == (pFd->nFetchOut == 0)); -#ifdef SQLITE_TEST - if (sqlite3_current_time) { - *piNow = - 1000 * (sqlite3_int64) sqlite3_current_time + unixEpoch; + /* If p!=0, it must match the iOff value. */ + assert(p == 0 || p == &((u8 *) pFd->pMapRegion)[iOff]); + + if (p) { + pFd->nFetchOut--; + } else { + unixUnmapfile(pFd); } -#endif - UNUSED_PARAMETER(NotUsed); - return rc; -} -/* - * The xGetLastError() method is designed to return a better - * low-level error message when operating-system problems come up - * during SQLite operation. Only the integer return code is currently - * used. - */ -static int -unixGetLastError(sqlite3_vfs * NotUsed, int NotUsed2, char *NotUsed3) -{ - UNUSED_PARAMETER(NotUsed); - UNUSED_PARAMETER(NotUsed2); - UNUSED_PARAMETER(NotUsed3); - return errno; + assert(pFd->nFetchOut >= 0); + return SQLITE_OK; } /* - *********************** End of sqlite3_vfs methods *************************** + * Here ends the implementation of all sqlite3_file methods. + * + ********************* End sqlite3_file Methods ******************************* *****************************************************************************/ -/****************************************************************************** - ************************* Begin Proxy Locking ******************************** - * - * Proxy locking is a "uber-locking-method" in this sense: It uses the - * other locking methods on secondary lock files. Proxy locking is a - * meta-layer over top of the primitive locking implemented above. For - * this reason, the division that implements of proxy locking is deferred - * until late in the file (here) after all of the other I/O methods have - * been defined - so that the primitive locking methods are available - * as services to help with the implementation of proxy locking. - * - *** - * - * The default locking schemes in SQLite use byte-range locks on the - * database file to coordinate safe, concurrent access by multiple readers - * and writers [http://sqlite.org/lockingv3.html]. The five file locking - * states (UNLOCKED, PENDING, SHARED, RESERVED, EXCLUSIVE) are implemented - * as POSIX read & write locks over fixed set of locations (via fsctl), - * on AFP and SMB only exclusive byte-range locks are available via fsctl - * with _IOWR('z', 23, struct ByteRangeLockPB2) to track the same 5 states. - * To simulate a F_RDLCK on the shared range, on AFP a randomly selected - * address in the shared range is taken for a SHARED lock, the entire - * shared range is taken for an EXCLUSIVE lock): - * - * PENDING_BYTE 0x40000000 - * RESERVED_BYTE 0x40000001 - * SHARED_RANGE 0x40000002 -> 0x40000200 - * - * This works well on the local file system, but shows a nearly 100x - * slowdown in read performance on AFP because the AFP client disables - * the read cache when byte-range locks are present. Enabling the read - * cache exposes a cache coherency problem that is present on all OS X - * supported network file systems. NFS and AFP both observe the - * close-to-open semantics for ensuring cache coherency - * [http://nfs.sourceforge.net/#faq_a8], which does not effectively - * address the requirements for concurrent database access by multiple - * readers and writers - * [http://www.nabble.com/SQLite-on-NFS-cache-coherency-td15655701.html]. - * - * To address the performance and cache coherency issues, proxy file locking - * changes the way database access is controlled by limiting access to a - * single host at a time and moving file locks off of the database file - * and onto a proxy file on the local file system. - * - * - * Using proxy locks - * ----------------- - * - * C APIs - * - * sqlite3_file_control(db, dbname, SQLITE_FCNTL_SET_LOCKPROXYFILE, - * | ":auto:"); - * sqlite3_file_control(db, dbname, SQLITE_FCNTL_GET_LOCKPROXYFILE, - * &); - * - * - * SQL pragmas - * - * PRAGMA [database.]lock_proxy_file= | :auto: - * PRAGMA [database.]lock_proxy_file - * - * Specifying ":auto:" means that if there is a conch file with a matching - * host ID in it, the proxy path in the conch file will be used, otherwise - * a proxy path based on the user's temp dir - * (via confstr(_CS_DARWIN_USER_TEMP_DIR,...)) will be used and the - * actual proxy file name is generated from the name and path of the - * database file. For example: - * - * For database path "/Users/me/foo.db" - * The lock path will be "/sqliteplocks/_Users_me_foo.db:auto:") - * - * Once a lock proxy is configured for a database connection, it can not - * be removed, however it may be switched to a different proxy path via - * the above APIs (assuming the conch file is not being held by another - * connection or process). - * - * - * How proxy locking works - * ----------------------- - * - * Proxy file locking relies primarily on two new supporting files: - * - * * conch file to limit access to the database file to a single host - * at a time - * - * * proxy file to act as a proxy for the advisory locks normally - * taken on the database - * - * The conch file - to use a proxy file, sqlite must first "hold the conch" - * by taking an sqlite-style shared lock on the conch file, reading the - * contents and comparing the host's unique host ID (see below) and lock - * proxy path against the values stored in the conch. The conch file is - * stored in the same directory as the database file and the file name - * is patterned after the database file name as ".-conch". - * If the conch file does not exist, or its contents do not match the - * host ID and/or proxy path, then the lock is escalated to an exclusive - * lock and the conch file contents is updated with the host ID and proxy - * path and the lock is downgraded to a shared lock again. If the conch - * is held by another process (with a shared lock), the exclusive lock - * will fail and SQLITE_BUSY is returned. - * - * The proxy file - a single-byte file used for all advisory file locks - * normally taken on the database file. This allows for safe sharing - * of the database file for multiple readers and writers on the same - * host (the conch ensures that they all use the same local lock file). - * - * Requesting the lock proxy does not immediately take the conch, it is - * only taken when the first request to lock database file is made. - * This matches the semantics of the traditional locking behavior, where - * opening a connection to a database file does not take a lock on it. - * The shared lock and an open file descriptor are maintained until - * the connection to the database is closed. - * - * The proxy file and the lock file are never deleted so they only need - * to be created the first time they are used. +/* + * This division contains definitions of sqlite3_io_methods objects that + * implement various file locking strategies. It also contains definitions + * of "finder" functions. A finder-function is used to locate the appropriate + * sqlite3_io_methods object for a particular database file. The pAppData + * field of the sqlite3_vfs VFS objects are initialized to be pointers to + * the correct finder-function for that VFS. * - * Configuration options - * --------------------- + * Most finder functions return a pointer to a fixed sqlite3_io_methods + * object. The only interesting finder-function is autolockIoFinder, which + * looks at the filesystem type and tries to guess the best locking + * strategy from that. * - * SQLITE_PREFER_PROXY_LOCKING + * For finder-function F, two objects are created: * - * Database files accessed on non-local file systems are - * automatically configured for proxy locking, lock files are - * named automatically using the same logic as - * PRAGMA lock_proxy_file=":auto:" + * (1) The real finder-function named "FImpt()". * - * SQLITE_PROXY_DEBUG + * (2) A constant pointer to this function named just "F". * - * Enables the logging of error messages during host id file - * retrieval and creation * - * LOCKPROXYDIR + * A pointer to the F pointer is used as the pAppData value for VFS + * objects. We have to do this instead of letting pAppData point + * directly at the finder-function since C90 rules prevent a void* + * from be cast into a function pointer. * - * Overrides the default directory used for lock proxy files that - * are named automatically via the ":auto:" setting * - * SQLITE_DEFAULT_PROXYDIR_PERMISSIONS + * Each instance of this macro generates two objects: * - * Permissions to use when creating a directory for storing the - * lock proxy files, only used when LOCKPROXYDIR is not set. + * * A constant sqlite3_io_methods object call METHOD that has locking + * methods CLOSE, LOCK, UNLOCK, CKRESLOCK. * + * * An I/O method finder function called FINDER that returns a pointer + * to the METHOD object in the previous bullet. + */ +#define IOMETHODS(FINDER,METHOD,VERSION,CLOSE,LOCK,UNLOCK,SHMMAP) \ +static const sqlite3_io_methods METHOD = { \ + VERSION, /* iVersion */ \ + CLOSE, /* xClose */ \ + unixRead, /* xRead */ \ + unixWrite, /* xWrite */ \ + unixTruncate, /* xTruncate */ \ + unixSync, /* xSync */ \ + unixFileSize, /* xFileSize */ \ + LOCK, /* xLock */ \ + UNLOCK, /* xUnlock */ \ + unixFileControl, /* xFileControl */ \ + unixSectorSize, /* xSectorSize */ \ + unixDeviceCharacteristics, /* xDeviceCapabilities */ \ + SHMMAP, /* xShmMap */ \ + unixShmLock, /* xShmLock */ \ + unixShmBarrier, /* xShmBarrier */ \ + unixShmUnmap, /* xShmUnmap */ \ + unixFetch, /* xFetch */ \ + unixUnfetch, /* xUnfetch */ \ +}; \ +static const sqlite3_io_methods *FINDER##Impl(const char *z, unixFile *p){ \ + UNUSED_PARAMETER(z); UNUSED_PARAMETER(p); \ + return &METHOD; \ +} \ +static const sqlite3_io_methods *(*const FINDER)(const char*,unixFile *p) \ + = FINDER##Impl; + +/* + * Here are all of the sqlite3_io_methods objects for each of the + * locking strategies. Functions that return pointers to these methods + * are also created. + */ +IOMETHODS(posixIoFinder, /* Finder function name */ + posixIoMethods, /* sqlite3_io_methods object name */ + 3, /* shared memory and mmap are enabled */ + unixClose, /* xClose method */ + unixLock, /* xLock method */ + unixUnlock, /* xUnlock method */ + unixShmMap /* xShmMap method */ + ) + IOMETHODS(nolockIoFinder, /* Finder function name */ + nolockIoMethods, /* sqlite3_io_methods object name */ + 3, /* shared memory is disabled */ + nolockClose, /* xClose method */ + nolockLock, /* xLock method */ + nolockUnlock, /* xUnlock method */ + NULL /* xShmMap method */ + ) + +/* + * An abstract type for a pointer to an IO method finder function: + */ +typedef const sqlite3_io_methods *(*finder_type) (const char *, unixFile *); + +/**************************************************************************** + *************************** sqlite3_vfs methods **************************** * - * As mentioned above, when compiled with SQLITE_PREFER_PROXY_LOCKING, - * setting the environment variable SQLITE_FORCE_PROXY_LOCKING to 1 will - * force proxy locking to be used for every database file opened, and 0 - * will force automatic proxy locking to be disabled for all database - * files (explicitly calling the SQLITE_FCNTL_SET_LOCKPROXYFILE pragma or - * sqlite_file_control API is not affected by SQLITE_FORCE_PROXY_LOCKING). + * This division contains the implementation of methods on the + * sqlite3_vfs object. */ /* - * Proxy locking is only available on MacOSX + * Initialize the contents of the unixFile structure pointed to by pId. */ -#if defined(__APPLE__) && SQLITE_ENABLE_LOCKING_STYLE +static int +fillInUnixFile(sqlite3_vfs * pVfs, /* Pointer to vfs object */ + int h, /* Open file descriptor of file being opened */ + sqlite3_file * pId, /* Write to the unixFile structure here */ + const char *zFilename, /* Name of the file being opened */ + int ctrlFlags /* Zero or more UNIXFILE_* values */ + ) +{ + const sqlite3_io_methods *pLockingStyle; + unixFile *pNew = (unixFile *) pId; + int rc = SQLITE_OK; + + assert(pNew->pInode == NULL); + + /* Usually the path zFilename should not be a relative pathname. The + * exception is when opening the proxy "conch" file in builds that + * include the special Apple locking styles. + */ + assert(zFilename == 0 || zFilename[0] == '/'); + + /* No locking occurs in temporary files */ + assert(zFilename != 0 || (ctrlFlags & UNIXFILE_NOLOCK) != 0); + + pNew->h = h; + pNew->pVfs = pVfs; + pNew->zPath = zFilename; + pNew->ctrlFlags = (u8) ctrlFlags; +#if SQLITE_MAX_MMAP_SIZE>0 + pNew->mmapSizeMax = sqlite3GlobalConfig.szMmap; +#endif + if (strcmp(pVfs->zName, "unix-excl") == 0) { + pNew->ctrlFlags |= UNIXFILE_EXCL; + } + if (ctrlFlags & UNIXFILE_NOLOCK) { + pLockingStyle = &nolockIoMethods; + } else { + pLockingStyle = + (**(finder_type *) pVfs->pAppData) (zFilename, pNew); + assert(pLockingStyle == &posixIoMethods); + } + + if (pLockingStyle == &posixIoMethods) { + rc = findInodeInfo(pNew, &pNew->pInode); + if (rc != SQLITE_OK) { + /* If an error occurred in findInodeInfo(), close the file descriptor + * immediately. findInodeInfo() may fail + * in two scenarios: + * + * (a) A call to fstat() failed. + * (b) A malloc failed. + * + * Scenario (b) may only occur if the process is holding no other + * file descriptors open on the same file. If there were other file + * descriptors on this file, then no malloc would be required by + * findInodeInfo(). If this is the case, it is quite safe to close + * handle h - as it is guaranteed that no posix locks will be released + * by doing so. + * + * If scenario (a) caused the error then things are not so safe. The + * implicit assumption here is that if fstat() fails, things are in + * such bad shape that dropping a lock or two doesn't matter much. + */ + robust_close(pNew, h, __LINE__); + h = -1; + } + } + storeLastErrno(pNew, 0); + if (rc != SQLITE_OK) { + if (h >= 0) + robust_close(pNew, h, __LINE__); + } else { + pNew->pMethod = pLockingStyle; + OpenCounter(+1); + verifyDbFile(pNew); + } + return rc; +} /* - * The proxyLockingContext has the path and file structures for the remote - * and local proxy files in it + * Return the name of a directory in which to put temporary files. + * If no suitable temporary file directory can be found, return NULL. */ -typedef struct proxyLockingContext proxyLockingContext; -struct proxyLockingContext { - unixFile *conchFile; /* Open conch file */ - char *conchFilePath; /* Name of the conch file */ - unixFile *lockProxy; /* Open proxy lock file */ - char *lockProxyPath; /* Name of the proxy lock file */ - char *dbPath; /* Name of the open file */ - int conchHeld; /* 1 if the conch is held, -1 if lockless */ - int nFails; /* Number of conch taking failures */ - void *oldLockingContext; /* Original lockingcontext to restore on close */ - sqlite3_io_methods const *pOldMethod; /* Original I/O methods for close */ -}; +static const char * +unixTempFileDir(void) +{ + static const char *azDirs[] = { + 0, + 0, + "/var/tmp", + "/usr/tmp", + "/tmp", + "." + }; + unsigned int i = 0; + struct stat buf; + const char *zDir = sqlite3_temp_directory; + + if (!azDirs[0]) + azDirs[0] = getenv("SQLITE_TMPDIR"); + if (!azDirs[1]) + azDirs[1] = getenv("TMPDIR"); + while (1) { + if (zDir != 0 && stat(zDir, &buf) == 0 && + S_ISDIR(buf.st_mode) && access(zDir, 03) == 0) + return zDir; + if (i >= sizeof(azDirs) / sizeof(azDirs[0])) + break; + zDir = azDirs[i++]; + } + return 0; +} /* - * The proxy lock file path for the database at dbPath is written into lPath, - * which must point to valid, writable memory large enough for a maxLen length - * file path. + * Create a temporary file name in zBuf. zBuf must be allocated + * by the calling process and must be big enough to hold at least + * pVfs->mxPathname bytes. */ static int -proxyGetLockPath(const char *dbPath, char *lPath, size_t maxLen) +unixGetTempname(int nBuf, char *zBuf) { - int len; - int dbLen; - int i; - -#ifdef LOCKPROXYDIR - len = strlcpy(lPath, LOCKPROXYDIR, maxLen); -#else -#ifdef _CS_DARWIN_USER_TEMP_DIR - { - if (!confstr(_CS_DARWIN_USER_TEMP_DIR, lPath, maxLen)) { - return SQLITE_IOERR_LOCK; - } - len = strlcat(lPath, "sqliteplocks", maxLen); - } -#else - len = strlcpy(lPath, "/tmp/", maxLen); -#endif -#endif + const char *zDir; + int iLimit = 0; - if (lPath[len - 1] != '/') { - len = strlcat(lPath, "/", maxLen); - } + /* It's odd to simulate an io-error here, but really this is just + * using the io-error infrastructure to test that SQLite handles this + * function failing. + */ + zBuf[0] = 0; + SimulateIOError(return SQLITE_IOERR); - /* transform the db path to a unique cache name */ - dbLen = (int)strlen(dbPath); - for (i = 0; i < dbLen && (i + len + 7) < (int)maxLen; i++) { - char c = dbPath[i]; - lPath[i + len] = (c == '/') ? '_' : c; - } - lPath[i + len] = '\0'; - strlcat(lPath, ":auto:", maxLen); + zDir = unixTempFileDir(); + if (zDir == 0) + return SQLITE_IOERR_GETTEMPPATH; + do { + u64 r; + sqlite3_randomness(sizeof(r), &r); + assert(nBuf > 2); + zBuf[nBuf - 2] = 0; + sqlite3_snprintf(nBuf, zBuf, + "%s/" SQLITE_TEMP_FILE_PREFIX "%llx%c", zDir, + r, 0); + if (zBuf[nBuf - 2] != 0 || (iLimit++) > 10) + return SQLITE_ERROR; + } while (access(zBuf, 0) == 0); return SQLITE_OK; } /* - ** Creates the lock file and any missing directories in lockPath + * Search for an unused file descriptor that was opened on the database + * file (not a journal or master-journal file) identified by pathname + * zPath with SQLITE_OPEN_XXX flags matching those passed as the second + * argument to this function. + * + * Such a file descriptor may exist if a database connection was closed + * but the associated file descriptor could not be closed because some + * other file descriptor open on the same file is holding a file-lock. + * Refer to comments in the unixClose() function and the lengthy comment + * describing "Posix Advisory Locking" at the start of this file for + * further details. Also, ticket #4018. + * + * If a suitable file descriptor is found, then it is returned. If no + * such file descriptor is located, -1 is returned. */ -static int -proxyCreateLockPath(const char *lockPath) +static UnixUnusedFd * +findReusableFd(const char *zPath, int flags) { - int i, len; - char buf[MAXPATHLEN]; - int start = 0; - - assert(lockPath != NULL); - /* try to create all the intermediate directories */ - len = (int)strlen(lockPath); - buf[0] = lockPath[0]; - for (i = 1; i < len; i++) { - if (lockPath[i] == '/' && (i - start > 0)) { - /* only mkdir if leaf dir != "." or "/" or ".." */ - if (i - start > 2 - || (i - start == 1 && buf[start] != '.' - && buf[start] != '/') - || (i - start == 2 && buf[start] != '.' - && buf[start + 1] != '.')) { - buf[i] = '\0'; - if (osMkdir - (buf, - SQLITE_DEFAULT_PROXYDIR_PERMISSIONS)) { - int err = errno; - if (err != EEXIST) { - return err; - } - } + UnixUnusedFd *pUnused = 0; + + struct stat sStat; /* Results of stat() call */ + + /* A stat() call may fail for various reasons. If this happens, it is + * almost certain that an open() call on the same path will also fail. + * For this reason, if an error occurs in the stat() call here, it is + * ignored and -1 is returned. The caller will try to open a new file + * descriptor on the same path, fail, and return an error to SQLite. + * + * Even if a subsequent open() call does succeed, the consequences of + * not searching for a reusable file descriptor are not dire. + */ + if (0 == stat(zPath, &sStat)) { + unixInodeInfo *pInode; + + pInode = inodeList; + while (pInode && (pInode->fileId.dev != sStat.st_dev + || pInode->fileId.ino != + (u64) sStat.st_ino)) { + pInode = pInode->pNext; + } + if (pInode) { + UnixUnusedFd **pp; + for (pp = &pInode->pUnused; + *pp && (*pp)->flags != flags; + pp = &((*pp)->pNext)) ; + pUnused = *pp; + if (pUnused) { + *pp = pUnused->pNext; } - start = i + 1; } - buf[i] = lockPath[i]; } - - return 0; + return pUnused; } /* - * Create a new VFS file descriptor (stored in memory obtained from - * sqlite3_malloc) and open the file named "path" in the file descriptor. - * - * The caller is responsible not only for closing the file descriptor - * but also for freeing the memory associated with the file descriptor. + * Find the mode, uid and gid of file zFile. */ static int -proxyCreateUnixFile(const char *path, /* path for the new unixFile */ - unixFile ** ppFile, /* unixFile created and returned by ref */ - int islockfile /* if non zero missing dirs will be created */ +getFileMode(const char *zFile, /* File name */ + mode_t * pMode, /* OUT: Permissions of zFile */ + uid_t * pUid, /* OUT: uid of zFile. */ + gid_t * pGid /* OUT: gid of zFile. */ ) { - int fd = -1; - unixFile *pNew; + struct stat sStat; /* Output of stat() on database file */ int rc = SQLITE_OK; - int openFlags = O_RDWR | O_CREAT; - sqlite3_vfs dummyVfs; - int terrno = 0; - UnixUnusedFd *pUnused = NULL; - - /* 1. first try to open/create the file - * 2. if that fails, and this is a lock file (not-conch), try creating - * the parent directories and then try again. - * 3. if that fails, try to open the file read-only - * otherwise return BUSY (if lock file) or CANTOPEN for the conch file - */ - pUnused = findReusableFd(path, openFlags); - if (pUnused) { - fd = pUnused->fd; + if (0 == stat(zFile, &sStat)) { + *pMode = sStat.st_mode & 0777; + *pUid = sStat.st_uid; + *pGid = sStat.st_gid; } else { - pUnused = sqlite3_malloc64(sizeof(*pUnused)); - if (!pUnused) { - return SQLITE_NOMEM_BKPT; - } - } - if (fd < 0) { - fd = robust_open(path, openFlags, 0); - terrno = errno; - if (fd < 0 && errno == ENOENT && islockfile) { - if (proxyCreateLockPath(path) == SQLITE_OK) { - fd = robust_open(path, openFlags, 0); - } - } - } - if (fd < 0) { - openFlags = O_RDONLY; - fd = robust_open(path, openFlags, 0); - terrno = errno; - } - if (fd < 0) { - if (islockfile) { - return SQLITE_BUSY; - } - switch (terrno) { - case EACCES: - return SQLITE_PERM; - case EIO: - return SQLITE_IOERR_LOCK; /* even though it is the conch */ - default: - return SQLITE_CANTOPEN_BKPT; - } - } - - pNew = (unixFile *) sqlite3_malloc64(sizeof(*pNew)); - if (pNew == NULL) { - rc = SQLITE_NOMEM_BKPT; - goto end_create_proxy; - } - memset(pNew, 0, sizeof(unixFile)); - pNew->openFlags = openFlags; - memset(&dummyVfs, 0, sizeof(dummyVfs)); - dummyVfs.pAppData = (void *)&autolockIoFinder; - dummyVfs.zName = "dummy"; - pUnused->fd = fd; - pUnused->flags = openFlags; - pNew->pUnused = pUnused; - - rc = fillInUnixFile(&dummyVfs, fd, (sqlite3_file *) pNew, path, 0); - if (rc == SQLITE_OK) { - *ppFile = pNew; - return SQLITE_OK; + rc = SQLITE_IOERR_FSTAT; } - end_create_proxy: - robust_close(pNew, fd, __LINE__); - sqlite3_free(pNew); - sqlite3_free(pUnused); return rc; } -#ifdef SQLITE_TEST -/* simulate multiple hosts by creating unique hostid file paths */ -int sqlite3_hostid_num = 0; -#endif - -#define PROXY_HOSTIDLEN 16 /* conch file host id length */ - -#ifdef HAVE_GETHOSTUUID -/* Not always defined in the headers as it ought to be */ -extern int gethostuuid(uuid_t id, const struct timespec *wait); -#endif - -/* get the host ID via gethostuuid(), pHostID must point to PROXY_HOSTIDLEN - * bytes of writable memory. +/* + * This function is called by unixOpen() to determine the unix permissions + * to create new files with. If no error occurs, then SQLITE_OK is returned + * and a value suitable for passing as the third argument to open(2) is + * written to *pMode. If an IO error occurs, an SQLite error code is + * returned and the value of *pMode is not modified. + * + * In most cases, this routine sets *pMode to 0, which will become + * an indication to robust_open() to create the file using + * SQLITE_DEFAULT_FILE_PERMISSIONS adjusted by the umask. + * But if the file being opened is a regular journal file, then + * this function queries the file-system for the permissions on the + * corresponding database file and sets *pMode to this value. Whenever + * possible, journal files are created using the same permissions + * as the associated database file. + * + * If the SQLITE_ENABLE_8_3_NAMES option is enabled, then the + * original filename is unavailable. But 8_3_NAMES is only used for + * FAT filesystems and permissions do not matter there, so just use + * the default permissions. */ static int -proxyGetHostID(unsigned char *pHostID, int *pError) +findCreateFileMode(const char *zPath, /* Path of file (possibly) being created */ + int flags, /* Flags passed as 4th argument to xOpen() */ + mode_t * pMode, /* OUT: Permissions to open file with */ + uid_t * pUid, /* OUT: uid to set on the file */ + gid_t * pGid /* OUT: gid to set on the file */ + ) { - assert(PROXY_HOSTIDLEN == sizeof(uuid_t)); - memset(pHostID, 0, PROXY_HOSTIDLEN); -#ifdef HAVE_GETHOSTUUID - { - struct timespec timeout = { 1, 0 }; /* 1 sec timeout */ - if (gethostuuid(pHostID, &timeout)) { - int err = errno; - if (pError) { - *pError = err; - } - return SQLITE_IOERR; + int rc = SQLITE_OK; /* Return Code */ + *pMode = 0; + *pUid = 0; + *pGid = 0; + if (flags & SQLITE_OPEN_DELETEONCLOSE) { + *pMode = 0600; + } else if (flags & SQLITE_OPEN_URI) { + /* If this is a main database file and the file was opened using a URI + * filename, check for the "modeof" parameter. If present, interpret + * its value as a filename and try to copy the mode, uid and gid from + * that file. + */ + const char *z = sqlite3_uri_parameter(zPath, "modeof"); + if (z) { + rc = getFileMode(z, pMode, pUid, pGid); } } -#else - UNUSED_PARAMETER(pError); -#endif -#ifdef SQLITE_TEST - /* simulate multiple hosts by creating unique hostid file paths */ - if (sqlite3_hostid_num != 0) { - pHostID[0] = - (char)(pHostID[0] + (char)(sqlite3_hostid_num & 0xFF)); - } -#endif - - return SQLITE_OK; + return rc; } -/* The conch file contains the header, host id and lock file path - */ -#define PROXY_CONCHVERSION 2 /* 1-byte header, 16-byte host id, path */ -#define PROXY_HEADERLEN 1 /* conch file header length */ -#define PROXY_PATHINDEX (PROXY_HEADERLEN+PROXY_HOSTIDLEN) -#define PROXY_MAXCONCHLEN (PROXY_HEADERLEN+PROXY_HOSTIDLEN+MAXPATHLEN) - /* - * Takes an open conch file, copies the contents to a new path and then moves - * it back. The newly created file's file descriptor is assigned to the - * conch file structure and finally the original conch file descriptor is - * closed. Returns zero if successful. + * Open the file zPath. + * + * Previously, the SQLite OS layer used three functions in place of this + * one: + * + * sqlite3OsOpenReadWrite(); + * sqlite3OsOpenReadOnly(); + * sqlite3OsOpenExclusive(); + * + * These calls correspond to the following combinations of flags: + * + * ReadWrite() -> (READWRITE | CREATE) + * ReadOnly() -> (READONLY) + * OpenExclusive() -> (READWRITE | CREATE | EXCLUSIVE) + * + * The old OpenExclusive() accepted a boolean argument - "delFlag". If + * true, the file was configured to be automatically deleted when the + * file handle closed. To achieve the same effect using this new + * interface, add the DELETEONCLOSE flag to those specified above for + * OpenExclusive(). */ static int -proxyBreakConchLock(unixFile * pFile, uuid_t myHostID) +unixOpen(sqlite3_vfs * pVfs, /* The VFS for which this is the xOpen method */ + const char *zPath, /* Pathname of file to be opened */ + sqlite3_file * pFile, /* The file descriptor to be filled in */ + int flags, /* Input flags to control the opening */ + int *pOutFlags /* Output flags returned to SQLite core */ + ) { - proxyLockingContext *pCtx = - (proxyLockingContext *) pFile->lockingContext; - unixFile *conchFile = pCtx->conchFile; - char tPath[MAXPATHLEN]; - char buf[PROXY_MAXCONCHLEN]; - char *cPath = pCtx->conchFilePath; - size_t readLen = 0; - size_t pathLen = 0; - char errmsg[64] = ""; - int fd = -1; - int rc = -1; - UNUSED_PARAMETER(myHostID); - - /* create a new path by replace the trailing '-conch' with '-break' */ - pathLen = strlcpy(tPath, cPath, MAXPATHLEN); - if (pathLen > MAXPATHLEN || pathLen < 6 || - (strlcpy(&tPath[pathLen - 5], "break", 6) != 5)) { - sqlite3_snprintf(sizeof(errmsg), errmsg, "path error (len %d)", - (int)pathLen); - goto end_breaklock; - } - /* read the conch content */ - readLen = osPread(conchFile->h, buf, PROXY_MAXCONCHLEN, 0); - if (readLen < PROXY_PATHINDEX) { - sqlite3_snprintf(sizeof(errmsg), errmsg, "read error (len %d)", - (int)readLen); - goto end_breaklock; - } - /* write it out to the temporary break file */ - fd = robust_open(tPath, (O_RDWR | O_CREAT | O_EXCL), 0); - if (fd < 0) { - sqlite3_snprintf(sizeof(errmsg), errmsg, "create failed (%d)", - errno); - goto end_breaklock; - } - if (osPwrite(fd, buf, readLen, 0) != (ssize_t) readLen) { - sqlite3_snprintf(sizeof(errmsg), errmsg, "write failed (%d)", - errno); - goto end_breaklock; - } - if (rename(tPath, cPath)) { - sqlite3_snprintf(sizeof(errmsg), errmsg, "rename failed (%d)", - errno); - goto end_breaklock; - } - rc = 0; - fprintf(stderr, "broke stale lock on %s\n", cPath); - robust_close(pFile, conchFile->h, __LINE__); - conchFile->h = fd; - conchFile->openFlags = O_RDWR | O_CREAT; + unixFile *p = (unixFile *) pFile; + int fd = -1; /* File descriptor returned by open() */ + int openFlags = 0; /* Flags to pass to open() */ + int eType = flags & 0xFFFFFF00; /* Type of file to open */ + int noLock; /* True to omit locking primitives */ + int rc; /* Function Return Code */ + int ctrlFlags = 0; /* UNIXFILE_* flags */ - end_breaklock: - if (rc) { - if (fd >= 0) { - osUnlink(tPath); - robust_close(pFile, fd, __LINE__); - } - fprintf(stderr, "failed to break stale lock on %s, %s\n", cPath, - errmsg); - } - return rc; -} + int isExclusive = (flags & SQLITE_OPEN_EXCLUSIVE); + int isDelete = (flags & SQLITE_OPEN_DELETEONCLOSE); + int isCreate = (flags & SQLITE_OPEN_CREATE); + int isReadonly = (flags & SQLITE_OPEN_READONLY); + int isReadWrite = (flags & SQLITE_OPEN_READWRITE); -/* Take the requested lock on the conch file and break a stale lock if the - * host id matches. - */ -static int -proxyConchLock(unixFile * pFile, uuid_t myHostID, int lockType) -{ - proxyLockingContext *pCtx = - (proxyLockingContext *) pFile->lockingContext; - unixFile *conchFile = pCtx->conchFile; - int rc = SQLITE_OK; - int nTries = 0; - struct timespec conchModTime; + /* If creating a master or main-file journal, this function will open + * a file-descriptor on the directory too. The first time unixSync() + * is called the directory file descriptor will be fsync()ed and close()d. + */ + int syncDir = isCreate; - memset(&conchModTime, 0, sizeof(conchModTime)); - do { - rc = conchFile->pMethod->xLock((sqlite3_file *) conchFile, - lockType); - nTries++; - if (rc == SQLITE_BUSY) { - /* If the lock failed (busy): - * 1st try: get the mod time of the conch, wait 0.5s and try again. - * 2nd try: fail if the mod time changed or host id is different, wait - * 10 sec and try again - * 3rd try: break the lock unless the mod time has changed. - */ - struct stat buf; - if (osFstat(conchFile->h, &buf)) { - storeLastErrno(pFile, errno); - return SQLITE_IOERR_LOCK; - } + /* If argument zPath is a NULL pointer, this function is required to open + * a temporary file. Use this buffer to store the file name in. + */ + char zTmpname[MAX_PATHNAME + 2]; + const char *zName = zPath; + + /* Check the following statements are true: + * + * (a) Exactly one of the READWRITE and READONLY flags must be set, and + * (b) if CREATE is set, then READWRITE must also be set, and + * (c) if EXCLUSIVE is set, then CREATE must also be set. + * (d) if DELETEONCLOSE is set, then CREATE must also be set. + */ + assert((isReadonly == 0 || isReadWrite == 0) + && (isReadWrite || isReadonly)); + assert(isCreate == 0 || isReadWrite); + assert(isExclusive == 0 || isCreate); + assert(isDelete == 0 || isCreate); + + /* Detect a pid change and reset the PRNG. There is a race condition + * here such that two or more threads all trying to open databases at + * the same instant might all reset the PRNG. But multiple resets + * are harmless. + */ + if (randomnessPid != getpid()) { + randomnessPid = getpid(); + sqlite3_randomness(0, 0); + } - if (nTries == 1) { - conchModTime = buf.st_mtimespec; - usleep(500000); /* wait 0.5 sec and try the lock again */ - continue; - } + memset(p, 0, sizeof(unixFile)); - assert(nTries > 1); - if (conchModTime.tv_sec != buf.st_mtimespec.tv_sec || - conchModTime.tv_nsec != buf.st_mtimespec.tv_nsec) { - return SQLITE_BUSY; + if (eType == SQLITE_OPEN_MAIN_DB) { + UnixUnusedFd *pUnused; + pUnused = findReusableFd(zName, flags); + if (pUnused) { + fd = pUnused->fd; + } else { + pUnused = sqlite3_malloc64(sizeof(*pUnused)); + if (!pUnused) { + return SQLITE_NOMEM_BKPT; } + } + p->pUnused = pUnused; - if (nTries == 2) { - char tBuf[PROXY_MAXCONCHLEN]; - int len = - osPread(conchFile->h, tBuf, - PROXY_MAXCONCHLEN, 0); - if (len < 0) { - storeLastErrno(pFile, errno); - return SQLITE_IOERR_LOCK; - } - if (len > PROXY_PATHINDEX - && tBuf[0] == (char)PROXY_CONCHVERSION) { - /* don't break the lock if the host id doesn't match */ - if (0 != - memcmp(&tBuf[PROXY_HEADERLEN], - myHostID, PROXY_HOSTIDLEN)) { - return SQLITE_BUSY; - } - } else { - /* don't break the lock on short read or a version mismatch */ - return SQLITE_BUSY; - } - usleep(10000000); /* wait 10 sec and try the lock again */ - continue; - } + /* Database filenames are double-zero terminated if they are not + * URIs with parameters. Hence, they can always be passed into + * sqlite3_uri_parameter(). + */ + assert((flags & SQLITE_OPEN_URI) + || zName[strlen(zName) + 1] == 0); - assert(nTries == 3); - if (0 == proxyBreakConchLock(pFile, myHostID)) { - rc = SQLITE_OK; - if (lockType == EXCLUSIVE_LOCK) { - rc = conchFile->pMethod-> - xLock((sqlite3_file *) conchFile, - SHARED_LOCK); - } - if (!rc) { - rc = conchFile->pMethod-> - xLock((sqlite3_file *) conchFile, - lockType); - } - } + } else if (!zName) { + /* If zName is NULL, the upper layer is requesting a temp file. */ + assert(isDelete); + rc = unixGetTempname(pVfs->mxPathname, zTmpname); + if (rc != SQLITE_OK) { + return rc; } - } while (rc == SQLITE_BUSY && nTries < 3); + zName = zTmpname; - return rc; -} + /* Generated temporary filenames are always double-zero terminated + * for use by sqlite3_uri_parameter(). + */ + assert(zName[strlen(zName) + 1] == 0); + } -/* Takes the conch by taking a shared lock and read the contents conch, if - * lockPath is non-NULL, the host ID and lock file path must match. A NULL - * lockPath means that the lockPath in the conch file will be used if the - * host IDs match, or a new lock path will be generated automatically - * and written to the conch file. - */ -static int -proxyTakeConch(unixFile * pFile) -{ - proxyLockingContext *pCtx = - (proxyLockingContext *) pFile->lockingContext; + /* Determine the value of the flags parameter passed to POSIX function + * open(). These must be calculated even if open() is not called, as + * they may be stored as part of the file handle and used by the + * 'conch file' locking functions later on. + */ + if (isReadonly) + openFlags |= O_RDONLY; + if (isReadWrite) + openFlags |= O_RDWR; + if (isCreate) + openFlags |= O_CREAT; + if (isExclusive) + openFlags |= (O_EXCL | O_NOFOLLOW); + openFlags |= O_BINARY; + #ifndef __APPLE__ + openFlags |= O_LARGEFILE; + #endif - if (pCtx->conchHeld != 0) { - return SQLITE_OK; - } else { - unixFile *conchFile = pCtx->conchFile; - uuid_t myHostID; - int pError = 0; - char readBuf[PROXY_MAXCONCHLEN]; - char lockPath[MAXPATHLEN]; - char *tempLockPath = NULL; - int rc = SQLITE_OK; - int createConch = 0; - int hostIdMatch = 0; - int readLen = 0; - int tryOldLockPath = 0; - int forceNewLockPath = 0; - - rc = proxyGetHostID(myHostID, &pError); - if ((rc & 0xff) == SQLITE_IOERR) { - storeLastErrno(pFile, pError); - goto end_takeconch; - } - rc = proxyConchLock(pFile, myHostID, SHARED_LOCK); + if (fd < 0) { + mode_t openMode; /* Permissions to create file with */ + uid_t uid; /* Userid for the file */ + gid_t gid; /* Groupid for the file */ + rc = findCreateFileMode(zName, flags, &openMode, &uid, &gid); if (rc != SQLITE_OK) { - goto end_takeconch; + assert(!p->pUnused); + return rc; } - /* read the existing conch file */ - readLen = - seekAndRead((unixFile *) conchFile, 0, readBuf, - PROXY_MAXCONCHLEN); - if (readLen < 0) { - /* I/O error: lastErrno set by seekAndRead */ - storeLastErrno(pFile, conchFile->lastErrno); - rc = SQLITE_IOERR_READ; - goto end_takeconch; - } else if (readLen <= (PROXY_HEADERLEN + PROXY_HOSTIDLEN) || - readBuf[0] != (char)PROXY_CONCHVERSION) { - /* a short read or version format mismatch means we need to create a new - * conch file. - */ - createConch = 1; + fd = robust_open(zName, openFlags, openMode); + assert(!isExclusive || (openFlags & O_CREAT) != 0); + if (fd < 0 && errno != EISDIR && isReadWrite) { + /* Failed to open the file for read/write access. Try read-only. */ + flags &= ~(SQLITE_OPEN_READWRITE | SQLITE_OPEN_CREATE); + openFlags &= ~(O_RDWR | O_CREAT); + flags |= SQLITE_OPEN_READONLY; + openFlags |= O_RDONLY; + isReadonly = 1; + fd = robust_open(zName, openFlags, openMode); } - /* if the host id matches and the lock path already exists in the conch - * we'll try to use the path there, if we can't open that path, we'll - * retry with a new auto-generated path - */ - do { /* in case we need to try again for an :auto: named lock file */ - - if (!createConch && !forceNewLockPath) { - hostIdMatch = - !memcmp(&readBuf[PROXY_HEADERLEN], myHostID, - PROXY_HOSTIDLEN); - /* if the conch has data compare the contents */ - if (!pCtx->lockProxyPath) { - /* for auto-named local lock file, just check the host ID and we'll - * use the local lock file path that's already in there - */ - if (hostIdMatch) { - size_t pathLen = - (readLen - PROXY_PATHINDEX); - - if (pathLen >= MAXPATHLEN) { - pathLen = - MAXPATHLEN - 1; - } - memcpy(lockPath, - &readBuf - [PROXY_PATHINDEX], - pathLen); - lockPath[pathLen] = 0; - tempLockPath = lockPath; - tryOldLockPath = 1; - /* create a copy of the lock path if the conch is taken */ - goto end_takeconch; - } - } else if (hostIdMatch - && !strncmp(pCtx->lockProxyPath, - &readBuf - [PROXY_PATHINDEX], - readLen - - PROXY_PATHINDEX) - ) { - /* conch host and lock path match */ - goto end_takeconch; - } - } + if (fd < 0) { + rc = unixLogError(SQLITE_CANTOPEN_BKPT, "open", zName); + goto open_finished; + } + + } + assert(fd >= 0); + if (pOutFlags) { + *pOutFlags = flags; + } - /* if the conch isn't writable and doesn't match, we can't take it */ - if ((conchFile->openFlags & O_RDWR) == 0) { - rc = SQLITE_BUSY; - goto end_takeconch; - } + if (p->pUnused) { + p->pUnused->fd = fd; + p->pUnused->flags = flags; + } - /* either the conch didn't match or we need to create a new one */ - if (!pCtx->lockProxyPath) { - proxyGetLockPath(pCtx->dbPath, lockPath, - MAXPATHLEN); - tempLockPath = lockPath; - /* create a copy of the lock path _only_ if the conch is taken */ - } + if (isDelete) + unlink(zName); - /* update conch with host and path (this will fail if other process - * has a shared lock already), if the host id matches, use the big - * stick. - */ - futimes(conchFile->h, NULL); - if (hostIdMatch && !createConch) { - if (conchFile->pInode - && conchFile->pInode->nShared > 1) { - /* We are trying for an exclusive lock but another thread in this - * same process is still holding a shared lock. - */ - rc = SQLITE_BUSY; - } else { - rc = proxyConchLock(pFile, myHostID, - EXCLUSIVE_LOCK); - } - } else { - rc = proxyConchLock(pFile, myHostID, - EXCLUSIVE_LOCK); - } - if (rc == SQLITE_OK) { - char writeBuffer[PROXY_MAXCONCHLEN]; - int writeSize = 0; - - writeBuffer[0] = (char)PROXY_CONCHVERSION; - memcpy(&writeBuffer[PROXY_HEADERLEN], myHostID, - PROXY_HOSTIDLEN); - if (pCtx->lockProxyPath != NULL) { - strlcpy(&writeBuffer[PROXY_PATHINDEX], - pCtx->lockProxyPath, - MAXPATHLEN); - } else { - strlcpy(&writeBuffer[PROXY_PATHINDEX], - tempLockPath, MAXPATHLEN); - } - writeSize = - PROXY_PATHINDEX + - strlen(&writeBuffer[PROXY_PATHINDEX]); - robust_ftruncate(conchFile->h, writeSize); - rc = unixWrite((sqlite3_file *) conchFile, - writeBuffer, writeSize, 0); - full_fsync(conchFile->h, 0, 0); - /* If we created a new conch file (not just updated the contents of a - * valid conch file), try to match the permissions of the database - */ - if (rc == SQLITE_OK && createConch) { - struct stat buf; - int err = osFstat(pFile->h, &buf); - if (err == 0) { - mode_t cmode = - buf. - st_mode & (S_IRUSR | S_IWUSR - | S_IRGRP | - S_IWGRP | S_IROTH - | S_IWOTH); - /* try to match the database file R/W permissions, ignore failure */ -#ifndef SQLITE_PROXY_DEBUG - osFchmod(conchFile->h, cmode); -#else - do { - rc = osFchmod - (conchFile->h, - cmode); - } while (rc == (-1) - && errno == EINTR); - if (rc != 0) { - int code = errno; - fprintf(stderr, - "fchmod %o FAILED with %d %s\n", - cmode, code, - strerror(code)); - } else { - fprintf(stderr, - "fchmod %o SUCCEDED\n", - cmode); - } - } else { - int code = errno; - fprintf(stderr, - "STAT FAILED[%d] with %d %s\n", - err, code, - strerror(code)); -#endif - } - } - } - conchFile->pMethod->xUnlock((sqlite3_file *) conchFile, - SHARED_LOCK); - - end_takeconch: - if (rc == SQLITE_OK && pFile->openFlags) { - int fd; - if (pFile->h >= 0) { - robust_close(pFile, pFile->h, __LINE__); - } - pFile->h = -1; - fd = robust_open(pCtx->dbPath, pFile->openFlags, - 0); - if (fd >= 0) { - pFile->h = fd; - } else { - rc = SQLITE_CANTOPEN_BKPT; /* SQLITE_BUSY? proxyTakeConch called - during locking */ - } - } - if (rc == SQLITE_OK && !pCtx->lockProxy) { - char *path = - tempLockPath ? tempLockPath : pCtx-> - lockProxyPath; - rc = proxyCreateUnixFile(path, &pCtx->lockProxy, - 1); - if (rc != SQLITE_OK && rc != SQLITE_NOMEM - && tryOldLockPath) { - /* we couldn't create the proxy lock file with the old lock file path - * so try again via auto-naming - */ - forceNewLockPath = 1; - tryOldLockPath = 0; - continue; /* go back to the do {} while start point, try again */ - } - } - if (rc == SQLITE_OK) { - /* Need to make a copy of path if we extracted the value - * from the conch file or the path was allocated on the stack - */ - if (tempLockPath) { - pCtx->lockProxyPath = - sqlite3DbStrDup(0, tempLockPath); - if (!pCtx->lockProxyPath) { - rc = SQLITE_NOMEM_BKPT; - } - } - } - if (rc == SQLITE_OK) { - pCtx->conchHeld = 1; - - if (pCtx->lockProxy->pMethod == &afpIoMethods) { - afpLockingContext *afpCtx; - afpCtx = - (afpLockingContext *) pCtx-> - lockProxy->lockingContext; - afpCtx->dbPath = pCtx->lockProxyPath; - } - } else { - conchFile->pMethod-> - xUnlock((sqlite3_file *) conchFile, - NO_LOCK); - } - return rc; - } while (1); /* in case we need to retry the :auto: lock file - - * we should never get here except via the 'continue' call. - */ + /* Set up appropriate ctrlFlags */ + if (isDelete) + ctrlFlags |= UNIXFILE_DELETE; + if (isReadonly) + ctrlFlags |= UNIXFILE_RDONLY; + noLock = eType != SQLITE_OPEN_MAIN_DB; + if (noLock) + ctrlFlags |= UNIXFILE_NOLOCK; + if (syncDir) + ctrlFlags |= UNIXFILE_DIRSYNC; + if (flags & SQLITE_OPEN_URI) + ctrlFlags |= UNIXFILE_URI; + + rc = fillInUnixFile(pVfs, fd, pFile, zPath, ctrlFlags); + + open_finished: + if (rc != SQLITE_OK) { + sqlite3_free(p->pUnused); } + return rc; } /* - * If pFile holds a lock on a conch file, then release that lock. + * Delete the file at zPath. If the dirSync argument is true, fsync() + * the directory after deleting the file. */ static int -proxyReleaseConch(unixFile * pFile) +unixDelete(sqlite3_vfs * NotUsed, /* VFS containing this as the xDelete method */ + const char *zPath, /* Name of file to be deleted */ + int dirSync /* If true, fsync() directory after deleting file */ + ) { - int rc = SQLITE_OK; /* Subroutine return code */ - proxyLockingContext *pCtx; /* The locking context for the proxy lock */ - unixFile *conchFile; /* Name of the conch file */ - - pCtx = (proxyLockingContext *) pFile->lockingContext; - conchFile = pCtx->conchFile; - if (pCtx->conchHeld > 0) { - rc = conchFile->pMethod->xUnlock((sqlite3_file *) conchFile, - NO_LOCK); + int rc = SQLITE_OK; + UNUSED_PARAMETER(NotUsed); + SimulateIOError(return SQLITE_IOERR_DELETE); + if (unlink(zPath) == (-1)) { + if (errno == ENOENT) { + rc = SQLITE_IOERR_DELETE_NOENT; + } else { + rc = unixLogError(SQLITE_IOERR_DELETE, "unlink", zPath); + } + return rc; + } +#ifndef SQLITE_DISABLE_DIRSYNC + if ((dirSync & 1) != 0) { + int fd; + rc = openDirectory(zPath, &fd); + if (rc == SQLITE_OK) { + if (full_fsync(fd, 0, 0)) { + rc = unixLogError(SQLITE_IOERR_DIR_FSYNC, + "fsync", zPath); + } + robust_close(0, fd, __LINE__); + } else { + assert(rc == SQLITE_CANTOPEN); + rc = SQLITE_OK; + } } - pCtx->conchHeld = 0; +#endif return rc; } /* - * Given the name of a database file, compute the name of its conch file. - * Store the conch filename in memory obtained from sqlite3_malloc64(). - * Make *pConchPath point to the new name. Return SQLITE_OK on success - * or SQLITE_NOMEM if unable to obtain memory. + * Test the existence of or access permissions of file zPath. The + * test performed depends on the value of flags: * - * The caller is responsible for ensuring that the allocated memory - * space is eventually freed. + * SQLITE_ACCESS_EXISTS: Return 1 if the file exists + * SQLITE_ACCESS_READWRITE: Return 1 if the file is read and writable. + * SQLITE_ACCESS_READONLY: Return 1 if the file is readable. * - * *pConchPath is set to NULL if a memory allocation error occurs. + * Otherwise return 0. */ static int -proxyCreateConchPathname(char *dbPath, char **pConchPath) +unixAccess(sqlite3_vfs * NotUsed, /* The VFS containing this xAccess method */ + const char *zPath, /* Path of the file to examine */ + int flags, /* What do we want to learn about the zPath file? */ + int *pResOut /* Write result boolean here */ + ) { - int i; /* Loop counter */ - int len = (int)strlen(dbPath); /* Length of database filename - dbPath */ - char *conchPath; /* buffer in which to construct conch name */ + UNUSED_PARAMETER(NotUsed); + SimulateIOError(return SQLITE_IOERR_ACCESS; + ); + assert(pResOut != 0); - /* Allocate space for the conch filename and initialize the name to - * the name of the original database file. + /* The spec says there are three possible values for flags. But only + * two of them are actually used */ - *pConchPath = conchPath = (char *)sqlite3_malloc64(len + 8); - if (conchPath == 0) { - return SQLITE_NOMEM_BKPT; - } - memcpy(conchPath, dbPath, len + 1); + assert(flags == SQLITE_ACCESS_EXISTS + || flags == SQLITE_ACCESS_READWRITE); - /* now insert a "." before the last / character */ - for (i = (len - 1); i >= 0; i--) { - if (conchPath[i] == '/') { - i++; - break; - } - } - conchPath[i] = '.'; - while (i < len) { - conchPath[i + 1] = dbPath[i]; - i++; + if (flags == SQLITE_ACCESS_EXISTS) { + struct stat buf; + *pResOut = (0 == stat(zPath, &buf) && buf.st_size > 0); + } else { + *pResOut = access(zPath, W_OK | R_OK) == 0; } - - /* append the "-conch" suffix to the file */ - memcpy(&conchPath[i + 1], "-conch", 7); - assert((int)strlen(conchPath) == len + 7); - return SQLITE_OK; } -/* Takes a fully configured proxy locking-style unix file and switches - * the local lock file path - */ static int -switchLockProxyPath(unixFile * pFile, const char *path) +mkFullPathname(const char *zPath, /* Input path */ + char *zOut, /* Output buffer */ + int nOut /* Allocated size of buffer zOut */ + ) { - proxyLockingContext *pCtx = - (proxyLockingContext *) pFile->lockingContext; - char *oldPath = pCtx->lockProxyPath; - int rc = SQLITE_OK; - - if (pFile->eFileLock != NO_LOCK) { - return SQLITE_BUSY; - } - - /* nothing to do if the path is NULL, :auto: or matches the existing path */ - if (!path || path[0] == '\0' || !strcmp(path, ":auto:") || - (oldPath && !strncmp(oldPath, path, MAXPATHLEN))) { - return SQLITE_OK; - } else { - unixFile *lockProxy = pCtx->lockProxy; - pCtx->lockProxy = NULL; - pCtx->conchHeld = 0; - if (lockProxy != NULL) { - rc = lockProxy->pMethod-> - xClose((sqlite3_file *) lockProxy); - if (rc) - return rc; - sqlite3_free(lockProxy); + int nPath = sqlite3Strlen30(zPath); + int iOff = 0; + if (zPath[0] != '/') { + if (getcwd(zOut, nOut - 2) == 0) { + return unixLogError(SQLITE_CANTOPEN_BKPT, "getcwd", + zPath); } - sqlite3_free(oldPath); - pCtx->lockProxyPath = sqlite3DbStrDup(0, path); + iOff = sqlite3Strlen30(zOut); + zOut[iOff++] = '/'; } - - return rc; + if ((iOff + nPath + 1) > nOut) { + /* SQLite assumes that xFullPathname() nul-terminates the output buffer + * even if it returns an error. + */ + zOut[iOff] = '\0'; + return SQLITE_CANTOPEN_BKPT; + } + sqlite3_snprintf(nOut - iOff, &zOut[iOff], "%s", zPath); + return SQLITE_OK; } /* - * pFile is a file that has been opened by a prior xOpen call. dbPath - * is a string buffer at least MAXPATHLEN+1 characters in size. + * Turn a relative pathname into a full pathname. The relative path + * is stored as a nul-terminated string in the buffer pointed to by + * zPath. * - * This routine find the filename associated with pFile and writes it - * int dbPath. + * zOut points to a buffer of at least sqlite3_vfs.mxPathname bytes + * (in this case, MAX_PATHNAME bytes). The full-path is written to + * this buffer before returning. */ static int -proxyGetDbPathForUnixFile(unixFile * pFile, char *dbPath) +unixFullPathname(sqlite3_vfs * pVfs, /* Pointer to vfs object */ + const char *zPath, /* Possibly relative input path */ + int nOut, /* Size of output buffer in bytes */ + char *zOut /* Output buffer */ + ) { -#if defined(__APPLE__) - if (pFile->pMethod == &afpIoMethods) { - /* afp style keeps a reference to the db path in the filePath field - * of the struct - */ - assert((int)strlen((char *)pFile->lockingContext) <= - MAXPATHLEN); - strlcpy(dbPath, - ((afpLockingContext *) pFile->lockingContext)->dbPath, - MAXPATHLEN); - } else -#endif - if (pFile->pMethod == &dotlockIoMethods) { - /* dot lock style uses the locking context to store the dot lock - * file path - */ - int len = - strlen((char *)pFile->lockingContext) - - strlen(DOTLOCK_SUFFIX); - memcpy(dbPath, (char *)pFile->lockingContext, len + 1); - } else { - /* all other styles use the locking context to store the db file path */ - assert(strlen((char *)pFile->lockingContext) <= MAXPATHLEN); - strlcpy(dbPath, (char *)pFile->lockingContext, MAXPATHLEN); - } - return SQLITE_OK; + (void)pVfs; + return mkFullPathname(zPath, zOut, nOut); } /* - * Takes an already filled in unix file and alters it so all file locking - * will be performed on the local proxy lock file. The following fields - * are preserved in the locking context so that they can be restored and - * the unix structure properly cleaned up at close time: - * ->lockingContext - * ->pMethod + * Interfaces for opening a shared library, finding entry points + * within the shared library, and closing the shared library. */ -static int -proxyTransformUnixFile(unixFile * pFile, const char *path) +#include +static void * +unixDlOpen(sqlite3_vfs * NotUsed, const char *zFilename) { - proxyLockingContext *pCtx; - char dbPath[MAXPATHLEN + 1]; /* Name of the database file */ - char *lockPath = NULL; - int rc = SQLITE_OK; - - if (pFile->eFileLock != NO_LOCK) { - return SQLITE_BUSY; - } - proxyGetDbPathForUnixFile(pFile, dbPath); - if (!path || path[0] == '\0' || !strcmp(path, ":auto:")) { - lockPath = NULL; - } else { - lockPath = (char *)path; - } - - pCtx = sqlite3_malloc64(sizeof(*pCtx)); - if (pCtx == 0) { - return SQLITE_NOMEM_BKPT; - } - memset(pCtx, 0, sizeof(*pCtx)); - - rc = proxyCreateConchPathname(dbPath, &pCtx->conchFilePath); - if (rc == SQLITE_OK) { - rc = proxyCreateUnixFile(pCtx->conchFilePath, &pCtx->conchFile, - 0); - if (rc == SQLITE_CANTOPEN && ((pFile->openFlags & O_RDWR) == 0)) { - /* if (a) the open flags are not O_RDWR, (b) the conch isn't there, and - * (c) the file system is read-only, then enable no-locking access. - * Ugh, since O_RDONLY==0x0000 we test for !O_RDWR since unixOpen asserts - * that openFlags will have only one of O_RDONLY or O_RDWR. - */ - struct statfs fsInfo; - struct stat conchInfo; - int goLockless = 0; - - if (osStat(pCtx->conchFilePath, &conchInfo) == -1) { - int err = errno; - if ((err == ENOENT) - && (statfs(dbPath, &fsInfo) != -1)) { - goLockless = - (fsInfo.f_flags & MNT_RDONLY) == - MNT_RDONLY; - } - } - if (goLockless) { - pCtx->conchHeld = -1; /* read only FS/ lockless */ - rc = SQLITE_OK; - } - } - } - if (rc == SQLITE_OK && lockPath) { - pCtx->lockProxyPath = sqlite3DbStrDup(0, lockPath); - } - - if (rc == SQLITE_OK) { - pCtx->dbPath = sqlite3DbStrDup(0, dbPath); - if (pCtx->dbPath == NULL) { - rc = SQLITE_NOMEM_BKPT; - } - } - if (rc == SQLITE_OK) { - /* all memory is allocated, proxys are created and assigned, - * switch the locking context and pMethod then return. - */ - pCtx->oldLockingContext = pFile->lockingContext; - pFile->lockingContext = pCtx; - pCtx->pOldMethod = pFile->pMethod; - pFile->pMethod = &proxyIoMethods; - } else { - if (pCtx->conchFile) { - pCtx->conchFile->pMethod->xClose((sqlite3_file *) pCtx-> - conchFile); - sqlite3_free(pCtx->conchFile); - } - sqlite3DbFree(0, pCtx->lockProxyPath); - sqlite3_free(pCtx->conchFilePath); - sqlite3_free(pCtx); - } - return rc; + UNUSED_PARAMETER(NotUsed); + return dlopen(zFilename, RTLD_NOW | RTLD_GLOBAL); } /* - * This routine handles sqlite3_file_control() calls that are specific - * to proxy locking. + * SQLite calls this function immediately after a call to unixDlSym() or + * unixDlOpen() fails (returns a null pointer). If a more detailed error + * message is available, it is written to zBufOut. If no error message + * is available, zBufOut is left unmodified and SQLite uses a default + * error message. */ -static int -proxyFileControl(sqlite3_file * id, int op, void *pArg) +static void +unixDlError(sqlite3_vfs * NotUsed, int nBuf, char *zBufOut) { - switch (op) { - case SQLITE_FCNTL_GET_LOCKPROXYFILE:{ - unixFile *pFile = (unixFile *) id; - if (pFile->pMethod == &proxyIoMethods) { - proxyLockingContext *pCtx = - (proxyLockingContext *) pFile-> - lockingContext; - proxyTakeConch(pFile); - if (pCtx->lockProxyPath) { - *(const char **)pArg = - pCtx->lockProxyPath; - } else { - *(const char **)pArg = - ":auto: (not held)"; - } - } else { - *(const char **)pArg = NULL; - } - return SQLITE_OK; - } - case SQLITE_FCNTL_SET_LOCKPROXYFILE:{ - unixFile *pFile = (unixFile *) id; - int rc = SQLITE_OK; - int isProxyStyle = (pFile->pMethod == &proxyIoMethods); - if (pArg == NULL || (const char *)pArg == 0) { - if (isProxyStyle) { - rc = SQLITE_ERROR; - } else { - /* turn off proxy locking - already off - NOOP */ - rc = SQLITE_OK; - } - } else { - const char *proxyPath = (const char *)pArg; - if (isProxyStyle) { - proxyLockingContext *pCtx = - (proxyLockingContext *) pFile-> - lockingContext; - if (!strcmp(pArg, ":auto:") - || (pCtx->lockProxyPath && - !strncmp(pCtx->lockProxyPath, - proxyPath, MAXPATHLEN)) - ) { - rc = SQLITE_OK; - } else { - rc = switchLockProxyPath(pFile, - proxyPath); - } - } else { - /* turn on proxy file locking */ - rc = proxyTransformUnixFile(pFile, - proxyPath); - } - } - return rc; - } - default:{ - assert(0); /* The call assures that only valid opcodes are sent */ - } + const char *zErr; + UNUSED_PARAMETER(NotUsed); + zErr = dlerror(); + if (zErr) { + sqlite3_snprintf(nBuf, zBufOut, "%s", zErr); } - /*NOTREACHED*/ return SQLITE_ERROR; } -/* - * Within this division (the proxying locking implementation) the procedures - * above this point are all utilities. The lock-related methods of the - * proxy-locking sqlite3_io_method object follow. - */ +static + void (*unixDlSym(sqlite3_vfs * NotUsed, void *p, const char *zSym)) (void) { + /* + * GCC with -pedantic-errors says that C90 does not allow a void* to be + * cast into a pointer to a function. And yet the library dlsym() routine + * returns a void* which is really a pointer to a function. So how do we + * use dlsym() with -pedantic-errors? + * + * Variable x below is defined to be a pointer to a function taking + * parameters void* and const char* and returning a pointer to a function. + * We initialize x by assigning it a pointer to the dlsym() function. + * (That assignment requires a cast.) Then we call the function that + * x points to. + * + * This work-around is unlikely to work correctly on any system where + * you really cannot cast a function pointer into void*. But then, on the + * other hand, dlsym() will not work on such a system either, so we have + * not really lost anything. + */ + void (*(*x) (void *, const char *)) (void); + UNUSED_PARAMETER(NotUsed); + x = (void (*(*)(void *, const char *))(void))dlsym; + return (*x) (p, zSym); +} + +static void +unixDlClose(sqlite3_vfs * NotUsed, void *pHandle) +{ + UNUSED_PARAMETER(NotUsed); + dlclose(pHandle); +} /* - * This routine checks if there is a RESERVED lock held on the specified - * file by this or any other process. If such a lock is held, set *pResOut - * to a non-zero value otherwise *pResOut is set to zero. The return value - * is set to SQLITE_OK unless an I/O error occurs during lock checking. + * Write nBuf bytes of random data to the supplied buffer zBuf. */ static int -proxyCheckReservedLock(sqlite3_file * id, int *pResOut) +unixRandomness(sqlite3_vfs * NotUsed, int nBuf, char *zBuf) { - unixFile *pFile = (unixFile *) id; - int rc = proxyTakeConch(pFile); - if (rc == SQLITE_OK) { - proxyLockingContext *pCtx = - (proxyLockingContext *) pFile->lockingContext; - if (pCtx->conchHeld > 0) { - unixFile *proxy = pCtx->lockProxy; - return proxy->pMethod-> - xCheckReservedLock((sqlite3_file *) proxy, pResOut); - } else { /* conchHeld < 0 is lockless */ - pResOut = 0; - } - } - return rc; + UNUSED_PARAMETER(NotUsed); + assert((size_t) nBuf >= (sizeof(time_t) + sizeof(int))); + + /* We have to initialize zBuf to prevent valgrind from reporting + * errors. The reports issued by valgrind are incorrect - we would + * prefer that the randomness be increased by making use of the + * uninitialized space in zBuf - but valgrind errors tend to worry + * some users. Rather than argue, it seems easier just to initialize + * the whole array and silence valgrind, even if that means less randomness + * in the random seed. + * + * When testing, initializing zBuf[] to zero is all we do. That means + * that we always use the same random number sequence. This makes the + * tests repeatable. + */ + memset(zBuf, 0, nBuf); + randomnessPid = getpid(); + return nBuf; } /* - * Lock the file with the lock specified by parameter eFileLock - one - * of the following: - * - * (1) SHARED_LOCK - * (2) RESERVED_LOCK - * (3) PENDING_LOCK - * (4) EXCLUSIVE_LOCK - * - * Sometimes when requesting one lock state, additional lock states - * are inserted in between. The locking might fail on one of the later - * transitions leaving the lock state different from what it started but - * still short of its goal. The following chart shows the allowed - * transitions and the inserted intermediate states: - * - * UNLOCKED -> SHARED - * SHARED -> RESERVED - * SHARED -> (PENDING) -> EXCLUSIVE - * RESERVED -> (PENDING) -> EXCLUSIVE - * PENDING -> EXCLUSIVE - * - * This routine will only increase a lock. Use the sqlite3OsUnlock() - * routine to lower a locking level. + * Sleep for a little while. Return the amount of time slept. + * The argument is the number of microseconds we want to sleep. + * The return value is the number of microseconds of sleep actually + * requested from the underlying operating system, a number which + * might be greater than or equal to the argument, but not less + * than the argument. */ static int -proxyLock(sqlite3_file * id, int eFileLock) +unixSleep(sqlite3_vfs * NotUsed, int microseconds) { - unixFile *pFile = (unixFile *) id; - int rc = proxyTakeConch(pFile); - if (rc == SQLITE_OK) { - proxyLockingContext *pCtx = - (proxyLockingContext *) pFile->lockingContext; - if (pCtx->conchHeld > 0) { - unixFile *proxy = pCtx->lockProxy; - rc = proxy->pMethod->xLock((sqlite3_file *) proxy, - eFileLock); - pFile->eFileLock = proxy->eFileLock; - } else { - /* conchHeld < 0 is lockless */ - } - } - return rc; + int seconds = (microseconds + 999999) / 1000000; + sleep(seconds); + UNUSED_PARAMETER(NotUsed); + return seconds * 1000000; } +/* Fake system time in seconds since 1970. */ +int sqlite3_current_time = 0; + /* - * Lower the locking level on file descriptor pFile to eFileLock. eFileLock - * must be either NO_LOCK or SHARED_LOCK. + * Find the current time (in Universal Coordinated Time). Write into *piNow + * the current time and date as a Julian Day number times 86_400_000. In + * other words, write into *piNow the number of milliseconds since the Julian + * epoch of noon in Greenwich on November 24, 4714 B.C according to the + * proleptic Gregorian calendar. * - * If the locking level of the file descriptor is already at or below - * the requested locking level, this routine is a no-op. + * On success, return SQLITE_OK. Return SQLITE_ERROR if the time and date + * cannot be found. */ static int -proxyUnlock(sqlite3_file * id, int eFileLock) +unixCurrentTimeInt64(sqlite3_vfs * NotUsed, sqlite3_int64 * piNow) { - unixFile *pFile = (unixFile *) id; - int rc = proxyTakeConch(pFile); - if (rc == SQLITE_OK) { - proxyLockingContext *pCtx = - (proxyLockingContext *) pFile->lockingContext; - if (pCtx->conchHeld > 0) { - unixFile *proxy = pCtx->lockProxy; - rc = proxy->pMethod->xUnlock((sqlite3_file *) proxy, - eFileLock); - pFile->eFileLock = proxy->eFileLock; - } else { - /* conchHeld < 0 is lockless */ - } + static const sqlite3_int64 unixEpoch = + 24405875 * (sqlite3_int64) 8640000; + int rc = SQLITE_OK; + struct timeval sNow; + (void)gettimeofday(&sNow, 0); /* Cannot fail given valid arguments */ + *piNow = + unixEpoch + 1000 * (sqlite3_int64) sNow.tv_sec + + sNow.tv_usec / 1000; + +#ifdef SQLITE_TEST + if (sqlite3_current_time) { + *piNow = + 1000 * (sqlite3_int64) sqlite3_current_time + unixEpoch; } +#endif + UNUSED_PARAMETER(NotUsed); return rc; } /* - * Close a file that uses proxy locks. + * The xGetLastError() method is designed to return a better + * low-level error message when operating-system problems come up + * during SQLite operation. Only the integer return code is currently + * used. */ static int -proxyClose(sqlite3_file * id) +unixGetLastError(sqlite3_vfs * NotUsed, int NotUsed2, char *NotUsed3) { - if (ALWAYS(id)) { - unixFile *pFile = (unixFile *) id; - proxyLockingContext *pCtx = - (proxyLockingContext *) pFile->lockingContext; - unixFile *lockProxy = pCtx->lockProxy; - unixFile *conchFile = pCtx->conchFile; - int rc = SQLITE_OK; - - if (lockProxy) { - rc = lockProxy->pMethod-> - xUnlock((sqlite3_file *) lockProxy, NO_LOCK); - if (rc) - return rc; - rc = lockProxy->pMethod-> - xClose((sqlite3_file *) lockProxy); - if (rc) - return rc; - sqlite3_free(lockProxy); - pCtx->lockProxy = 0; - } - if (conchFile) { - if (pCtx->conchHeld) { - rc = proxyReleaseConch(pFile); - if (rc) - return rc; - } - rc = conchFile->pMethod-> - xClose((sqlite3_file *) conchFile); - if (rc) - return rc; - sqlite3_free(conchFile); - } - sqlite3DbFree(0, pCtx->lockProxyPath); - sqlite3_free(pCtx->conchFilePath); - sqlite3DbFree(0, pCtx->dbPath); - /* restore the original locking context and pMethod then close it */ - pFile->lockingContext = pCtx->oldLockingContext; - pFile->pMethod = pCtx->pOldMethod; - sqlite3_free(pCtx); - return pFile->pMethod->xClose(id); - } - return SQLITE_OK; + UNUSED_PARAMETER(NotUsed); + UNUSED_PARAMETER(NotUsed2); + UNUSED_PARAMETER(NotUsed3); + return errno; } -#endif /* defined(__APPLE__) && SQLITE_ENABLE_LOCKING_STYLE */ +/* + *********************** End of sqlite3_vfs methods *************************** + *****************************************************************************/ + /* * The proxy locking style is intended for use with AFP filesystems. * And since AFP is only supported on MacOSX, the proxy locking is also @@ -6205,47 +2737,11 @@ proxyClose(sqlite3_file * id) ****************** End of the proxy lock implementation ********************** *****************************************************************************/ -/* - * Initialize the operating system interface. - * - * This routine registers all VFS implementations for unix-like operating - * systems. This routine, and the sqlite3_os_end() routine that follows, - * should be the only routines in this file that are visible from other - * files. - * - * This routine is called once during SQLite initialization and by a - * single thread. The memory allocation subsystem have not - * necessarily been initialized when this routine \is called, and so they - * should not be used. - */ -int -sqlite3_os_init(void) -{ - /* - * The following macro defines an initializer for an sqlite3_vfs object. - * The name of the VFS is NAME. The pAppData is a pointer to a pointer - * to the "finder" function. (pAppData is a pointer to a pointer because - * silly C90 rules prohibit a void* from being cast to a function pointer - * and so we have to go through the intermediate pointer to avoid problems - * when compiling with -pedantic-errors on GCC.) - * - * The FINDER parameter to this macro is the name of the pointer to the - * finder-function. The finder-function returns a pointer to the - * sqlite_io_methods object that implements the desired locking - * behaviors. See the division above that contains the IOMETHODS - * macro for addition information on finder-functions. - * - * Most finders simply return a pointer to a fixed sqlite3_io_methods - * object. But the "autolockIoFinder" available on MacOSX does a little - * more than that; it looks at the filesystem type that hosts the - * database file and tries to choose an locking method appropriate for - * that filesystem time. - */ #define UNIXVFS(VFSNAME, FINDER) { \ 3, /* iVersion */ \ sizeof(unixFile), /* szOsFile */ \ MAX_PATHNAME, /* mxPathname */ \ - 0, /* pNext */ \ + NULL, /* pNext */ \ VFSNAME, /* zName */ \ (void*)&FINDER, /* pAppData */ \ unixOpen, /* xOpen */ \ @@ -6258,14 +2754,30 @@ sqlite3_os_init(void) unixDlClose, /* xDlClose */ \ unixRandomness, /* xRandomness */ \ unixSleep, /* xSleep */ \ - 0, /* xCurrentTime */ \ + NULL, /* xCurrentTime */ \ unixGetLastError, /* xGetLastError */ \ unixCurrentTimeInt64, /* xCurrentTimeInt64 */ \ - unixSetSystemCall, /* xSetSystemCall */ \ - unixGetSystemCall, /* xGetSystemCall */ \ - unixNextSystemCall, /* xNextSystemCall */ \ + NULL, /* xSetSystemCall */ \ + NULL, /* xGetSystemCall */ \ + NULL, /* xNextSystemCall */ \ } +/* + * Initialize the operating system interface. + * + * This routine registers all VFS implementations for unix-like operating + * systems. This routine, and the sqlite3_os_end() routine that follows, + * should be the only routines in this file that are visible from other + * files. + * + * This routine is called once during SQLite initialization and by a + * single thread. The memory allocation subsystem have not + * necessarily been initialized when this routine \is called, and so they + * should not be used. + */ +int +sqlite3_os_init(void) +{ /* * All default VFSes for unix are contained in the following array. * @@ -6274,36 +2786,13 @@ sqlite3_os_init(void) * array cannot be const. */ static sqlite3_vfs aVfs[] = { -#if SQLITE_ENABLE_LOCKING_STYLE && defined(__APPLE__) - UNIXVFS("unix", autolockIoFinder), -#endif UNIXVFS("unix-none", nolockIoFinder), - UNIXVFS("unix-dotfile", dotlockIoFinder), - UNIXVFS("unix-excl", posixIoFinder), - -#if SQLITE_ENABLE_LOCKING_STYLE - UNIXVFS("unix-posix", posixIoFinder), -#endif -#if SQLITE_ENABLE_LOCKING_STYLE - UNIXVFS("unix-flock", flockIoFinder), -#endif -#if SQLITE_ENABLE_LOCKING_STYLE && defined(__APPLE__) - UNIXVFS("unix-afp", afpIoFinder), - UNIXVFS("unix-nfs", nfsIoFinder), - UNIXVFS("unix-proxy", proxyIoFinder), -#endif + UNIXVFS("unix-excl", posixIoFinder) }; - unsigned int i; /* Loop counter */ - - /* Double-check that the aSyscall[] array has been constructed - * correctly. See ticket [bb3a86e890c8e96ab] - */ - assert(ArraySize(aSyscall) == 28); - /* Register all VFSes defined in the aVfs[] array */ - for (i = 0; i < (sizeof(aVfs) / sizeof(sqlite3_vfs)); i++) { + /* Register all VFSes defined in the aVfs[] array. */ + for (unsigned int i = 0; i < (sizeof(aVfs) / sizeof(sqlite3_vfs)); i++) sqlite3_vfs_register(&aVfs[i], i == 0); - } return SQLITE_OK; } diff --git a/src/box/sql/sqliteInt.h b/src/box/sql/sqliteInt.h index e7a02dc..69debfc 100644 --- a/src/box/sql/sqliteInt.h +++ b/src/box/sql/sqliteInt.h @@ -866,7 +866,6 @@ struct sqlite3_io_methods { int (*xFileSize) (sqlite3_file *, sqlite3_int64 * pSize); int (*xLock) (sqlite3_file *, int); int (*xUnlock) (sqlite3_file *, int); - int (*xCheckReservedLock) (sqlite3_file *, int *pResOut); int (*xFileControl) (sqlite3_file *, int op, void *pArg); int (*xSectorSize) (sqlite3_file *); int (*xDeviceCharacteristics) (sqlite3_file *); -- 2.7.4