diff options
Diffstat (limited to 'src')
-rw-r--r-- | src/doc/se-files.txt | 153 | ||||
-rw-r--r-- | src/kern/linux/linux.cc | 2 | ||||
-rw-r--r-- | src/sim/Process.py | 3 | ||||
-rw-r--r-- | src/sim/RedirectPath.py | 43 | ||||
-rw-r--r-- | src/sim/SConscript | 2 | ||||
-rw-r--r-- | src/sim/System.py | 2 | ||||
-rw-r--r-- | src/sim/process.cc | 91 | ||||
-rw-r--r-- | src/sim/process.hh | 41 | ||||
-rw-r--r-- | src/sim/redirect_path.cc | 63 | ||||
-rw-r--r-- | src/sim/redirect_path.hh | 65 | ||||
-rw-r--r-- | src/sim/syscall_emul.cc | 70 | ||||
-rw-r--r-- | src/sim/syscall_emul.hh | 110 | ||||
-rw-r--r-- | src/sim/system.cc | 5 | ||||
-rw-r--r-- | src/sim/system.hh | 7 |
14 files changed, 563 insertions, 94 deletions
diff --git a/src/doc/se-files.txt b/src/doc/se-files.txt new file mode 100644 index 000000000..e5f3805df --- /dev/null +++ b/src/doc/se-files.txt @@ -0,0 +1,153 @@ +Copyright (c) 2015-Present Advanced Micro Devices, Inc. +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer; +redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution; +neither the name of the copyright holders nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +Authors: Brandon Potter + +=============================================================================== + +This file exists to educate users and notify them that some filesystem open +system calls may have been redirected by system call emulation mode +(henceforth se-mode). 
+ +To provide background, system calls to open files with SYS_OPEN (man 2 open) +inside se-mode will resolve by pass-through to glibc calls (man 3 open) on the +host machine. The host machine will open the file on behalf of the simulator. +Subsequently, se-mode acts as a shim for file access to the opened file. By +utilizing the host machine, se-mode gains quite a bit of utility without +needing to implement an actual filesystem. + +A scenario for using normal files might be `/bin/cat $HOME/my_data_file` +as the simulated application (and option). The simulator leverages the host +file system to provide access to my_data_file in this case. Several things +happen inside the simulator: + 1) The cat command will open $HOME/my_data_file by invoking the open +system call (SYS_OPEN). In se-mode, SYS_OPEN is trapped by the simulator and +the syscall_emul.hh:openImpl implementation is provided as a drop-in +replacement for what normally occurs inside a real operating system. + 2) The openImpl code will pass through several path checks and realize +that the file needs to be handled in the 'normal' case where se-mode utilizes +the host filesystem. + 3) The openImpl code will use the glibc open library call on +$HOME/my_data_file after normalizing invocation options. + 4) If the file successfully opens, se-mode will record the file descriptor +returned from the glibc open and provide a translated file descriptor to the +application. (If the glibc's file descriptor was passed back to the +application, it would be noticeable that the application runtime environment +was wonky. The gem5.{opt,debug,fast} process needs to open files for its own +purposes and the file descriptors for the simulated application perspective +would appear out-of-order and arbitrary. They should appear in-order with the +lowest available file-descriptor assigned on calls to SYS_OPEN. So, se-mode +adds a level of indirection to resolve this problem.) 
+ +However, there are files which users might not want to open on the host +machine; providing file access and/or file visibility to the simulated +application may not make sense in these cases. Historically, these files +have been handled by os-specific code in se-mode. The os-specific +implementation has been referred to as 'special files'. Examples of +special file implementations include /proc/meminfo and /etc/passwd. (See +src/kern/linux/linux.cc for more details.) + +A scenario for using special files might be running `/bin/cat /proc/meminfo` +as the simulated application (and option). Several things will happen inside +the simulator: + 1) The cat command will open the /proc/meminfo file by invoking the open +system call (SYS_OPEN). In se-mode, SYS_OPEN is trapped by the simulator and +the syscall_emul.hh:openImpl implementation is provided as a drop-in +replacement for what normally occurs inside a real operating system. + 2) The openImpl code checks to see if /proc/meminfo matches a special +file. When it notices the match, it invokes code to generate a replacement +file rather than open the file on the host machine. (As it turns out, opening +the host's version of /proc/meminfo will resolve to the gem5 executable which +is probably not what the application intended.) + 3) The generated file is provided a file descriptor (which itself has +special handling to preserve the illusion that the application is not running +inside a simulator under weird conditions). The file descriptor is passed +back to the application and it can subsequently use the file descriptor to +access the redirected /proc/meminfo file. + +Regarding special files, a subtle but important point is that these files +are generated dynamically during simulation (in C++ code). Certain files, +such as /proc/meminfo depend on the application state inside the simulator to +have valid contents. 
With some files, you generally cannot anticipate what +file contents should be before the application actually tries to inspect the +contents. These types of files should all be handled using the special files +method. + +As an aside, users might also want to restrict the contents of a file to +prevent non-determinism in the simulation. (This is another case for special +handling of files.) It can be annoying to try to generate statistics for your +new hardware widget (which of course will improve performance by some +non-trivial percentage) when variance in the statistics is caused by +randomness of file contents. A specific example which comes to mind is +reading the contents of /dev/random. Ideally, se-mode should introduce no +non-determinism. However, that is difficult (if not impossible) to achieve in +practice for every application thrown at the simulator. + +In addition to special files, there is another method to handle filesystem +redirection. Instead of dynamically generating a file and providing it to +the application, it is possible to pregenerate files on the host filesystem +and redirect open calls to the pregenerated files. This is achieved by +capturing the paths provided by the application SYS_OPEN and modifying the +path before issuing the pass-through call to the host filesystem glibc open. +The name for this feature is 'faux filesystem' (henceforth faux-fs). + +With faux-fs, users can add paths via command line (via --chroot) or by +modifying their configuration file to use the RedirectPath class. These +paths take the form of original_path-->set_of_modified_paths. For instance, +/proc/cpuinfo might be redirected to /usr/local/gem5_fs/cpuinfo __OR__ +/home/me/gem5_folder/cpuinfo __OR__ /nonsensical_name/foo_bar, etc.. The +matching pattern and directory/file-structure is controlled by the user. The +pattern match hits on the first available file which actually exists on the +host machine. 
+ +As another subtle point, the faux-fs handling is fixed at simulator +configuration time. The path redirection becomes static after configuration +and the Python generated files in simout/fs/.. also exist after configuration. +The faux-fs mechanism is __NOT__ suitable for files such as /proc/meminfo +since those types of files rely on runtime application characteristics. + +Currently, faux-fs is set up to create a few files on behalf of the average +user. These files are all stuffed into the simout directory under a 'fs' +folder. By default, the path is $gem5_dir/m5out/fs. These files are all +hardcoded in the configuration since it is unlikely that an application wants +to see the host version of the files. At the time of writing, the list can be +viewed in configs/example/se.py by searching for RedirectPath. Most of +the faux-fs Python generated files depend on simulator configuration (i.e. +number of cores, caches, nodes, etc..). Sophisticated runtimes might query +these files for hardware information in certain applications (i.e. +applications using MPI or ROCm since these runtimes utilize libnuma.so). + +Of note, dynamically linked executables will open shared object files in the same +manner as normal files. It is possible and maybe even preferable to utilize +the faux-fs to create a platform independent way of running applications in +se-mode. Users can stuff all the shared libraries into a folder and commit the +folder as part of their repository state. The chroot option can be made to +point to the shared library folder (for each library) and these libraries will +be redirected away from host libraries. This can help to alleviate environment +problems between machines. + +If there is any confusion on path redirection, the system call debug traces +can be used to emit information regarding path redirection. 
diff --git a/src/kern/linux/linux.cc b/src/kern/linux/linux.cc index d571b81a1..b71ab3079 100644 --- a/src/kern/linux/linux.cc +++ b/src/kern/linux/linux.cc @@ -88,7 +88,7 @@ std::string Linux::etcPasswd(Process *process, ThreadContext *tc) { return csprintf("gem5-user:x:1000:1000:gem5-user,,,:%s:/bin/bash\n", - process->getcwd()); + process->tgtCwd); } std::string diff --git a/src/sim/Process.py b/src/sim/Process.py index 2ffc51a33..73a0145fd 100644 --- a/src/sim/Process.py +++ b/src/sim/Process.py @@ -29,6 +29,7 @@ from m5.SimObject import * from m5.params import * from m5.proxy import * +from os import getcwd class Process(SimObject): type = 'Process' @@ -58,7 +59,7 @@ class Process(SimObject): executable = Param.String('', "executable (overrides cmd[0] if set)") cmd = VectorParam.String("command line (executable plus arguments)") env = VectorParam.String([], "environment settings") - cwd = Param.String('', "current working directory") + cwd = Param.String(getcwd(), "current working directory") simpoint = Param.UInt64(0, 'simulation point at which to start simulation') drivers = VectorParam.EmulatedDriver([], 'Available emulated drivers') diff --git a/src/sim/RedirectPath.py b/src/sim/RedirectPath.py new file mode 100644 index 000000000..08511b891 --- /dev/null +++ b/src/sim/RedirectPath.py @@ -0,0 +1,43 @@ +# Copyright (c) 2015 Advanced Micro Devices, Inc. +# All rights reserved. 
+# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer; +# redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution; +# neither the name of the copyright holders nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# +# Authors: David Hashe + +from m5.SimObject import SimObject +from m5.params import * +from m5.proxy import * + +class RedirectPath(SimObject): + """ Stores paths for filesystem redirection during syscalls. If a path + matches 'appPath', then the syscall is redirected to the first 'hostPath' + that contains the non-overlapping portion of the path as a valid file. If + there are no hits, then the syscall is redirected to the first value. 
+ """ + type = 'RedirectPath' + cxx_header = "sim/redirect_path.hh" + + app_path = Param.String("/", "filesystem path from an app's perspective") + host_paths = VectorParam.String(["/"], "file path on host filesystem") diff --git a/src/sim/SConscript b/src/sim/SConscript index 54e251287..8ab2c72f9 100644 --- a/src/sim/SConscript +++ b/src/sim/SConscript @@ -38,6 +38,7 @@ SimObject('VoltageDomain.py') SimObject('System.py') SimObject('DVFSHandler.py') SimObject('SubSystem.py') +SimObject('RedirectPath.py') Source('arguments.cc') Source('async.cc') @@ -56,6 +57,7 @@ Source('init_signals.cc') Source('main.cc', tags='main') Source('port.cc') Source('python.cc', add_tags='python') +Source('redirect_path.cc') Source('root.cc') Source('serialize.cc') Source('drain.cc') diff --git a/src/sim/System.py b/src/sim/System.py index 51b1a743f..c012cb256 100644 --- a/src/sim/System.py +++ b/src/sim/System.py @@ -83,6 +83,8 @@ class System(MemObject): cache_line_size = Param.Unsigned(64, "Cache line size in bytes") + redirect_paths = VectorParam.RedirectPath([], "Path redirections") + exit_on_work_items = Param.Bool(False, "Exit from the simulation loop when " "encountering work item annotations.") work_item_id = Param.Int(-1, "specific work item id") diff --git a/src/sim/process.cc b/src/sim/process.cc index 5cec2958e..10c68fe6c 100644 --- a/src/sim/process.cc +++ b/src/sim/process.cc @@ -50,6 +50,7 @@ #include <unistd.h> #include <array> +#include <climits> #include <csignal> #include <map> #include <string> @@ -67,6 +68,7 @@ #include "sim/emul_driver.hh" #include "sim/fd_array.hh" #include "sim/fd_entry.hh" +#include "sim/redirect_path.hh" #include "sim/syscall_desc.hh" #include "sim/system.hh" @@ -101,6 +103,14 @@ using namespace std; using namespace TheISA; +static std::string +normalize(std::string& directory) +{ + if (directory.back() != '/') + directory += '/'; + return directory; +} + Process::Process(ProcessParams *params, EmulationPageTable *pTable, ObjectFile *obj_file) 
: SimObject(params), system(params->system), @@ -111,8 +121,10 @@ Process::Process(ProcessParams *params, EmulationPageTable *pTable, initVirtMem(system->getSystemPort(), this, SETranslatingPortProxy::Always), objFile(obj_file), - argv(params->cmd), envp(params->env), cwd(params->cwd), + argv(params->cmd), envp(params->env), executable(params->executable), + tgtCwd(normalize(params->cwd)), + hostCwd(checkPathRedirect(tgtCwd)), _uid(params->uid), _euid(params->euid), _gid(params->gid), _egid(params->egid), _pid(params->pid), _ppid(params->ppid), @@ -441,6 +453,42 @@ Process::findDriver(std::string filename) return nullptr; } +std::string +Process::checkPathRedirect(const std::string &filename) +{ + // If the input parameter contains a relative path, convert it. Note, + // the return value for this method should always return an absolute + // path on the host filesystem. The return value will be used to + // open and manipulate the path specified by the input parameter. Since + // all filesystem handling in syscall mode is passed through to the host, + // we deal only with host paths. + auto host_fs_abs_path = absolutePath(filename, true); + + for (auto path : system->redirectPaths) { + // Search through the redirect paths to see if a starting substring of + // our path falls into any buckets which need to redirected. + if (startswith(host_fs_abs_path, path->appPath())) { + std::string tail = host_fs_abs_path.substr(path->appPath().size()); + + // If this path needs to be redirected, search through a list + // of targets to see if we can match a valid file (or directory). + for (auto host_path : path->hostPaths()) { + if (access((host_path + tail).c_str(), R_OK) == 0) { + // Return the valid match. + return host_path + tail; + } + } + // The path needs to be redirected, but the file or directory + // does not exist on the host filesystem. Return the first + // host path as a default. 
+ return path->hostPaths()[0] + tail; + } + } + + // The path does not need to be redirected. + return host_fs_abs_path; +} + void Process::updateBias() { @@ -489,6 +537,33 @@ Process::getStartPC() return interp ? interp->entryPoint() : objFile->entryPoint(); } +std::string +Process::absolutePath(const std::string &filename, bool host_filesystem) +{ + if (filename.empty() || startswith(filename, "/")) + return filename; + + // Verify that the current working directories are initialized properly. + // These members should be set initially via params from 'Process.py', + // although they may change over time depending on what the application + // does during simulation. + assert(!tgtCwd.empty()); + assert(!hostCwd.empty()); + + // Construct the absolute path given the current working directory for + // either the host filesystem or target filesystem. The distinction only + // matters if filesystem redirection is utilized in the simulation. + auto path_base = host_filesystem ? hostCwd : tgtCwd; + + // Add a trailing '/' if the current working directory did not have one. + normalize(path_base); + + // Append the filename onto the current working path. 
+ auto absolute_path = path_base + filename; + + return absolute_path; +} + Process * ProcessParams::create() { @@ -649,17 +724,3 @@ ProcessParams::create() fatal("Unknown error creating process object."); return process; } - -std::string -Process::fullPath(const std::string &file_name) -{ - if (file_name[0] == '/' || cwd.empty()) - return file_name; - - std::string full = cwd; - - if (cwd[cwd.size() - 1] != '/') - full += '/'; - - return full + file_name; -} diff --git a/src/sim/process.hh b/src/sim/process.hh index a1ca84bf5..27c569602 100644 --- a/src/sim/process.hh +++ b/src/sim/process.hh @@ -91,8 +91,6 @@ class Process : public SimObject inline void setpgid(uint64_t pgid) { _pgid = pgid; } const char *progName() const { return executable.c_str(); } - std::string fullPath(const std::string &filename); - std::string getcwd() const { return cwd; } /** * Find an emulated device driver. @@ -186,9 +184,46 @@ class Process : public SimObject ObjectFile *objFile; std::vector<std::string> argv; std::vector<std::string> envp; - std::string cwd; std::string executable; + /** + * Return an absolute path given a relative path paired with the current + * working directory of the process running under simulation. + * + * @param path The relative path (generally a filename) that needs the + * current working directory prepended. + * @param host_fs A flag which determines whether to return a + * path for the host filesystem or the filesystem of the process running + * under simulation. Only matters if filesysem redirection is used to + * replace files (or directories) that would normally appear via the + * host filesystem. + * @return String containing an absolute path. + */ + std::string absolutePath(const std::string &path, bool host_fs); + + /** + * Redirect file path if it matches any keys initialized by system object. + * @param filename An input parameter containing either a relative path + * or an absolute path. 
If given a relative path, the path will be + * prepended to the current working directory of the simulation with + * respect to the host filesystem. + * @return String containing an absolute path. + */ + std::string checkPathRedirect(const std::string &filename); + + /** + * The cwd members are used to track changes to the current working + * directory for the purpose of executing system calls which depend on + * relative paths (i.e. open, chdir). + * + * The tgt member and host member may differ if the path for the current + * working directory is redirected to point to a different location + * (i.e. `cd /proc` should point to '$(gem5_repo)/m5out/fs/proc' + * instead of '/proc'). + */ + std::string tgtCwd; + std::string hostCwd; + // Id of the owner of the process uint64_t _uid; uint64_t _euid; diff --git a/src/sim/redirect_path.cc b/src/sim/redirect_path.cc new file mode 100644 index 000000000..8c41ffabd --- /dev/null +++ b/src/sim/redirect_path.cc @@ -0,0 +1,63 @@ +/* + * Copyright (c) 2015 Advanced Micro Devices, Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * Authors: David Hashe + */ + +#include "sim/redirect_path.hh" + +#include <unistd.h> + +static std::string +normalizePath(std::string path) +{ + char buf[PATH_MAX]; + std::string gem5_cwd = getcwd(buf, PATH_MAX); + + if (!startswith(path, "/")) { + path = realpath((gem5_cwd + "/" + path).c_str(), buf); + } + if (path[path.length()-1] != '/') path.push_back('/'); + + return path; +} + +RedirectPath::RedirectPath(const RedirectPathParams *p) + : SimObject(p) +{ + _appPath = normalizePath(p->app_path); + + for (auto hp : p->host_paths) { + _hostPaths.push_back(normalizePath(hp)); + } +} + +RedirectPath* +RedirectPathParams::create() +{ + return new RedirectPath(this); +} diff --git a/src/sim/redirect_path.hh b/src/sim/redirect_path.hh new file mode 100644 index 000000000..7dbb4eaa7 --- /dev/null +++ b/src/sim/redirect_path.hh @@ -0,0 +1,65 @@ +/* + * Copyright (c) 2015 Advanced Micro Devices, Inc. + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * Authors: David Hashe + */ + +#ifndef __SIM_REDIRECT_PATH_HH__ +#define __SIM_REDIRECT_PATH_HH__ + +#include <string> +#include <vector> + +#include "params/RedirectPath.hh" +#include "sim/sim_object.hh" + +/** + * RedirectPath stores a mapping from one 'appPath' to a vector of + * 'hostPath'. Each 'appPath' and 'hostPath' is a filesystem path. + * Used by process.cc to redirect syscall accesses to different directories. 
+ */ +class RedirectPath : public SimObject +{ + public: + RedirectPath(const RedirectPathParams *p); + + const std::string& appPath() { return _appPath; }; + const std::vector<std::string>& hostPaths() { return _hostPaths; }; + + protected: + /** + * An appPath is a path which needs to be redirected and replaced + * by one of the corresponding hostPath (when accessing files on the host + * filesystem.) + */ + // _appPath holds the path as it would appear from an app's perspective. + std::string _appPath; + // _hostPaths holds a set of host filesystem paths + std::vector<std::string> _hostPaths; +}; + +#endif diff --git a/src/sim/syscall_emul.cc b/src/sim/syscall_emul.cc index fbfe21a93..a9490fa57 100644 --- a/src/sim/syscall_emul.cc +++ b/src/sim/syscall_emul.cc @@ -388,7 +388,7 @@ getcwdFunc(SyscallDesc *desc, int num, Process *p, ThreadContext *tc) BufferArg buf(buf_ptr, size); // Is current working directory defined? - string cwd = p->getcwd(); + string cwd = p->tgtCwd; if (!cwd.empty()) { if (cwd.length() >= size) { // Buffer too small @@ -425,8 +425,8 @@ readlinkFunc(SyscallDesc *desc, int num, Process *p, ThreadContext *tc, if (!tc->getMemProxy().tryReadString(path, p->getSyscallArg(tc, index))) return -EFAULT; - // Adjust path for current working directory - path = p->fullPath(path); + // Adjust path for cwd and redirection + path = p->checkPathRedirect(path); Addr buf_ptr = p->getSyscallArg(tc, index); size_t bufsiz = p->getSyscallArg(tc, index); @@ -491,7 +491,7 @@ unlinkHelper(SyscallDesc *desc, int num, Process *p, ThreadContext *tc, if (!tc->getMemProxy().tryReadString(path, p->getSyscallArg(tc, index))) return -EFAULT; - path = p->fullPath(path); + path = p->checkPathRedirect(path); int result = unlink(path.c_str()); return (result == -1) ? 
-errno : result; @@ -510,8 +510,8 @@ linkFunc(SyscallDesc *desc, int num, Process *p, ThreadContext *tc) if (!virt_mem.tryReadString(new_path, p->getSyscallArg(tc, index))) return -EFAULT; - path = p->fullPath(path); - new_path = p->fullPath(new_path); + path = p->absolutePath(path, true); + new_path = p->absolutePath(new_path, true); int result = link(path.c_str(), new_path.c_str()); return (result == -1) ? -errno : result; @@ -530,8 +530,8 @@ symlinkFunc(SyscallDesc *desc, int num, Process *p, ThreadContext *tc) if (!virt_mem.tryReadString(new_path, p->getSyscallArg(tc, index))) return -EFAULT; - path = p->fullPath(path); - new_path = p->fullPath(new_path); + path = p->absolutePath(path, true); + new_path = p->absolutePath(new_path, true); int result = symlink(path.c_str(), new_path.c_str()); return (result == -1) ? -errno : result; @@ -540,18 +540,15 @@ symlinkFunc(SyscallDesc *desc, int num, Process *p, ThreadContext *tc) SyscallReturn mkdirFunc(SyscallDesc *desc, int num, Process *p, ThreadContext *tc) { - string path; - int index = 0; + std::string path; if (!tc->getMemProxy().tryReadString(path, p->getSyscallArg(tc, index))) return -EFAULT; - // Adjust path for current working directory - path = p->fullPath(path); - + path = p->checkPathRedirect(path); mode_t mode = p->getSyscallArg(tc, index); - int result = mkdir(path.c_str(), mode); + auto result = mkdir(path.c_str(), mode); return (result == -1) ? -errno : result; } @@ -569,9 +566,9 @@ renameFunc(SyscallDesc *desc, int num, Process *p, ThreadContext *tc) if (!tc->getMemProxy().tryReadString(new_name, p->getSyscallArg(tc, index))) return -EFAULT; - // Adjust path for current working directory - old_name = p->fullPath(old_name); - new_name = p->fullPath(new_name); + // Adjust path for cwd and redirection + old_name = p->checkPathRedirect(old_name); + new_name = p->checkPathRedirect(new_name); int64_t result = rename(old_name.c_str(), new_name.c_str()); return (result == -1) ? 
-errno : result; @@ -588,8 +585,8 @@ truncateFunc(SyscallDesc *desc, int num, Process *p, ThreadContext *tc) off_t length = p->getSyscallArg(tc, index); - // Adjust path for current working directory - path = p->fullPath(path); + // Adjust path for cwd and redirection + path = p->checkPathRedirect(path); int result = truncate(path.c_str(), length); return (result == -1) ? -errno : result; @@ -623,8 +620,8 @@ truncate64Func(SyscallDesc *desc, int num, int64_t length = process->getSyscallArg(tc, index, 64); - // Adjust path for current working directory - path = process->fullPath(path); + // Adjust path for cwd and redirection + path = process->checkPathRedirect(path); #if NO_STAT64 int result = truncate(path.c_str(), length); @@ -680,8 +677,8 @@ chownFunc(SyscallDesc *desc, int num, Process *p, ThreadContext *tc) uint32_t group = p->getSyscallArg(tc, index); gid_t hostGroup = group; - // Adjust path for current working directory - path = p->fullPath(path); + // Adjust path for cwd and redirection + path = p->checkPathRedirect(path); int result = chown(path.c_str(), hostOwner, hostGroup); return (result == -1) ? 
-errno : result; @@ -1068,8 +1065,8 @@ accessFunc(SyscallDesc *desc, int callnum, Process *p, ThreadContext *tc, if (!tc->getMemProxy().tryReadString(path, p->getSyscallArg(tc, index))) return -EFAULT; - // Adjust path for current working directory - path = p->fullPath(path); + // Adjust path for cwd and redirection + path = p->checkPathRedirect(path); mode_t mode = p->getSyscallArg(tc, index); @@ -1091,7 +1088,7 @@ mknodFunc(SyscallDesc *desc, int num, Process *p, ThreadContext *tc) if (!tc->getMemProxy().tryReadString(path, p->getSyscallArg(tc, index))) return -EFAULT; - path = p->fullPath(path); + path = p->checkPathRedirect(path); mode_t mode = p->getSyscallArg(tc, index); dev_t dev = p->getSyscallArg(tc, index); @@ -1107,10 +1104,23 @@ chdirFunc(SyscallDesc *desc, int num, Process *p, ThreadContext *tc) if (!tc->getMemProxy().tryReadString(path, p->getSyscallArg(tc, index))) return -EFAULT; - path = p->fullPath(path); + std::string tgt_cwd; + if (startswith(path, "/")) { + tgt_cwd = path; + } else { + char buf[PATH_MAX]; + tgt_cwd = realpath((p->tgtCwd + "/" + path).c_str(), buf); + } + std::string host_cwd = p->checkPathRedirect(tgt_cwd); - auto result = chdir(path.c_str()); - return (result == -1) ? -errno : result; + int result = chdir(host_cwd.c_str()); + + if (result == -1) + return -errno; + + p->hostCwd = host_cwd; + p->tgtCwd = tgt_cwd; + return result; } SyscallReturn @@ -1121,7 +1131,7 @@ rmdirFunc(SyscallDesc *desc, int num, Process *p, ThreadContext *tc) if (!tc->getMemProxy().tryReadString(path, p->getSyscallArg(tc, index))) return -EFAULT; - path = p->fullPath(path); + path = p->checkPathRedirect(path); auto result = rmdir(path.c_str()); return (result == -1) ? 
-errno : result; diff --git a/src/sim/syscall_emul.hh b/src/sim/syscall_emul.hh index 87c16c489..1d14af921 100644 --- a/src/sim/syscall_emul.hh +++ b/src/sim/syscall_emul.hh @@ -837,14 +837,18 @@ openImpl(SyscallDesc *desc, int callnum, Process *p, ThreadContext *tc, * In every case, we should have a full path (which is relevant to the * host) to work with after this block has been passed. */ - if (!isopenat || (isopenat && tgt_dirfd == OS::TGT_AT_FDCWD)) { - path = p->fullPath(path); + std::string redir_path = path; + std::string abs_path = path; + if (!isopenat || tgt_dirfd == OS::TGT_AT_FDCWD) { + abs_path = p->absolutePath(path, true); + redir_path = p->checkPathRedirect(path); } else if (!startswith(path, "/")) { std::shared_ptr<FDEntry> fdep = ((*p->fds)[tgt_dirfd]); auto ffdp = std::dynamic_pointer_cast<FileFDEntry>(fdep); if (!ffdp) return -EBADF; - path.insert(0, ffdp->getFileName() + "/"); + abs_path = ffdp->getFileName() + path; + redir_path = p->checkPathRedirect(abs_path); } /** @@ -853,13 +857,13 @@ openImpl(SyscallDesc *desc, int callnum, Process *p, ThreadContext *tc, * the process class through Python; this allows us to create a file * descriptor for subsequent ioctl or mmap calls. */ - if (startswith(path, "/dev/")) { - std::string filename = path.substr(strlen("/dev/")); + if (startswith(abs_path, "/dev/")) { + std::string filename = abs_path.substr(strlen("/dev/")); EmulatedDriver *drv = p->findDriver(filename); if (drv) { DPRINTF_SYSCALL(Verbose, "open%s: passing call to " "driver open with path[%s]\n", - isopenat ? "at" : "", path.c_str()); + isopenat ? "at" : "", abs_path.c_str()); return drv->open(p, tc, mode, host_flags); } /** @@ -869,28 +873,49 @@ openImpl(SyscallDesc *desc, int callnum, Process *p, ThreadContext *tc, } /** - * Some special paths and files cannot be called on the host and need - * to be handled as special cases inside the simulator. 
- * If the full path that was created above does not match any of the - * special cases, pass it through to the open call on the host to let - * the host open the file on our behalf. - * If the host cannot open the file, return the host's error code back - * through the system call to the simulated process. + * We make several attempts to resolve a call to open. + * + * 1) Resolve any path redirection beforehand. This will set the path + * up with variable 'redir_path' which may contain a modified path or + * the original path value. This should already be done in prior code. + * 2) Try to handle the access using 'special_paths'. Some special_paths + * and files cannot be called on the host and need to be handled as + * special cases inside the simulator. These special_paths are handled by + * C++ routines to provide output back to userspace. + * 3) If the full path that was created above does not match any of the + * special cases, pass it through to the open call on the __HOST__ to let + * the host open the file on our behalf. Again, the openImpl tries to + * USE_THE_HOST_FILESYSTEM_OPEN (with a possible redirection to the + * faux-filesystem files). The faux-filesystem is dynamically created + * during simulator configuration using Python functions. + * 4) If the host cannot open the file, the open attempt failed in "3)". + * Return the host's error code back through the system call to the + * simulated process. If running a debug trace, also notify the user that + * the open call failed. + * + * Any success will set sim_fd to something other than -1 and skip the + * next conditions, effectively bypassing them. 
*/ int sim_fd = -1; + std::string used_path; std::vector<std::string> special_paths = - { "/proc/", "/system/", "/sys/", "/platform/", "/etc/passwd" }; + { "/proc/meminfo/", "/system/", "/sys/", "/platform/", + "/etc/passwd" }; for (auto entry : special_paths) { - if (startswith(path, entry)) - sim_fd = OS::openSpecialFile(path, p, tc); + if (startswith(path, entry)) { + sim_fd = OS::openSpecialFile(abs_path, p, tc); + used_path = abs_path; + } } if (sim_fd == -1) { - sim_fd = open(path.c_str(), host_flags, mode); + sim_fd = open(redir_path.c_str(), host_flags, mode); + used_path = redir_path; } if (sim_fd == -1) { int local = -errno; - DPRINTF_SYSCALL(Verbose, "open%s: failed -> path:%s\n", - isopenat ? "at" : "", path.c_str()); + DPRINTF_SYSCALL(Verbose, "open%s: failed -> path:%s " + "(inferred from:%s)\n", isopenat ? "at" : "", + used_path.c_str(), path.c_str()); return local; } @@ -904,8 +929,9 @@ openImpl(SyscallDesc *desc, int callnum, Process *p, ThreadContext *tc, */ auto ffdp = std::make_shared<FileFDEntry>(sim_fd, host_flags, path, 0); int tgt_fd = p->fds->allocFD(ffdp); - DPRINTF_SYSCALL(Verbose, "open%s: sim_fd[%d], target_fd[%d] -> path:%s\n", - isopenat ? "at" : "", sim_fd, tgt_fd, path.c_str()); + DPRINTF_SYSCALL(Verbose, "open%s: sim_fd[%d], target_fd[%d] -> path:%s\n" + "(inferred from:%s)\n", isopenat ? "at" : "", + sim_fd, tgt_fd, used_path.c_str(), path.c_str()); return tgt_fd; } @@ -995,9 +1021,9 @@ renameatFunc(SyscallDesc *desc, int callnum, Process *process, process->getSyscallArg(tc, index))) return -EFAULT; - // Adjust path for current working directory - old_name = process->fullPath(old_name); - new_name = process->fullPath(new_name); + // Adjust path for cwd and redirection + old_name = process->checkPathRedirect(old_name); + new_name = process->checkPathRedirect(new_name); int result = rename(old_name.c_str(), new_name.c_str()); return (result == -1) ? 
-errno : result; @@ -1043,8 +1069,8 @@ chmodFunc(SyscallDesc *desc, int callnum, Process *process, // XXX translate mode flags via OS::something??? hostMode = mode; - // Adjust path for current working directory - path = process->fullPath(path); + // Adjust path for cwd and redirection + path = process->checkPathRedirect(path); // do the chmod int result = chmod(path.c_str(), hostMode); @@ -1244,8 +1270,8 @@ statFunc(SyscallDesc *desc, int callnum, Process *process, } Addr bufPtr = process->getSyscallArg(tc, index); - // Adjust path for current working directory - path = process->fullPath(path); + // Adjust path for cwd and redirection + path = process->checkPathRedirect(path); struct stat hostBuf; int result = stat(path.c_str(), &hostBuf); @@ -1273,8 +1299,8 @@ stat64Func(SyscallDesc *desc, int callnum, Process *process, return -EFAULT; Addr bufPtr = process->getSyscallArg(tc, index); - // Adjust path for current working directory - path = process->fullPath(path); + // Adjust path for cwd and redirection + path = process->checkPathRedirect(path); #if NO_STAT64 struct stat hostBuf; @@ -1310,8 +1336,8 @@ fstatat64Func(SyscallDesc *desc, int callnum, Process *process, return -EFAULT; Addr bufPtr = process->getSyscallArg(tc, index); - // Adjust path for current working directory - path = process->fullPath(path); + // Adjust path for cwd and redirection + path = process->checkPathRedirect(path); #if NO_STAT64 struct stat hostBuf; @@ -1376,8 +1402,8 @@ lstatFunc(SyscallDesc *desc, int callnum, Process *process, } Addr bufPtr = process->getSyscallArg(tc, index); - // Adjust path for current working directory - path = process->fullPath(path); + // Adjust path for cwd and redirection + path = process->checkPathRedirect(path); struct stat hostBuf; int result = lstat(path.c_str(), &hostBuf); @@ -1405,8 +1431,8 @@ lstat64Func(SyscallDesc *desc, int callnum, Process *process, } Addr bufPtr = process->getSyscallArg(tc, index); - // Adjust path for current working directory - 
path = process->fullPath(path); + // Adjust path for cwd and redirection + path = process->checkPathRedirect(path); #if NO_STAT64 struct stat hostBuf; @@ -1469,8 +1495,8 @@ statfsFunc(SyscallDesc *desc, int callnum, Process *process, } Addr bufPtr = process->getSyscallArg(tc, index); - // Adjust path for current working directory - path = process->fullPath(path); + // Adjust path for cwd and redirection + path = process->checkPathRedirect(path); struct statfs hostBuf; int result = statfs(path.c_str(), &hostBuf); @@ -1531,7 +1557,7 @@ cloneFunc(SyscallDesc *desc, int callnum, Process *p, ThreadContext *tc) pp->executable.assign(*(new std::string(p->progName()))); pp->cmd.push_back(*(new std::string(p->progName()))); pp->system = p->system; - pp->cwd.assign(p->getcwd()); + pp->cwd.assign(p->tgtCwd); pp->input.assign("stdin"); pp->output.assign("stdout"); pp->errout.assign("stderr"); @@ -2096,8 +2122,8 @@ utimesFunc(SyscallDesc *desc, int callnum, Process *process, hostTimeval[i].tv_usec = TheISA::gtoh((*tp)[i].tv_usec); } - // Adjust path for current working directory - path = process->fullPath(path); + // Adjust path for cwd and redirection + path = process->checkPathRedirect(path); int result = utimes(path.c_str(), hostTimeval); @@ -2159,7 +2185,7 @@ execveFunc(SyscallDesc *desc, int callnum, Process *p, ThreadContext *tc) pp->input.assign("cin"); pp->output.assign("cout"); pp->errout.assign("cerr"); - pp->cwd.assign(p->getcwd()); + pp->cwd.assign(p->tgtCwd); pp->system = p->system; /** * Prevent process object creation with identical PIDs (which will trip diff --git a/src/sim/system.cc b/src/sim/system.cc index 2113fc079..65ad6cdb0 100644 --- a/src/sim/system.cc +++ b/src/sim/system.cc @@ -70,6 +70,7 @@ #include "sim/byteswap.hh" #include "sim/debug.hh" #include "sim/full_system.hh" +#include "sim/redirect_path.hh" /** * To avoid linking errors with LTO, only include the header if we @@ -111,8 +112,10 @@ System::System(Params *p) thermalModel(p->thermal_model), 
_params(p), totalNumInsts(0), - instEventQueue("system instruction-based event queue") + instEventQueue("system instruction-based event queue"), + redirectPaths(p->redirect_paths) { + // add self to global system list systemList.push_back(this); diff --git a/src/sim/system.hh b/src/sim/system.hh index 69448d35f..6227ae660 100644 --- a/src/sim/system.hh +++ b/src/sim/system.hh @@ -64,6 +64,7 @@ #include "mem/port_proxy.hh" #include "params/System.hh" #include "sim/futex_map.hh" +#include "sim/redirect_path.hh" #include "sim/se_signal.hh" /** @@ -628,6 +629,11 @@ class System : public MemObject // receiver will delete the signal upon reception. std::list<BasicSignal> signalList; + // Used by syscall-emulation mode. This member contains paths which need + // to be redirected to the faux-filesystem (a duplicate filesystem + // intended to replace certain files on the host filesystem). + std::vector<RedirectPath*> redirectPaths; + protected: /** @@ -647,7 +653,6 @@ class System : public MemObject * @param section relevant section in the checkpoint */ virtual void unserializeSymtab(CheckpointIn &cp) {} - }; void printSystems(); |