DoxigAlpha

ComponentIterator

A path component iterator that can move forwards and backwards. The 'root' of the path (/ for POSIX, things like C:\, \\server\share\, etc for Windows) is treated specially and will never be returned by any of the first, last, next, or previous functions. Multiple consecutive path separators are skipped (treated as a single separator) when iterating. All returned component names/paths are slices of the original path. There is no normalization of paths performed while iterating.

Fields of this type

Fields

#
path:[]const T
root_end_index:usize
= 0
start_index:usize
= 0
end_index:usize
= 0

Type definitions in this namespace

Types

#

After `init`, `next` will return the first component after the root

Functions

#
init
After `init`, `next` will return the first component after the root
root
Returns the root of the path if it is an absolute path, or null otherwise.
first
Returns the first component (from the beginning of the path).
last
Returns the last component (from the end of the path).
next
Returns the next component (the component to the right of the most recently
peekNext
Like `next`, but does not modify the iterator state.
previous
Returns the previous component (the component to the left of the most recently
peekPrevious
Like `previous`, but does not modify the iterator state.

Source

Implementation

#
pub fn ComponentIterator(comptime path_type: PathType, comptime T: type) type {
    return struct {
        path: []const T,
        root_end_index: usize = 0,
        start_index: usize = 0,
        end_index: usize = 0,

        const Self = @This();

        pub const Component = struct {
            /// The current component's path name, e.g. 'b'.
            /// This will never contain path separators.
            name: []const T,
            /// The full path up to and including the current component, e.g. '/a/b'
            /// This will never contain trailing path separators.
            path: []const T,
        };

        const InitError = switch (path_type) {
            .windows => error{BadPathName},
            else => error{},
        };

        /// After `init`, `next` will return the first component after the root
        /// (there is no need to call `first` after `init`).
        /// To iterate backwards (from the end of the path to the beginning), call `last`
        /// after `init` and then iterate via `previous` calls.
        /// For Windows paths, `error.BadPathName` is returned if the `path` has an explicit
        /// namespace prefix (`\\.\`, `\\?\`, or `\??\`) or if it is a UNC path with more
        /// than two path separators at the beginning.
        pub fn init(path: []const T) InitError!Self {
            const root_end_index: usize = switch (path_type) {
                .posix, .uefi => posix: {
                    // Root on UEFI and POSIX only differs by the path separator
                    var root_end_index: usize = 0;
                    while (true) : (root_end_index += 1) {
                        if (root_end_index >= path.len or !path_type.isSep(T, path[root_end_index])) {
                            break;
                        }
                    }
                    break :posix root_end_index;
                },
                .windows => windows: {
                    // Namespaces other than the Win32 file namespace are tricky
                    // and basically impossible to determine a 'root' for, since it's
                    // possible to construct an effectively arbitrarily long 'root',
                    // e.g. `\\.\GLOBALROOT\??\UNC\localhost\C$\foo` is a
                    // possible path that would be effectively equivalent to
                    // `C:\foo`, and the `GLOBALROOT\??\` part can also be recursive,
                    // so `GLOBALROOT\??\GLOBALROOT\??\...` would work for any number
                    // of repetitions. Therefore, paths with an explicit namespace prefix
                    // (\\.\, \??\, \\?\) are not allowed here.
                    if (std.os.windows.getNamespacePrefix(T, path) != .none) {
                        return error.BadPathName;
                    }
                    const windows_path_type = std.os.windows.getUnprefixedPathType(T, path);
                    break :windows switch (windows_path_type) {
                        .relative => 0,
                        .root_local_device => path.len,
                        .rooted => 1,
                        .unc_absolute => unc: {
                            var end_index: usize = 2;
                            // Any extra separators between the first two and the server name are not allowed
                            // and will always lead to STATUS_OBJECT_PATH_INVALID if it is attempted
                            // to be used.
                            if (end_index < path.len and path_type.isSep(T, path[end_index])) {
                                return error.BadPathName;
                            }
                            // Server
                            while (end_index < path.len and !path_type.isSep(T, path[end_index])) {
                                end_index += 1;
                            }
                            // Slash(es) after server
                            while (end_index < path.len and path_type.isSep(T, path[end_index])) {
                                end_index += 1;
                            }
                            // Share
                            while (end_index < path.len and !path_type.isSep(T, path[end_index])) {
                                end_index += 1;
                            }
                            // Slash(es) after share
                            while (end_index < path.len and path_type.isSep(T, path[end_index])) {
                                end_index += 1;
                            }
                            break :unc end_index;
                        },
                        .drive_absolute => drive: {
                            var end_index: usize = 3;
                            while (end_index < path.len and path_type.isSep(T, path[end_index])) {
                                end_index += 1;
                            }
                            break :drive end_index;
                        },
                        .drive_relative => 2,
                    };
                },
            };
            return .{
                .path = path,
                .root_end_index = root_end_index,
                .start_index = root_end_index,
                .end_index = root_end_index,
            };
        }

        /// Returns the root of the path if it is an absolute path, or null otherwise.
        /// For POSIX paths, this will be `/`.
        /// For Windows paths, this will be something like `C:\`, `\\server\share\`, etc.
        /// For UEFI paths, this will be `\`.
        pub fn root(self: Self) ?[]const T {
            if (self.root_end_index == 0) return null;
            return self.path[0..self.root_end_index];
        }

        /// Returns the first component (from the beginning of the path).
        /// For example, if the path is `/a/b/c` then this will return the `a` component.
        /// After calling `first`, `previous` will always return `null`, and `next` will return
        /// the component to the right of the one returned by `first`, if any exist.
        pub fn first(self: *Self) ?Component {
            self.start_index = self.root_end_index;
            self.end_index = self.start_index;
            while (self.end_index < self.path.len and !path_type.isSep(T, self.path[self.end_index])) {
                self.end_index += 1;
            }
            if (self.end_index == self.start_index) return null;
            return .{
                .name = self.path[self.start_index..self.end_index],
                .path = self.path[0..self.end_index],
            };
        }

        /// Returns the last component (from the end of the path).
        /// For example, if the path is `/a/b/c` then this will return the `c` component.
        /// After calling `last`, `next` will always return `null`, and `previous` will return
        /// the component to the left of the one returned by `last`, if any exist.
        pub fn last(self: *Self) ?Component {
            self.end_index = self.path.len;
            while (true) {
                if (self.end_index == self.root_end_index) {
                    self.start_index = self.end_index;
                    return null;
                }
                if (!path_type.isSep(T, self.path[self.end_index - 1])) break;
                self.end_index -= 1;
            }
            self.start_index = self.end_index;
            while (true) {
                if (self.start_index == self.root_end_index) break;
                if (path_type.isSep(T, self.path[self.start_index - 1])) break;
                self.start_index -= 1;
            }
            if (self.start_index == self.end_index) return null;
            return .{
                .name = self.path[self.start_index..self.end_index],
                .path = self.path[0..self.end_index],
            };
        }

        /// Returns the next component (the component to the right of the most recently
        /// returned component), or null if no such component exists.
        /// For example, if the path is `/a/b/c` and the most recently returned component
        /// is `b`, then this will return the `c` component.
        pub fn next(self: *Self) ?Component {
            const peek_result = self.peekNext() orelse return null;
            self.start_index = peek_result.path.len - peek_result.name.len;
            self.end_index = peek_result.path.len;
            return peek_result;
        }

        /// Like `next`, but does not modify the iterator state.
        pub fn peekNext(self: Self) ?Component {
            var start_index = self.end_index;
            while (start_index < self.path.len and path_type.isSep(T, self.path[start_index])) {
                start_index += 1;
            }
            var end_index = start_index;
            while (end_index < self.path.len and !path_type.isSep(T, self.path[end_index])) {
                end_index += 1;
            }
            if (start_index == end_index) return null;
            return .{
                .name = self.path[start_index..end_index],
                .path = self.path[0..end_index],
            };
        }

        /// Returns the previous component (the component to the left of the most recently
        /// returned component), or null if no such component exists.
        /// For example, if the path is `/a/b/c` and the most recently returned component
        /// is `b`, then this will return the `a` component.
        pub fn previous(self: *Self) ?Component {
            const peek_result = self.peekPrevious() orelse return null;
            self.start_index = peek_result.path.len - peek_result.name.len;
            self.end_index = peek_result.path.len;
            return peek_result;
        }

        /// Like `previous`, but does not modify the iterator state.
        pub fn peekPrevious(self: Self) ?Component {
            var end_index = self.start_index;
            while (true) {
                if (end_index == self.root_end_index) return null;
                if (!path_type.isSep(T, self.path[end_index - 1])) break;
                end_index -= 1;
            }
            var start_index = end_index;
            while (true) {
                if (start_index == self.root_end_index) break;
                if (path_type.isSep(T, self.path[start_index - 1])) break;
                start_index -= 1;
            }
            if (start_index == end_index) return null;
            return .{
                .name = self.path[start_index..end_index],
                .path = self.path[0..end_index],
            };
        }
    };
}