initial attempt at cellfs

This commit is contained in:
2025-11-22 19:07:22 -06:00
parent 34de9e6dc4
commit 532cfd0ed0
3 changed files with 639 additions and 1 deletions

484
scripts/cellfs.cm Normal file
View File

@@ -0,0 +1,484 @@
// Alias for the module-level `this`; the public API is attached to it at the
// bottom of the file and it is what the module returns.
var cellfs = this
// CellFS: A filesystem implementation using miniz and raw OS filesystem
// Reimplements PhysFS functionality for archives and direct file access
// Internal state
// Mount table, appended (or prepended) by mount(); find_mount() scans it from the end.
var mounts = [] // Array of {path, type, handle, source} - type: 'zip' or 'dir'
// Assigned by writepath(); not read anywhere else in the visible code.
var write_dir = null
// Keyed by normalized virtual path; reset whenever the mount table changes.
var path_cache = {} // Cache for resolve_path results
// Trim every leading and trailing "/" from a path.
//   path                   - path string; any falsy value counts as "no path"
//   preserve_leading_slash - when truthy, a path that began with "/" gets a
//                            single "/" put back (mount points rely on this)
// Returns the trimmed path. A falsy input yields "/" when the leading slash
// is preserved and "" otherwise. Interior slashes are left untouched.
function normalize_path(path, preserve_leading_slash) {
  if (!path) {
    if (preserve_leading_slash) return "/"
    return ""
  }
  var starts_with_slash = path.startsWith('/')
  var trimmed = path.replace(/^\/+/, "").replace(/\/+$/, "")
  if (starts_with_slash && preserve_leading_slash) {
    return "/" + trimmed
  }
  return trimmed
}
// Tell whether a path string begins with "/".
// Returns true for "/x", false for "x" and for the empty string.
function is_absolute(path) {
  var first_char = path.substring(0, 1)
  return first_char == "/"
}
// Glue two path fragments together with exactly one "/" between them.
// Trailing slashes of `base` and leading slashes of `rel` are dropped first,
// so an empty `base` produces a result that starts with "/".
function join_paths(base, rel) {
  var left = base.replace(/\/+$/, "")
  var right = rel.replace(/^\/+/, "")
  return `${left}/${right}`
}
// Return everything before the last "/" of a path.
// A path without any "/" has no directory part and yields "".
function dirname(path) {
  var cut = path.lastIndexOf("/")
  return cut == -1 ? "" : path.substring(0, cut)
}
// Return the part of a path after its last "/".
// When there is no "/", lastIndexOf gives -1 and the whole path comes back.
function basename(path) {
  var name_start = path.lastIndexOf("/") + 1
  return path.substring(name_start)
}
// Find the mount whose mount point contains `path`.
// The mount table is scanned from the end, so later entries win.
// A mount point only matches on a whole path component: "/data" covers
// "/data" and "/data/x" but not "/database/x". A mount point that is empty
// or ends with "/" (the root mount "/") covers every path it prefixes.
// Returns the mount record, or null when nothing matches.
function find_mount(path) {
  for (var i = mounts.length - 1; i >= 0; i--) {
    var mount = mounts[i]
    var prefix = mount.path
    if (!path.startsWith(prefix)) continue
    // Exact hit on the mount point itself
    if (path.length == prefix.length) return mount
    // Root-like mount points already end on a component boundary
    if (prefix == "" || prefix.substring(prefix.length - 1) == "/") return mount
    // Otherwise the character right after the prefix must start a new component
    if (path.substring(prefix.length, prefix.length + 1) == "/") return mount
  }
  return null
}
// Resolve a virtual path to a description of where it lives.
// Returns {type, path, mount_path[, handle]}:
//   - empty path      -> {type: 'dir', path: '.', mount_path: ''}
//   - no mount found  -> {type: 'dir', path: <normalized vpath>, mount_path: ''}
//   - mount found     -> the mount's type and handle, with `path` relative
//                        to the mount point
// Results are memoized in path_cache under the normalized path.
//
// NOTE(review): vpath is normalized WITHOUT its leading slash, while mount()
// stores mount points WITH theirs, so find_mount() cannot match a mount point
// such as "/test_zip" from here - confirm whether that is intended.
// NOTE(review): the mount's `source` is not used when building `path`.
function resolve_path(vpath) {
  vpath = normalize_path(vpath)
  // Own-property test: a plain truthiness lookup would hand back inherited
  // members for paths like "constructor" or "toString".
  if (Object.prototype.hasOwnProperty.call(path_cache, vpath)) {
    return path_cache[vpath]
  }
  var result
  if (!vpath) {
    result = {type: 'dir', path: '.', mount_path: ''}
  } else {
    var mount = find_mount(vpath)
    if (!mount) {
      // No mount found, treat as direct filesystem access
      result = {type: 'dir', path: vpath, mount_path: ''}
    } else {
      // Path relative to the mount point, without a leading slash
      var rel_path = vpath.substring(mount.path.length).replace(/^\/+/, "")
      result = {
        type: mount.type,
        path: rel_path,
        mount_path: mount.path,
        handle: mount.handle
      }
    }
  }
  path_cache[vpath] = result
  return result
}
// Report whether a path can be stat'ed.
// Any error thrown by stat() is taken to mean "does not exist".
function exists(path) {
  var found = true
  try {
    stat(path)
  } catch (e) {
    found = false
  }
  return found
}
// Get file stats for a virtual path.
// Returns {filesize, modtime, createtime, accesstime, isDirectory}.
//   - ZIP mount, empty inner path: an all-zero record flagged as a directory.
//   - ZIP mount, file entry: only zip.mod() is consulted; the three time
//     fields are that value times 1000 and filesize is always 0.
//   - Anything else: the file is opened through the 'fd' module, fstat'ed,
//     and the descriptor is closed again before returning.
// Throws Error("Invalid ZIP handle"), Error("File not found in archive: ...")
// or Error("File not found: ...").
function stat(path) {
  var resolved = resolve_path(path)
  if (resolved.type == 'zip') {
    // For ZIP archives, get file info from miniz
    var zip = resolved.handle
    if (!zip) throw new Error("Invalid ZIP handle")
    var file_path = resolved.path
    if (!file_path) {
      // Root directory stats
      return {
        filesize: 0,
        modtime: 0,
        createtime: 0,
        accesstime: 0,
        isDirectory: true
      }
    }
    var mod_time
    try {
      mod_time = zip.mod(file_path)
    } catch (e) {
      throw new Error("File not found in archive: " + file_path)
    }
    // For ZIP files, we don't have full stat info, just mod time
    return {
      filesize: 0, // Would need to extract to get size
      modtime: mod_time * 1000, // Convert to milliseconds
      createtime: mod_time * 1000,
      accesstime: mod_time * 1000,
      isDirectory: false
    }
  }
  // Direct filesystem access using fd
  var fd_mod = use('fd')
  var full_path = resolved.path
  var fd = null
  try {
    fd = fd_mod.open(full_path, 'r')
    var fd_stat = fd_mod.fstat(fd)
    return {
      filesize: fd_stat.size,
      modtime: fd_stat.mtime,
      createtime: fd_stat.ctime,
      accesstime: fd_stat.atime,
      isDirectory: fd_stat.isDirectory
    }
  } catch (e) {
    throw new Error("File not found: " + full_path)
  } finally {
    // Release the descriptor whenever open() succeeded (0 is a valid fd)
    if (fd != null) fd_mod.close(fd)
  }
}
// Read an entire file and return whatever the backend hands back.
//   - ZIP mount: the result of zip.slurp(<path inside the archive>).
//   - Otherwise: the file is opened via the 'fd' module and fd.read() is
//     asked for fstat().size bytes.
// The descriptor is closed on every exit path, including failures.
// Throws Error("Invalid ZIP handle"), Error("Failed to read from archive: ...")
// or Error("Failed to read file: ..."). An error from fd.open() itself is
// not wrapped.
function slurpbytes(path) {
  var resolved = resolve_path(path)
  if (resolved.type == 'zip') {
    var zip = resolved.handle
    if (!zip) throw new Error("Invalid ZIP handle")
    try {
      return zip.slurp(resolved.path)
    } catch (e) {
      throw new Error("Failed to read from archive: " + e.message)
    }
  }
  // Direct filesystem access
  var fd_mod = use('fd')
  var fd = fd_mod.open(resolved.path, 'r')
  try {
    var fd_stat = fd_mod.fstat(fd)
    return fd_mod.read(fd, fd_stat.size)
  } catch (e) {
    throw new Error("Failed to read file: " + e.message)
  } finally {
    // Runs after both the return and the rethrow above
    fd_mod.close(fd)
  }
}
// Read an entire file.
// Returns exactly what slurpbytes() returns; no text decoding is done.
// NOTE(review): the name suggests a string result. The byte-to-string
// conversion that used to follow the return was unreachable and has been
// removed - add real decoding here if callers need text.
function slurp(path) {
  return slurpbytes(path)
}
// Write data to a file, replacing its contents.
//   data - passed to fd.write() unchanged (string or binary alike)
//   path - virtual path; must not resolve into a ZIP mount
// The file is opened with mode 'w' and the descriptor is always closed.
// Throws Error("Cannot write to ZIP archives") for ZIP-backed paths.
// NOTE(review): the directory set through writepath() is not consulted here.
function slurpwrite(data, path) {
  var resolved = resolve_path(path)
  if (resolved.type == 'zip') {
    throw new Error("Cannot write to ZIP archives")
  }
  // Direct filesystem access
  var fd_mod = use('fd')
  var fd = fd_mod.open(resolved.path, 'w')
  try {
    fd_mod.write(fd, data)
  } finally {
    fd_mod.close(fd)
  }
}
// Mount an archive or directory.
//   source      - path of a ZIP file or of a directory
//   mount_point - virtual location of the mount; defaults to "/". Stored
//                 with its leading slash preserved.
//   prepend     - truthy puts the mount at the front of the mount table,
//                 otherwise it goes at the end (find_mount scans from the end)
// The source is first read and handed to miniz. If that fails for any
// reason the failure is logged and the source is mounted as a directory.
// The resolve_path cache is reset for both kinds of mount.
function mount(source, mount_point, prepend) {
  prepend = prepend != null ? prepend : false
  var miniz_mod = use('miniz')
  // Starts out as a directory mount and is upgraded if the ZIP loads
  var mount_info = {
    path: normalize_path(mount_point || "/", true),
    type: 'dir',
    handle: null,
    source: source
  }
  // Try to load as ZIP first
  try {
    var zip_data = null
    try {
      // Read the source file directly
      var fd_mod = use('fd')
      var fd = fd_mod.open(source, 'r')
      try {
        var fd_stat = fd_mod.fstat(fd)
        zip_data = fd_mod.read(fd, fd_stat.size)
      } finally {
        // Do not leak the descriptor when fstat/read fails
        fd_mod.close(fd)
      }
    } catch (e) {
      // If direct read fails, try through resolve_path
      zip_data = slurpbytes(source)
    }
    var zip = miniz_mod.read(zip_data)
    // Sanity check on the reader object
    if (!zip || typeof zip.count != 'function') {
      throw new Error("Invalid ZIP reader")
    }
    mount_info.type = 'zip'
    mount_info.handle = zip
  } catch (e) {
    // Not a ZIP, treat as directory
    log.console("ZIP mounting failed for " + source + ": " + e.message)
  }
  if (prepend) {
    mounts.unshift(mount_info)
  } else {
    mounts.push(mount_info)
  }
  // Clear cache since mounts changed
  path_cache = {}
}
// Unmount a path.
// Removes the first mount-table entry whose mount point equals `path` and
// resets the resolve_path cache.
// mount() stores mount points with their leading slash preserved, so the
// argument is compared in that form first; the slash-less form is also
// accepted so that "/data" still removes a mount registered as "data".
// Throws Error("Mount point not found: ...") when nothing matches.
function unmount(path) {
  var with_slash = normalize_path(path, true)
  var bare = normalize_path(path)
  for (var i = 0; i < mounts.length; i++) {
    var mounted = mounts[i].path
    if (mounted == with_slash || mounted == bare) {
      mounts.splice(i, 1)
      // Clear cache since mounts changed
      path_cache = {}
      return
    }
  }
  throw new Error("Mount point not found: " + bare)
}
// Set write directory
// Stores `path` in the module-level write_dir unchanged (no normalization or
// validation). NOTE(review): write_dir is not read by any function visible
// in this file, so this currently has no effect on where writes land.
function writepath(path) {
write_dir = path
}
// Simple glob matching.
//   pattern - literal text in which "*" stands for any run of characters
//             (including "/" and the empty run)
//   str     - candidate string
// Returns true when the whole of `str` matches the pattern. A pattern without
// "*" only matches by plain equality. Every character other than "*" is
// literal; "?" and "[...]" carry no special meaning.
function match(pattern, str) {
  if (pattern == str) return true
  if (pattern == "*") return true
  if (!pattern.includes("*")) return false
  // Escape regex metacharacters first so that only "*" acts as a wildcard,
  // then anchor so the pattern has to cover the entire string.
  var source = pattern
    .replace(/[.+?^${}()|[\]\\]/g, "\\$&")
    .replace(/\*/g, ".*")
  return new RegExp("^" + source + "$").test(str)
}
// Basic globfs implementation.
//   patterns   - array of glob patterns, or a single pattern string
//   start_path - virtual directory to search; defaults to ""
// Enumerates start_path recursively and returns, in enumeration order, every
// entry that matches at least one pattern (each entry at most once).
// Best effort: any error raised while enumerating or matching ends the scan
// and whatever was collected so far is returned.
function globfs(patterns, start_path) {
  start_path = start_path || ""
  // A bare string would otherwise be iterated one character at a time
  var pattern_list = typeof patterns == 'string' ? [patterns] : patterns
  var results = []
  try {
    var files = enumerate(start_path, true)
    for (var file of files) {
      for (var pattern of pattern_list) {
        if (match(pattern, file)) {
          results.push(file)
          break
        }
      }
    }
  } catch (e) {
    // Deliberately ignored: globbing is best effort
  }
  return results
}
// List the entries found below a virtual directory.
//   path    - virtual directory to list
//   recurse - when falsy or omitted, entries that sit in a sub-directory of
//             `path` are left out
// Only ZIP mounts are supported: every archive entry under the resolved
// inner path is returned as join_paths(path, <name relative to that path>).
// Plain directories, and ZIP mounts without a handle, yield an empty array.
function enumerate(path, recurse) {
  if (recurse == undefined) recurse = false
  var resolved = resolve_path(path)
  if (resolved.type != 'zip') {
    // Direct filesystem enumeration is not implemented yet
    return []
  }
  var archive = resolved.handle
  if (!archive) return []
  var found = []
  var dir_prefix = resolved.path ? resolved.path + "/" : ""
  var index = 0
  while (index < archive.count()) {
    var entry = archive.get_filename(index)
    index++
    if (!entry) continue
    // Entry lives outside the requested directory
    if (dir_prefix && !entry.startsWith(dir_prefix)) continue
    var tail = entry.substring(dir_prefix.length)
    if (!tail) continue
    // Anything with a further "/" belongs to a sub-directory
    if (!recurse && tail.includes("/")) continue
    found.push(join_paths(path, tail))
  }
  return found
}
// Report whether a path is a directory according to stat().
// Returns stat(path).isDirectory, or false when that lookup throws.
function is_directory(path) {
  var answer = false
  try {
    answer = stat(path).isDirectory
  } catch (e) {
    answer = false
  }
  return answer
}
// Return the mount point that find_mount() selects for `path`,
// or null when no mount covers it. The path is passed through as given.
function mountpoint(path) {
  var owner = find_mount(path)
  if (!owner) return null
  return owner.path
}
// Return the mount points of all current mounts, in mount-table order.
// A fresh array is built on every call.
function searchpath() {
  var listed = []
  for (var i = 0; i < mounts.length; i++) {
    listed.push(mounts[i].path)
  }
  return listed
}
// Open a file for writing and return a small file object.
// The resolved path is opened through the 'fd' module with mode 'w'.
// The returned object offers:
//   close()      - closes the descriptor
//   write(data)  - forwards data to fd.write()
//   buffer(size) - not implemented, does nothing
//   tell()       - not implemented, always 0
//   eof()        - not implemented, always false
// Throws Error("Cannot open files for writing in ZIP archives") when the
// path resolves into a ZIP mount.
function open(path) {
  var target = resolve_path(path)
  if (target.type == 'zip') {
    throw new Error("Cannot open files for writing in ZIP archives")
  }
  var fd_mod = use('fd')
  var fd = fd_mod.open(target.path, 'w')
  function close() { fd_mod.close(fd) }
  function write(data) { fd_mod.write(fd, data) }
  function buffer(size) { /* Not implemented */ }
  function tell() { /* Not implemented */ return 0 }
  function eof() { /* Not implemented */ return false }
  return {
    close: close,
    write: write,
    buffer: buffer,
    tell: tell,
    eof: eof
  }
}
// Directory operations
// Create a directory at the resolved location through the 'fd' module.
// Throws Error("Cannot create directories in ZIP archives") when the path
// resolves into a ZIP mount.
function mkdir(path) {
  var target = resolve_path(path)
  if (target.type == 'zip') {
    throw new Error("Cannot create directories in ZIP archives")
  }
  use('fd').mkdir(target.path)
}
// Remove the entry at the resolved location by calling fd.rmdir().
// Throws Error("Cannot remove files from ZIP archives") when the path
// resolves into a ZIP mount.
// NOTE(review): rmdir is used for every path; whether the 'fd' module needs
// a different call for regular files is not visible here - confirm.
function rm(path) {
  var target = resolve_path(path)
  if (target.type == 'zip') {
    throw new Error("Cannot remove files from ZIP archives")
  }
  use('fd').rmdir(target.path) // or rm depending on type
}
// Base directory (simplified)
// Always returns "." - no lookup of the real application directory is done.
function basedir() {
return "."
}
// User directory (simplified)
// Always returns "./user_data"; the org and app arguments are accepted but
// not used, and the directory is not created here.
function prefdir(org, app) {
return "./user_data"
}
// Get real directory (simplified)
// Returns dirname(path), i.e. the text before the last "/" of the virtual
// path as given. The mount table is not consulted.
function realdir(path) {
return dirname(path)
}
// Export functions
// Attach the public API to the module object and return it.
// normalize_path, is_absolute, join_paths, dirname, basename, find_mount and
// resolve_path are not attached, so they stay internal to this file.
cellfs.exists = exists
cellfs.stat = stat
cellfs.slurpbytes = slurpbytes
cellfs.slurp = slurp
cellfs.slurpwrite = slurpwrite
cellfs.mount = mount
cellfs.unmount = unmount
cellfs.writepath = writepath
cellfs.match = match
cellfs.globfs = globfs
cellfs.enumerate = enumerate
cellfs.is_directory = is_directory
cellfs.mountpoint = mountpoint
cellfs.searchpath = searchpath
cellfs.open = open
cellfs.mkdir = mkdir
cellfs.rm = rm
cellfs.basedir = basedir
cellfs.prefdir = prefdir
cellfs.realdir = realdir
return cellfs

View File

@@ -101,7 +101,7 @@ static JSValue js_miniz_compress(JSContext *js, JSValue this_val,
if (!cstring) if (!cstring)
return JS_EXCEPTION; return JS_EXCEPTION;
in_ptr = cstring; in_ptr = cstring;
} else { /* assume ArrayBuffer / TypedArray */ } else {
in_ptr = js_get_blob_data(js, &in_len, argv[0]); in_ptr = js_get_blob_data(js, &in_len, argv[0]);
if (!in_ptr) if (!in_ptr)
return JS_ThrowTypeError(js, return JS_ThrowTypeError(js,

154
tests/cellfs.ce Normal file
View File

@@ -0,0 +1,154 @@
// CellFS vs IO Performance Test
// Compares the speed of cellfs (miniz + fd) vs physfs-based io
var cellfs = use('cellfs')
var io = use('io')
var time = use('time')
var json = use('json')

// Call fn `iterations` times and return the elapsed time.number() delta.
function time_loop(iterations, fn) {
  var start_time = time.number()
  for (var i = 0; i < iterations; i++) {
    fn()
  }
  return time.number() - start_time
}

// Time the same operation through io and through cellfs and log both
// timings followed by their ratio.
//   name       - label used in the two timing lines (e.g. "stat")
//   what       - label used in the comparison line (e.g. "stat")
//   iterations - number of calls made through each backend
//   io_fn      - zero-argument function exercising io
//   cellfs_fn  - zero-argument function exercising cellfs
function compare(name, what, iterations, io_fn, cellfs_fn) {
  var io_time = time_loop(iterations, io_fn)
  log.console(`IO ${name} (${iterations} iterations): ${io_time}ms`)
  var cellfs_time = time_loop(iterations, cellfs_fn)
  log.console(`CellFS ${name} (${iterations} iterations): ${cellfs_time}ms`)
  // A zero elapsed time would turn the ratio into Infinity or NaN
  if (cellfs_time > 0) {
    var speedup = io_time / cellfs_time
    log.console(`CellFS is ${speedup.toFixed(2)}x ${speedup > 1 ? "faster" : "slower"} than IO for ${what}`)
  } else {
    log.console(`CellFS ${name} finished too quickly to measure; no ratio for ${what}`)
  }
}

log.console("CellFS vs IO Performance Test")
log.console("=================================")
// Test file operations
var test_file = "test.txt"
var test_content = "Hello, World! This is a test file for performance comparison.\n"
// Create test data
log.console("Creating test file...")
io.writepath('.')
// Make cellfs mirror all of io's search paths.
// The path actually handed to cellfs.mount is remembered so that the cleanup
// at the end unmounts the same string that was mounted.
var mounted_paths = []
var io_paths = io.searchpath()
for (var i = 0; i < io_paths.length; i++) {
  var path = io_paths[i]
  try {
    // Ensure path starts with /
    if (!path.startsWith('/')) {
      path = '/' + path
    }
    cellfs.mount(path, path)
    mounted_paths.push(path)
  } catch (e) {
    // Some paths might not be mountable, skip them
  }
}
io.slurpwrite(test_content, test_file)
// Show the search paths of both systems side by side
log.console(`IO search paths: ${json.encode(io.searchpath())}`)
log.console(`CellFS search paths: ${json.encode(cellfs.searchpath())}`)

log.console("Testing read operations...")
compare("slurpbytes", "reading", 100,
  function () { io.slurpbytes(test_file) },
  function () { cellfs.slurpbytes(test_file) })

// Test stat operations
log.console("\nTesting stat operations...")
compare("stat", "stat", 1000,
  function () { io.stat(test_file) },
  function () { cellfs.stat(test_file) })

// Test exists operations
log.console("\nTesting exists operations...")
compare("exists", "exists", 1000,
  function () { io.exists(test_file) },
  function () { cellfs.exists(test_file) })

// Test ZIP archive operations
log.console("\nTesting ZIP archive operations...")
// Create a test ZIP file using miniz
var miniz = use('miniz')
var zip_writer = miniz.write("test_archive.zip")
// Use io.slurpbytes to get the content in the form miniz expects
var content_bytes = io.slurpbytes(test_file)
zip_writer.add_file("test.txt", content_bytes)
// NOTE(review): presumably the archive is finalized once the writer is
// released - confirm against the miniz module.
zip_writer = null // Close it
// Mount the ZIP with io
io.mount("test_archive.zip", "/test_zip")
// Mount the ZIP with cellfs
cellfs.mount("test_archive.zip", "/test_zip")
log.console("Testing ZIP file reading...")
// NOTE(review): both reads ask for "test.txt", not a path below the
// "/test_zip" mount point - confirm that this really exercises the archive.
compare("ZIP read", "ZIP reading", 100,
  function () { io.slurp("test.txt") },
  function () { cellfs.slurp("test.txt") })

// Cleanup
// TODO: file and ZIP-mount cleanup is still disabled:
//io.rm(test_file)
//io.rm("test_archive.zip")
//io.unmount("/test_zip")
//cellfs.unmount("/test_zip")
// Unmount all the paths we mounted in cellfs
for (var mounted of mounted_paths) {
  try {
    cellfs.unmount(mounted)
  } catch (e) {
    // Ignore unmount errors
  }
}
log.console("\nTest completed!")