From 06108df3d4092b1f96c4edb0bbc35e117125bcab Mon Sep 17 00:00:00 2001
From: John Alanbrook <john@pockle.world>
Date: Sat, 31 May 2025 17:57:17 -0500
Subject: [PATCH] blob: add raw text read/write; http: decode chunked fetch responses

---
 source/blob.h     |  30 ++++++++++
 source/qjs_blob.c |  64 ++++++++++++++++++--
 source/qjs_http.c | 149 +++++++++++++++++++++++++++++++++++++++++-----
 3 files changed, 222 insertions(+), 21 deletions(-)

diff --git a/source/blob.h b/source/blob.h
index 6269c8a5..f8a8706f 100644
--- a/source/blob.h
+++ b/source/blob.h
@@ -56,6 +56,7 @@ int blob_write_fit(blob *b, int64_t fit, int length);
 int blob_write_kim(blob *b, int64_t value);
 int blob_write_pad(blob *b, int block_size);
 int blob_write_text(blob *b, const char *text);
+int blob_write_text_raw(blob *b, const char *text);
 
 int blob_write_bytes(blob *b, void *data, size_t length);
 
@@ -66,6 +67,7 @@ int blob_read_dec64(const blob *b, size_t from, double *out_value);
 int blob_read_fit(const blob *b, size_t from, int length, int64_t *out_value);
 int blob_read_kim(const blob *b, size_t from, int64_t *out_value, size_t *bits_read);
 int blob_read_text(const blob *b, size_t from, char **out_text, size_t *bits_read);
+int blob_read_text_raw(const blob *b, size_t from, char **out_text, size_t *bits_read);
 int blob_pad_check(const blob *b, size_t from, int block_size);
 
 // Utility functions
@@ -388,6 +390,13 @@ int blob_write_text(blob *b, const char *text) {
   return 0;
 }
 
+// Appends text's bytes, NUL terminator excluded, by way of blob_write_bytes.
+// Returns -1 for a NULL argument or a stone blob, else blob_write_bytes' result.
+int blob_write_text_raw(blob *b, const char *text) {
+  if (b == NULL || text == NULL || b->is_stone) return -1;
+  return blob_write_bytes(b, (void *)text, strlen(text));
+}
+
 int blob_write_bytes(blob *b, void *data, size_t length) {
   if (!b || !data || b->is_stone) return -1;
   
@@ -538,6 +547,27 @@ int blob_read_text(const blob *b, size_t from, char **out_text, size_t *bits_rea
   return 0;
 }
 
+// Copy everything from bit offset `from` to the end of a stone blob into a
+// freshly malloc'd, NUL-terminated string that the caller must free().
+// `from` must be byte-aligned; trailing bits short of a full byte are dropped.
+// Returns 0 and sets *out_text / *bits_read on success, -1 on any failure.
+int blob_read_text_raw(const blob *b, size_t from, char **out_text, size_t *bits_read) {
+  if (!b || !b->is_stone || !out_text || !bits_read) return -1;
+  // from == bit_length is legal: an empty blob or exhausted tail reads as "".
+  if (from > b->bit_length || from % 8 != 0) return -1;
+
+  size_t length_bytes = (b->bit_length - from) / 8;
+  char *str = malloc(length_bytes + 1);
+  if (!str) return -1;
+
+  // Skip the copy when there is nothing to read so b->data is never touched.
+  if (length_bytes > 0) memcpy(str, b->data + from / 8, length_bytes);
+  str[length_bytes] = '\0';
+  *out_text = str;
+  *bits_read = length_bytes * 8;
+  return 0;
+}
+
 int blob_pad_check(const blob *b, size_t from, int block_size) {
   if (!b || !b->is_stone) return 0;
   if (block_size <= 0) return 0;
diff --git a/source/qjs_blob.c b/source/qjs_blob.c
index eb97e25a..134ce3d2 100644
--- a/source/qjs_blob.c
+++ b/source/qjs_blob.c
@@ -289,6 +289,30 @@ static JSValue js_blob_write_text(JSContext *ctx, JSValueConst this_val,
   return JS_UNDEFINED;
 }
 
+// blob.write_text_raw(text): append the C-string bytes QuickJS yields for text.
+static JSValue js_blob_write_text_raw(JSContext *ctx, JSValueConst this_val,
+                                      int argc, JSValueConst *argv) {
+  if (argc < 1) {
+    return JS_ThrowTypeError(ctx, "write_text_raw(text) requires 1 argument");
+  }
+  blob *bd = js2blob(ctx, this_val);
+  if (!bd) {
+    return JS_ThrowTypeError(ctx, "write_text_raw: not called on a blob");
+  }
+
+  // Take the length from QuickJS rather than strlen(): a JS string may hold
+  // U+0000, and stopping at the first NUL would silently drop the rest.
+  size_t len = 0;
+  const char *str = JS_ToCStringLen(ctx, &len, argv[0]);
+  if (!str) return JS_EXCEPTION;
+  int result = blob_write_bytes(bd, (void *)str, len);
+  JS_FreeCString(ctx, str);
+  if (result < 0) {
+    return JS_ThrowTypeError(ctx, "write_text_raw: cannot write to stone blob or OOM");
+  }
+  return JS_UNDEFINED;
+}
+
 // blob.read_logical(from)
 static JSValue js_blob_read_logical(JSContext *ctx, JSValueConst this_val,
                                     int argc, JSValueConst *argv) {
@@ -433,9 +457,6 @@ static JSValue js_blob_read_kim(JSContext *ctx, JSValueConst this_val,
 // blob.read_text(from)
 static JSValue js_blob_read_text(JSContext *ctx, JSValueConst this_val,
                                  int argc, JSValueConst *argv) {
-  if (argc < 1) {
-    return JS_ThrowTypeError(ctx, "read_text(from) requires 1 argument");
-  }
   blob *bd = js2blob(ctx, this_val);
   if (!bd) {
     return JS_ThrowTypeError(ctx, "read_text: not called on a blob");
@@ -464,6 +485,39 @@ static JSValue js_blob_read_text(JSContext *ctx, JSValueConst this_val,
   return obj;
 }
 
+// blob.read_text_raw(from = 0): read from bit offset `from` to the end of a
+// stone blob; returns { text, bits_read } or throws on a bad offset.
+static JSValue js_blob_read_text_raw(JSContext *ctx, JSValueConst this_val,
+                                     int argc, JSValueConst *argv) {
+  blob *bd = js2blob(ctx, this_val);
+  if (!bd) {
+    return JS_ThrowTypeError(ctx, "read_text_raw: not called on a blob");
+  }
+  if (!bd->is_stone) {
+    return JS_ThrowTypeError(ctx, "read_text_raw: blob must be stone");
+  }
+
+  int64_t from = 0;
+  if (argc >= 1 && JS_ToInt64(ctx, &from, argv[0]) < 0) return JS_EXCEPTION;
+
+  // Reject negatives here instead of letting them wrap in the size_t parameter.
+  char *text = NULL;
+  size_t bits_read = 0;
+  if (from < 0 || blob_read_text_raw(bd, (size_t)from, &text, &bits_read) < 0) {
+    return JS_ThrowRangeError(ctx, "read_text_raw: out of range or not byte-aligned");
+  }
+
+  // Use the explicit length so bytes after an embedded NUL are kept and the
+  // string stays consistent with the bits_read reported below.
+  JSValue result = JS_NewStringLen(ctx, text, bits_read / 8);
+  free(text);
+  if (JS_IsException(result)) return result;
+  JSValue obj = JS_NewObject(ctx);
+  JS_SetPropertyStr(ctx, obj, "text", result);
+  JS_SetPropertyStr(ctx, obj, "bits_read", JS_NewInt64(ctx, bits_read));
+  return obj;
+}
+
 // blob.pad?(from, block_size)
 static JSValue js_blob_pad_q(JSContext *ctx, JSValueConst this_val,
                              int argc, JSValueConst *argv) {
@@ -557,7 +611,7 @@ static const JSCFunctionListEntry js_blob_funcs[] = {
     JS_CFUNC_DEF("write_fit", 2, js_blob_write_fit),
     JS_CFUNC_DEF("write_kim", 1, js_blob_write_kim),
     JS_CFUNC_DEF("write_pad", 1, js_blob_write_pad),
-    JS_CFUNC_DEF("write_text", 1, js_blob_write_text),
+    JS_CFUNC_DEF("write_text", 1, js_blob_write_text_raw),
     
     // Read methods
     JS_CFUNC_DEF("read_logical", 1, js_blob_read_logical),
@@ -565,7 +619,7 @@ static const JSCFunctionListEntry js_blob_funcs[] = {
     JS_CFUNC_DEF("read_dec64", 1, js_blob_read_dec64),
     JS_CFUNC_DEF("read_fit", 2, js_blob_read_fit),
     JS_CFUNC_DEF("read_kim", 1, js_blob_read_kim),
-    JS_CFUNC_DEF("read_text", 1, js_blob_read_text),
+    JS_CFUNC_DEF("read_text", 1, js_blob_read_text_raw),
     JS_CFUNC_DEF("pad?", 2, js_blob_pad_q),
     
     // Other methods
diff --git a/source/qjs_http.c b/source/qjs_http.c
index 0a7a19e7..354f37e7 100644
--- a/source/qjs_http.c
+++ b/source/qjs_http.c
@@ -254,16 +254,79 @@ exit:
     return ret;
 }
 
-// Extract body from HTTP response
-static char *extract_body(const char *response, size_t response_len, size_t *body_len) {
-    const char *body_start = strstr(response, "\r\n\r\n");
-    if (!body_start) {
-        return NULL;
+
+/* ---------------------------------------------------------------------
+   Very small, non-streaming de-chunker for chunked transfer coding.
+   src/src_len is the complete raw body, expected to be a sequence of
+       <hex size>[;extension] CRLF <size bytes of data> CRLF
+   ended by a chunk of size 0.  Each chunk's data bytes are appended to
+   `out`; size lines and anything after the zero-size chunk are dropped.
+
+   Returns 0 on success, -1 on format errors or when buffer_append fails.
+   On -1 `out` may already hold the chunks decoded so far and still belongs
+   to the caller.  Input that ends on a chunk boundary without a zero-size
+   chunk is tolerated and also returns 0.
+
+   Diagnostics are written to stderr, and only on failure; nothing is
+   printed to stdout.
+   -------------------------------------------------------------------*/
+static int decode_chunked(buffer_t *out,
+                          const char *src, size_t src_len)
+{
+  while (src_len) {
+    /* find the CRLF that ends the size line */
+    size_t n = 0;
+    while (n < src_len && src[n] != '\r') { n++; }
+    if (n == src_len || n + 1 >= src_len || src[n+1] != '\n') {
+        fprintf(stderr, "[CHUNK] Error: incomplete chunk size line\n");
+        return -1;
+    }
+
+    /* strtol needs a NUL-terminated string, so copy the line out first */
+    char size_buf[32];
+    if (n >= sizeof(size_buf)) {
+        fprintf(stderr, "[CHUNK] Error: chunk size line too long (%zu)\n", n);
+        return -1;
+    }
+    memcpy(size_buf, src, n);
+    size_buf[n] = 0;
+
+    /* parsing stops at the first non-hex character, e.g. a ";extension" */
+    char *endptr;
+    long chunk_len = strtol(size_buf, &endptr, 16);
+    if (endptr == size_buf || chunk_len < 0) {
+        fprintf(stderr, "[CHUNK] Error: invalid chunk size '%s'\n", size_buf);
+        return -1;
     }
-    body_start += 4;
     
-    *body_len = response_len - (body_start - response);
-    return (char *)body_start;
+    if (chunk_len == 0) {
+        return 0;           /* terminating chunk: done */
+    }
+
+    /* skip "<hex>\r\n" */
+    src      += n + 2;
+    src_len  -= n + 2;
+
+    /* the declared size must fit in what was actually received */
+    if ((size_t)chunk_len > src_len) {
+        fprintf(stderr, "[CHUNK] Error: chunk size %ld exceeds remaining data %zu\n", chunk_len, src_len);
+        return -1;
+    }
+    if (buffer_append(out, src, chunk_len) < 0) return -1;
+
+    /* skip chunk data */
+    src      += chunk_len;
+    src_len  -= chunk_len;
+
+    /* each chunk's data must be followed by CRLF */
+    if (src_len < 2 || src[0] != '\r' || src[1] != '\n') {
+        fprintf(stderr, "[CHUNK] Error: missing CRLF after chunk data (remaining=%zu)\n", src_len);
+        return -1;
+    }
+    src      += 2;
+    src_len  -= 2;
+  }
+  return 0;  /* ran out of input without a terminating chunk */
 }
 
 // Parse HTTP headers
@@ -790,22 +853,76 @@ static JSValue js_fetch(JSContext *ctx, JSValueConst this_val, int argc, JSValue
     }
     
     if (ret == 0 && response_buf.data) {
-        // Extract body
-        size_t body_len;
-        char *body = extract_body(response_buf.data, response_buf.size, &body_len);
-        
-        if (body) {
-            // Return body as ArrayBuffer
-            result = js_new_blob_stoned_copy(ctx, (uint8_t *)body, body_len);
-        } else {
+        // Split headers / body
+        char *body = strstr(response_buf.data, "\r\n\r\n");
+        if (!body) {
             result = JS_ThrowInternalError(ctx, "Failed to parse HTTP response");
+            goto cleanup;
         }
+        size_t header_len = body + 4 - response_buf.data;
+        size_t body_len = response_buf.size - header_len;
+        body += 4;
+
+        // Look for "Transfer-Encoding: chunked"
+        int is_chunked = (strstr(response_buf.data, "Transfer-Encoding: chunked") ||
+                          strstr(response_buf.data, "transfer-encoding: chunked"));
+
+        printf("[HTTP] Total response size: %zu bytes\n", response_buf.size);
+        printf("[HTTP] Header length: %zu bytes\n", header_len);
+        printf("[HTTP] Body length before decoding: %zu bytes\n", body_len);
+        printf("[HTTP] Is chunked: %s\n", is_chunked ? "yes" : "no");
+        
+        // Print first 100 chars of body before decoding
+        printf("[HTTP] First 100 chars of raw body: ");
+        for (size_t i = 0; i < 100 && i < body_len; i++) {
+            if (body[i] >= 32 && body[i] <= 126) {
+                printf("%c", body[i]);
+            } else {
+                printf("\\x%02x", (unsigned char)body[i]);
+            }
+        }
+        printf("\n");
+
+        buffer_t clean_body;
+        buffer_init(&clean_body);
+
+        if (is_chunked) {
+            if (decode_chunked(&clean_body, body, body_len) < 0) {
+                buffer_free(&clean_body);
+                result = JS_ThrowInternalError(ctx, "Failed to decode chunked response");
+                goto cleanup;
+            }
+            body = clean_body.data;
+            body_len = clean_body.size;
+        } else {
+            // not chunked → just copy
+            buffer_append(&clean_body, body, body_len);
+            body = clean_body.data;
+            body_len = clean_body.size;
+        }
+        
+        printf("[HTTP] Body length after decoding: %zu bytes\n", body_len);
+        printf("[HTTP] First 100 chars of decoded body: ");
+        for (size_t i = 0; i < 100 && i < body_len; i++) {
+            if (body[i] >= 32 && body[i] <= 126) {
+                printf("%c", body[i]);
+            } else {
+                printf("\\x%02x", (unsigned char)body[i]);
+            }
+        }
+        printf("\n");
+
+        // build blob from *body / body_len* exactly as before
+        result = js_new_blob_stoned_copy(ctx, (uint8_t *)body, body_len);
+        
+        buffer_free(&clean_body);
     } else {
         char error_buf[256];
         mbedtls_strerror(ret, error_buf, sizeof(error_buf));
         result = JS_ThrowInternalError(ctx, "Request failed: %s", error_buf);
     }
     
+cleanup:
     // Cleanup
     JS_FreeCString(ctx, url);
     free(host);