// string_processing.cm — String-heavy kernel
// Stresses concatenation, split, search, replace, and the interning path.
|
|
|
|
// Build a text made of `paragraphs` copies of a fixed lorem-ipsum sentence,
// joined by single spaces. Yields "" when paragraphs is 0 or negative,
// because the loop body never runs.
function make_lorem(paragraphs) {
  var sentence = "Lorem ipsum dolor sit amet consectetur adipiscing elit sed do eiusmod tempor incididunt ut labore et dolore magna aliqua Ut enim ad minim veniam quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat"
  var joined = ""
  var n = 0
  for (n = 0; n < paragraphs; n++) {
    // Every copy after the first is preceded by one space.
    joined = joined + (n > 0 ? " " : "") + sentence
  }
  return joined
}
|
|
|
|
// Build a lookup table from text.
// Splits txt on single spaces and returns a record that maps each word to
// the array of positions (0-based indices into the split result) at which
// that word occurs, in increasing order.
function build_index(txt) {
  var tokens = array(txt, " ")
  var table = {}
  var pos = 0
  var word = null
  for (pos = 0; pos < length(tokens); pos++) {
    word = tokens[pos]
    // First sighting of this word: start its position list.
    if (!table[word]) table[word] = []
    push(table[word], pos)
  }
  return table
}
|
|
|
|
// Levenshtein-style edit distance between a and b: each cell takes the
// cheapest of a deletion, an insertion, or a substitution (free when the
// two characters are equal). Only two rows of the DP table are kept; they
// are swapped after every row instead of being reallocated.
function edit_distance(a, b) {
  var len_a = length(a)
  var len_b = length(b)
  // If either side is empty the distance is the other side's length.
  if (len_a == 0) return len_b
  if (len_b == 0) return len_a

  // array(x) with one argument is indexed per position below — presumably
  // the per-character form of the text.
  var chars_a = array(a)
  var chars_b = array(b)
  var above = array(len_b + 1, 0)  // row r - 1 of the table
  var row = array(len_b + 1, 0)    // row r, currently being filled
  var r = 0
  var c = 0
  var best = 0
  var candidate = 0
  var spare = null

  // Row 0: column c starts at cost c.
  for (c = 0; c <= len_b; c++) above[c] = c

  for (r = 1; r <= len_a; r++) {
    row[0] = r
    for (c = 1; c <= len_b; c++) {
      // Deletion.
      best = above[c] + 1
      // Insertion.
      candidate = row[c - 1] + 1
      if (candidate < best) best = candidate
      // Substitution (costs nothing when the characters match).
      candidate = above[c - 1] + (chars_a[r - 1] == chars_b[c - 1] ? 0 : 1)
      if (candidate < best) best = candidate
      row[c] = best
    }
    // Swap rows: the one just filled becomes "above" for the next pass.
    spare = above
    above = row
    row = spare
  }
  // After the final swap the last completed row is in `above`.
  return above[len_b]
}
|
|
|
|
// Shared input texts, built once before the export record is returned.
// lorem_5 (5 copies of the base sentence) feeds string_split_count,
// string_index_build and string_replace.
var lorem_5 = make_lorem(5)
|
|
// lorem_20 (20 copies of the base sentence) is the haystack for string_search.
var lorem_20 = make_lorem(20)
|
|
|
|
return {
|
|
// Split text into words and count
|
|
string_split_count: function(n) {
|
|
var i = 0
|
|
var words = null
|
|
var count = 0
|
|
for (i = 0; i < n; i++) {
|
|
words = array(lorem_5, " ")
|
|
count += length(words)
|
|
}
|
|
return count
|
|
},
|
|
|
|
// Build word index (split + hash + array ops)
|
|
string_index_build: function(n) {
|
|
var i = 0
|
|
var idx = null
|
|
for (i = 0; i < n; i++) {
|
|
idx = build_index(lorem_5)
|
|
}
|
|
return idx
|
|
},
|
|
|
|
// Search for substrings
|
|
string_search: function(n) {
|
|
var targets = ["dolor", "minim", "quis", "magna", "ipsum"]
|
|
var i = 0
|
|
var j = 0
|
|
var count = 0
|
|
for (i = 0; i < n; i++) {
|
|
for (j = 0; j < length(targets); j++) {
|
|
if (search(lorem_20, targets[j])) count++
|
|
}
|
|
}
|
|
return count
|
|
},
|
|
|
|
// Replace operations
|
|
string_replace: function(n) {
|
|
var i = 0
|
|
var result = null
|
|
for (i = 0; i < n; i++) {
|
|
result = replace(lorem_5, "dolor", "DOLOR")
|
|
result = replace(result, "ipsum", "IPSUM")
|
|
result = replace(result, "amet", "AMET")
|
|
}
|
|
return result
|
|
},
|
|
|
|
// String concatenation builder
|
|
string_builder: function(n) {
|
|
var i = 0
|
|
var j = 0
|
|
var s = null
|
|
var total = 0
|
|
for (i = 0; i < n; i++) {
|
|
s = ""
|
|
for (j = 0; j < 50; j++) {
|
|
s = s + "key=" + text(j) + "&value=" + text(j * 17) + "&"
|
|
}
|
|
total += length(s)
|
|
}
|
|
return total
|
|
},
|
|
|
|
// Edit distance (DP + array + string ops)
|
|
edit_distance: function(n) {
|
|
var words = ["kitten", "sitting", "saturday", "sunday", "intention", "execution"]
|
|
var i = 0
|
|
var j = 0
|
|
var total = 0
|
|
for (i = 0; i < n; i++) {
|
|
for (j = 0; j < length(words) - 1; j++) {
|
|
total += edit_distance(words[j], words[j + 1])
|
|
}
|
|
}
|
|
return total
|
|
},
|
|
|
|
// Upper/lower/trim chain
|
|
string_transforms: function(n) {
|
|
var src = " Hello World "
|
|
var i = 0
|
|
var x = 0
|
|
var result = null
|
|
for (i = 0; i < n; i++) {
|
|
result = trim(src)
|
|
result = upper(result)
|
|
result = lower(result)
|
|
x += length(result)
|
|
}
|
|
return x
|
|
},
|
|
|
|
// Starts_with / ends_with (interning path)
|
|
string_prefix_suffix: function(n) {
|
|
var strs = [
|
|
"application/json",
|
|
"text/html",
|
|
"image/png",
|
|
"application/xml",
|
|
"text/plain"
|
|
]
|
|
var i = 0
|
|
var j = 0
|
|
var count = 0
|
|
for (i = 0; i < n; i++) {
|
|
for (j = 0; j < length(strs); j++) {
|
|
if (starts_with(strs[j], "application/")) count++
|
|
if (ends_with(strs[j], "/json")) count++
|
|
if (starts_with(strs[j], "text/")) count++
|
|
}
|
|
}
|
|
return count
|
|
}
|
|
}
|