mirror of
https://github.com/voltrevo/ValueScript.git
synced 2026-04-18 03:00:27 -04:00
Improve string indexing, codePointAt
This commit is contained in:
@@ -9,7 +9,7 @@ pub fn op_sub_string(string_data: &Rc<String>, subscript: &Val) -> Val {
|
||||
let method_str = method.as_str();
|
||||
|
||||
return match method_str {
|
||||
"length" => Val::Number(string_data.len() as f64),
|
||||
"length" => Val::Number(string_data.as_bytes().len() as f64),
|
||||
_ => get_string_method(method_str),
|
||||
};
|
||||
}
|
||||
@@ -22,7 +22,10 @@ pub fn op_sub_string(string_data: &Rc<String>, subscript: &Val) -> Val {
|
||||
return Val::Undefined;
|
||||
}
|
||||
|
||||
string_from_byte(string_bytes[right_index])
|
||||
match unicode_at(string_bytes, right_index) {
|
||||
Some(string) => Val::String(Rc::new(string)),
|
||||
None => Val::String(Rc::new(String::from(""))),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn get_string_method(method: &str) -> Val {
|
||||
@@ -30,6 +33,7 @@ pub fn get_string_method(method: &str) -> Val {
|
||||
"at" => Val::Static(&AT),
|
||||
"charAt" => Val::Static(&CHAR_AT),
|
||||
"charCodeAt" => Val::Static(&CHAR_CODE_AT),
|
||||
"codePointAt" => Val::Static(&CODE_POINT_AT),
|
||||
_ => Val::Undefined,
|
||||
}
|
||||
}
|
||||
@@ -45,9 +49,10 @@ static AT: NativeFunction = NativeFunction {
|
||||
Some(i) => i,
|
||||
};
|
||||
|
||||
let byte = string_bytes[index];
|
||||
|
||||
Val::String(Rc::new(String::from_utf8_lossy(&[byte]).into_owned()))
|
||||
match unicode_at(string_bytes, index) {
|
||||
Some(string) => Val::String(Rc::new(string)),
|
||||
None => Val::String(Rc::new(String::from(""))),
|
||||
}
|
||||
}
|
||||
_ => std::panic!("Not implemented: exceptions/string indirection"),
|
||||
}
|
||||
@@ -58,7 +63,10 @@ static CHAR_AT: NativeFunction = NativeFunction {
|
||||
fn_: |this: &mut Val, params: Vec<Val>| -> Val {
|
||||
match this {
|
||||
Val::String(string_data) => match byte_at(string_data, params.get(0)) {
|
||||
Some(byte) => string_from_byte(byte),
|
||||
Some(byte) => match std::char::from_u32(byte as u32) {
|
||||
Some(c) => Val::String(Rc::new(c.to_string())),
|
||||
None => Val::String(Rc::new(String::from(""))),
|
||||
},
|
||||
None => Val::String(Rc::new(String::from(""))),
|
||||
},
|
||||
_ => std::panic!("Not implemented: exceptions/string indirection"),
|
||||
@@ -78,6 +86,73 @@ static CHAR_CODE_AT: NativeFunction = NativeFunction {
|
||||
},
|
||||
};
|
||||
|
||||
static CODE_POINT_AT: NativeFunction = NativeFunction {
|
||||
fn_: |this: &mut Val, params: Vec<Val>| -> Val {
|
||||
match this {
|
||||
Val::String(string_data) => {
|
||||
let string_bytes = string_data.as_bytes();
|
||||
|
||||
let index = match params.get(0) {
|
||||
Some(i) => match i.to_index() {
|
||||
None => return Val::Undefined,
|
||||
Some(i) => i,
|
||||
},
|
||||
_ => return Val::Undefined,
|
||||
};
|
||||
|
||||
match code_point_at(string_bytes, index) {
|
||||
Some(code_point) => Val::Number(code_point as f64),
|
||||
None => Val::Undefined,
|
||||
}
|
||||
}
|
||||
_ => std::panic!("Not implemented: exceptions/string indirection"),
|
||||
}
|
||||
},
|
||||
};
|
||||
|
||||
fn unicode_at(bytes: &[u8], index: usize) -> Option<String> {
|
||||
match code_point_at(bytes, index) {
|
||||
Some(code_point) => Some(
|
||||
std::char::from_u32(code_point)
|
||||
.expect("Invalid code point") // TODO: Find out if this is reachable and what to do about it
|
||||
.to_string(),
|
||||
),
|
||||
None => None,
|
||||
}
|
||||
}
|
||||
|
||||
fn code_point_at(bytes: &[u8], index: usize) -> Option<u32> {
|
||||
if index >= bytes.len() {
|
||||
return None;
|
||||
}
|
||||
|
||||
let byte = bytes[index];
|
||||
|
||||
let leading_ones = byte.leading_ones() as usize;
|
||||
|
||||
if leading_ones == 0 {
|
||||
return Some(byte as u32);
|
||||
}
|
||||
|
||||
if leading_ones == 1 || leading_ones > 4 || index + leading_ones > bytes.len() {
|
||||
return None;
|
||||
}
|
||||
|
||||
let mut value = (byte & (0x7F >> leading_ones)) as u32;
|
||||
|
||||
for i in 1..leading_ones {
|
||||
let next_byte = bytes[index + i];
|
||||
|
||||
if next_byte.leading_ones() != 1 {
|
||||
return None;
|
||||
}
|
||||
|
||||
value = (value << 6) | (next_byte & 0x3F) as u32;
|
||||
}
|
||||
|
||||
Some(value)
|
||||
}
|
||||
|
||||
fn byte_at(string: &String, index_param: Option<&Val>) -> Option<u8> {
|
||||
let mut index = match index_param {
|
||||
Some(i) => i.to_number(),
|
||||
@@ -104,12 +179,3 @@ fn byte_at(string: &String, index_param: Option<&Val>) -> Option<u8> {
|
||||
|
||||
Some(string_bytes[index_usize])
|
||||
}
|
||||
|
||||
fn string_from_byte(byte: u8) -> Val {
|
||||
// TODO: Val::Strings need to change to not use rust's string type,
|
||||
// because they need to represent an actual byte array underneath. This
|
||||
// occurs for invalid utf8 sequences which are getting converted to U+FFFD
|
||||
// here. To be analogous to js, the information of the actual byte needs
|
||||
// to be preserved, but that can't be represented in rust's string type.
|
||||
Val::String(Rc::new(String::from_utf8_lossy(&[byte]).into_owned()))
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user