mirror of
https://github.com/tlsnotary/tlsn-utils.git
synced 2026-01-08 04:13:59 -05:00
migrate spansy to tlsn-utils
This commit is contained in:
@@ -1,7 +1,11 @@
|
|||||||
[workspace]
|
[workspace]
|
||||||
members = ["utils", "utils-aio"]
|
members = ["utils", "utils-aio", "spansy"]
|
||||||
|
|
||||||
[workspace.dependencies]
|
[workspace.dependencies]
|
||||||
|
tlsn-utils = { path = "utils" }
|
||||||
|
tlsn-utils-aio = { path = "utils-aio" }
|
||||||
|
spansy = { path = "spansy" }
|
||||||
|
|
||||||
rand = "0.8"
|
rand = "0.8"
|
||||||
thiserror = "1"
|
thiserror = "1"
|
||||||
async-trait = "0.1"
|
async-trait = "0.1"
|
||||||
|
|||||||
176
LICENSE-APACHE
Normal file
176
LICENSE-APACHE
Normal file
@@ -0,0 +1,176 @@
|
|||||||
|
Apache License
|
||||||
|
Version 2.0, January 2004
|
||||||
|
http://www.apache.org/licenses/
|
||||||
|
|
||||||
|
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
|
||||||
|
|
||||||
|
1. Definitions.
|
||||||
|
|
||||||
|
"License" shall mean the terms and conditions for use, reproduction,
|
||||||
|
and distribution as defined by Sections 1 through 9 of this document.
|
||||||
|
|
||||||
|
"Licensor" shall mean the copyright owner or entity authorized by
|
||||||
|
the copyright owner that is granting the License.
|
||||||
|
|
||||||
|
"Legal Entity" shall mean the union of the acting entity and all
|
||||||
|
other entities that control, are controlled by, or are under common
|
||||||
|
control with that entity. For the purposes of this definition,
|
||||||
|
"control" means (i) the power, direct or indirect, to cause the
|
||||||
|
direction or management of such entity, whether by contract or
|
||||||
|
otherwise, or (ii) ownership of fifty percent (50%) or more of the
|
||||||
|
outstanding shares, or (iii) beneficial ownership of such entity.
|
||||||
|
|
||||||
|
"You" (or "Your") shall mean an individual or Legal Entity
|
||||||
|
exercising permissions granted by this License.
|
||||||
|
|
||||||
|
"Source" form shall mean the preferred form for making modifications,
|
||||||
|
including but not limited to software source code, documentation
|
||||||
|
source, and configuration files.
|
||||||
|
|
||||||
|
"Object" form shall mean any form resulting from mechanical
|
||||||
|
transformation or translation of a Source form, including but
|
||||||
|
not limited to compiled object code, generated documentation,
|
||||||
|
and conversions to other media types.
|
||||||
|
|
||||||
|
"Work" shall mean the work of authorship, whether in Source or
|
||||||
|
Object form, made available under the License, as indicated by a
|
||||||
|
copyright notice that is included in or attached to the work
|
||||||
|
(an example is provided in the Appendix below).
|
||||||
|
|
||||||
|
"Derivative Works" shall mean any work, whether in Source or Object
|
||||||
|
form, that is based on (or derived from) the Work and for which the
|
||||||
|
editorial revisions, annotations, elaborations, or other modifications
|
||||||
|
represent, as a whole, an original work of authorship. For the purposes
|
||||||
|
of this License, Derivative Works shall not include works that remain
|
||||||
|
separable from, or merely link (or bind by name) to the interfaces of,
|
||||||
|
the Work and Derivative Works thereof.
|
||||||
|
|
||||||
|
"Contribution" shall mean any work of authorship, including
|
||||||
|
the original version of the Work and any modifications or additions
|
||||||
|
to that Work or Derivative Works thereof, that is intentionally
|
||||||
|
submitted to Licensor for inclusion in the Work by the copyright owner
|
||||||
|
or by an individual or Legal Entity authorized to submit on behalf of
|
||||||
|
the copyright owner. For the purposes of this definition, "submitted"
|
||||||
|
means any form of electronic, verbal, or written communication sent
|
||||||
|
to the Licensor or its representatives, including but not limited to
|
||||||
|
communication on electronic mailing lists, source code control systems,
|
||||||
|
and issue tracking systems that are managed by, or on behalf of, the
|
||||||
|
Licensor for the purpose of discussing and improving the Work, but
|
||||||
|
excluding communication that is conspicuously marked or otherwise
|
||||||
|
designated in writing by the copyright owner as "Not a Contribution."
|
||||||
|
|
||||||
|
"Contributor" shall mean Licensor and any individual or Legal Entity
|
||||||
|
on behalf of whom a Contribution has been received by Licensor and
|
||||||
|
subsequently incorporated within the Work.
|
||||||
|
|
||||||
|
2. Grant of Copyright License. Subject to the terms and conditions of
|
||||||
|
this License, each Contributor hereby grants to You a perpetual,
|
||||||
|
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
|
||||||
|
copyright license to reproduce, prepare Derivative Works of,
|
||||||
|
publicly display, publicly perform, sublicense, and distribute the
|
||||||
|
Work and such Derivative Works in Source or Object form.
|
||||||
|
|
||||||
|
3. Grant of Patent License. Subject to the terms and conditions of
|
||||||
|
this License, each Contributor hereby grants to You a perpetual,
|
||||||
|
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
|
||||||
|
(except as stated in this section) patent license to make, have made,
|
||||||
|
use, offer to sell, sell, import, and otherwise transfer the Work,
|
||||||
|
where such license applies only to those patent claims licensable
|
||||||
|
by such Contributor that are necessarily infringed by their
|
||||||
|
Contribution(s) alone or by combination of their Contribution(s)
|
||||||
|
with the Work to which such Contribution(s) was submitted. If You
|
||||||
|
institute patent litigation against any entity (including a
|
||||||
|
cross-claim or counterclaim in a lawsuit) alleging that the Work
|
||||||
|
or a Contribution incorporated within the Work constitutes direct
|
||||||
|
or contributory patent infringement, then any patent licenses
|
||||||
|
granted to You under this License for that Work shall terminate
|
||||||
|
as of the date such litigation is filed.
|
||||||
|
|
||||||
|
4. Redistribution. You may reproduce and distribute copies of the
|
||||||
|
Work or Derivative Works thereof in any medium, with or without
|
||||||
|
modifications, and in Source or Object form, provided that You
|
||||||
|
meet the following conditions:
|
||||||
|
|
||||||
|
(a) You must give any other recipients of the Work or
|
||||||
|
Derivative Works a copy of this License; and
|
||||||
|
|
||||||
|
(b) You must cause any modified files to carry prominent notices
|
||||||
|
stating that You changed the files; and
|
||||||
|
|
||||||
|
(c) You must retain, in the Source form of any Derivative Works
|
||||||
|
that You distribute, all copyright, patent, trademark, and
|
||||||
|
attribution notices from the Source form of the Work,
|
||||||
|
excluding those notices that do not pertain to any part of
|
||||||
|
the Derivative Works; and
|
||||||
|
|
||||||
|
(d) If the Work includes a "NOTICE" text file as part of its
|
||||||
|
distribution, then any Derivative Works that You distribute must
|
||||||
|
include a readable copy of the attribution notices contained
|
||||||
|
within such NOTICE file, excluding those notices that do not
|
||||||
|
pertain to any part of the Derivative Works, in at least one
|
||||||
|
of the following places: within a NOTICE text file distributed
|
||||||
|
as part of the Derivative Works; within the Source form or
|
||||||
|
documentation, if provided along with the Derivative Works; or,
|
||||||
|
within a display generated by the Derivative Works, if and
|
||||||
|
wherever such third-party notices normally appear. The contents
|
||||||
|
of the NOTICE file are for informational purposes only and
|
||||||
|
do not modify the License. You may add Your own attribution
|
||||||
|
notices within Derivative Works that You distribute, alongside
|
||||||
|
or as an addendum to the NOTICE text from the Work, provided
|
||||||
|
that such additional attribution notices cannot be construed
|
||||||
|
as modifying the License.
|
||||||
|
|
||||||
|
You may add Your own copyright statement to Your modifications and
|
||||||
|
may provide additional or different license terms and conditions
|
||||||
|
for use, reproduction, or distribution of Your modifications, or
|
||||||
|
for any such Derivative Works as a whole, provided Your use,
|
||||||
|
reproduction, and distribution of the Work otherwise complies with
|
||||||
|
the conditions stated in this License.
|
||||||
|
|
||||||
|
5. Submission of Contributions. Unless You explicitly state otherwise,
|
||||||
|
any Contribution intentionally submitted for inclusion in the Work
|
||||||
|
by You to the Licensor shall be under the terms and conditions of
|
||||||
|
this License, without any additional terms or conditions.
|
||||||
|
Notwithstanding the above, nothing herein shall supersede or modify
|
||||||
|
the terms of any separate license agreement you may have executed
|
||||||
|
with Licensor regarding such Contributions.
|
||||||
|
|
||||||
|
6. Trademarks. This License does not grant permission to use the trade
|
||||||
|
names, trademarks, service marks, or product names of the Licensor,
|
||||||
|
except as required for reasonable and customary use in describing the
|
||||||
|
origin of the Work and reproducing the content of the NOTICE file.
|
||||||
|
|
||||||
|
7. Disclaimer of Warranty. Unless required by applicable law or
|
||||||
|
agreed to in writing, Licensor provides the Work (and each
|
||||||
|
Contributor provides its Contributions) on an "AS IS" BASIS,
|
||||||
|
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
|
||||||
|
implied, including, without limitation, any warranties or conditions
|
||||||
|
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
|
||||||
|
PARTICULAR PURPOSE. You are solely responsible for determining the
|
||||||
|
appropriateness of using or redistributing the Work and assume any
|
||||||
|
risks associated with Your exercise of permissions under this License.
|
||||||
|
|
||||||
|
8. Limitation of Liability. In no event and under no legal theory,
|
||||||
|
whether in tort (including negligence), contract, or otherwise,
|
||||||
|
unless required by applicable law (such as deliberate and grossly
|
||||||
|
negligent acts) or agreed to in writing, shall any Contributor be
|
||||||
|
liable to You for damages, including any direct, indirect, special,
|
||||||
|
incidental, or consequential damages of any character arising as a
|
||||||
|
result of this License or out of the use or inability to use the
|
||||||
|
Work (including but not limited to damages for loss of goodwill,
|
||||||
|
work stoppage, computer failure or malfunction, or any and all
|
||||||
|
other commercial damages or losses), even if such Contributor
|
||||||
|
has been advised of the possibility of such damages.
|
||||||
|
|
||||||
|
9. Accepting Warranty or Additional Liability. While redistributing
|
||||||
|
the Work or Derivative Works thereof, You may choose to offer,
|
||||||
|
and charge a fee for, acceptance of support, warranty, indemnity,
|
||||||
|
or other liability obligations and/or rights consistent with this
|
||||||
|
License. However, in accepting such obligations, You may act only
|
||||||
|
on Your own behalf and on Your sole responsibility, not on behalf
|
||||||
|
of any other Contributor, and only if You agree to indemnify,
|
||||||
|
defend, and hold each Contributor harmless for any liability
|
||||||
|
incurred by, or claims asserted against, such Contributor by reason
|
||||||
|
of your accepting any such warranty or additional liability.
|
||||||
|
|
||||||
|
END OF TERMS AND CONDITIONS
|
||||||
21
LICENSE-MIT
Normal file
21
LICENSE-MIT
Normal file
@@ -0,0 +1,21 @@
|
|||||||
|
MIT License
|
||||||
|
|
||||||
|
Copyright (c) 2023 sinu <65924192+sinui0@users.noreply.github.com>
|
||||||
|
|
||||||
|
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||||
|
of this software and associated documentation files (the "Software"), to deal
|
||||||
|
in the Software without restriction, including without limitation the rights
|
||||||
|
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||||
|
copies of the Software, and to permit persons to whom the Software is
|
||||||
|
furnished to do so, subject to the following conditions:
|
||||||
|
|
||||||
|
The above copyright notice and this permission notice shall be included in all
|
||||||
|
copies or substantial portions of the Software.
|
||||||
|
|
||||||
|
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||||
|
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||||
|
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||||
|
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||||
|
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||||
|
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||||
|
SOFTWARE.
|
||||||
4
spansy/.gitignore
vendored
Normal file
4
spansy/.gitignore
vendored
Normal file
@@ -0,0 +1,4 @@
|
|||||||
|
/target
|
||||||
|
.cargo-ok
|
||||||
|
.DS_Store
|
||||||
|
Cargo.lock
|
||||||
7
spansy/CHANGELOG.md
Normal file
7
spansy/CHANGELOG.md
Normal file
@@ -0,0 +1,7 @@
|
|||||||
|
# Changelog
|
||||||
|
All notable changes to this project will be documented in this file.
|
||||||
|
|
||||||
|
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/),
|
||||||
|
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
|
||||||
|
|
||||||
|
## [Unreleased]
|
||||||
22
spansy/Cargo.toml
Normal file
22
spansy/Cargo.toml
Normal file
@@ -0,0 +1,22 @@
|
|||||||
|
[package]
|
||||||
|
name = "spansy"
|
||||||
|
version = "0.1.0"
|
||||||
|
edition = "2021"
|
||||||
|
description = "Parsing with span information"
|
||||||
|
repository = "https://github.com/tlsnotary/tlsn-utils"
|
||||||
|
license = "MIT OR Apache-2.0"
|
||||||
|
|
||||||
|
[features]
|
||||||
|
default = []
|
||||||
|
serde = ["dep:serde", "bytes/serde"]
|
||||||
|
|
||||||
|
[dependencies]
|
||||||
|
tlsn-utils.workspace = true
|
||||||
|
|
||||||
|
bytes.workspace = true
|
||||||
|
serde = { workspace = true, features = ["derive"], optional = true }
|
||||||
|
thiserror.workspace = true
|
||||||
|
|
||||||
|
httparse = "1.8"
|
||||||
|
pest = { version = "2.7" }
|
||||||
|
pest_derive = { version = "2.7" }
|
||||||
3
spansy/README.md
Normal file
3
spansy/README.md
Normal file
@@ -0,0 +1,3 @@
|
|||||||
|
# spansy
|
||||||
|
|
||||||
|
Crate for parsing span information from common formats such as HTTP and JSON.
|
||||||
51
spansy/src/helpers.rs
Normal file
51
spansy/src/helpers.rs
Normal file
@@ -0,0 +1,51 @@
|
|||||||
|
use std::ops::Range;
|
||||||
|
|
||||||
|
/// Computes the byte offsets that `span` occupies inside `src`.
///
/// Containment is decided by memory address rather than by content, so `span`
/// has to be a sub-slice of the very same allocation that `src` points to.
///
/// # Panics
///
/// Panics if the span is not within the source string.
pub(crate) fn get_span_range(src: &[u8], span: &[u8]) -> Range<usize> {
    // Address bounds of the source buffer.
    let src_start = src.as_ptr() as usize;
    let src_end = src_start + src.len();

    // Address bounds of the candidate span.
    let span_start = span.as_ptr() as usize;
    let span_end = span_start + span.len();

    // Reject any span that starts before or ends after the source buffer.
    if span_start < src_start || span_end > src_end {
        panic!(
            "span is not within source string: src={src_start}..{src_end}, span={span_start}..{span_end}"
        );
    }

    // Translate the absolute addresses into offsets relative to `src`.
    let begin = span_start - src_start;
    begin..begin + span.len()
}
|
||||||
|
|
||||||
|
#[cfg(test)]
mod tests {
    use super::*;

    /// Sub-slices of the source map back to the index range they were cut from.
    #[test]
    fn test_get_span_range() {
        let src = b"foobar";

        for expected in [0..src.len(), 0..1, 1..2, 3..6] {
            let span = &src[expected.clone()];
            assert_eq!(get_span_range(src, span), expected);
        }
    }

    /// A span that begins before the source buffer must be rejected.
    #[test]
    #[should_panic]
    fn test_get_span_range_outside_src_begin() {
        let src = b"foobar";
        let (window, span) = (&src[1..3], &src[..3]);

        get_span_range(window, span);
    }

    /// A span that extends past the end of the source buffer must be rejected.
    #[test]
    #[should_panic]
    fn test_get_span_range_outside_src_end() {
        let src = b"foobar";
        let (window, span) = (&src[1..3], &src[2..]);

        get_span_range(window, span);
    }
}
|
||||||
251
spansy/src/http/mod.rs
Normal file
251
spansy/src/http/mod.rs
Normal file
@@ -0,0 +1,251 @@
|
|||||||
|
//! HTTP span parsing.
|
||||||
|
|
||||||
|
mod span;
|
||||||
|
mod types;
|
||||||
|
|
||||||
|
use bytes::Bytes;
|
||||||
|
|
||||||
|
pub use span::{parse_request, parse_response};
|
||||||
|
pub use types::{
|
||||||
|
Body, Code, Header, HeaderName, HeaderValue, Method, Reason, Request, RequestLine, Response,
|
||||||
|
Status, Target,
|
||||||
|
};
|
||||||
|
|
||||||
|
use crate::ParseError;
|
||||||
|
|
||||||
|
use self::span::{parse_request_from_bytes, parse_response_from_bytes};
|
||||||
|
/// An iterator yielding parsed HTTP requests.
|
||||||
|
#[derive(Debug)]
|
||||||
|
pub struct Requests {
|
||||||
|
src: Bytes,
|
||||||
|
/// The current position in the source string.
|
||||||
|
pos: usize,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl Requests {
|
||||||
|
/// Returns a new `Requests` iterator.
|
||||||
|
pub fn new(src: Bytes) -> Self {
|
||||||
|
Self { src, pos: 0 }
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Returns a new `Requests` iterator.
|
||||||
|
pub fn new_from_slice(src: &[u8]) -> Self {
|
||||||
|
Self {
|
||||||
|
src: Bytes::copy_from_slice(src),
|
||||||
|
pos: 0,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl Iterator for Requests {
|
||||||
|
type Item = Result<Request, ParseError>;
|
||||||
|
|
||||||
|
fn next(&mut self) -> Option<Self::Item> {
|
||||||
|
if self.pos >= self.src.len() {
|
||||||
|
None
|
||||||
|
} else {
|
||||||
|
Some(parse_request_from_bytes(&self.src, self.pos).map(|req| {
|
||||||
|
self.pos += req.span.len();
|
||||||
|
req
|
||||||
|
}))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// An iterator yielding parsed HTTP responses.
|
||||||
|
#[derive(Debug)]
|
||||||
|
pub struct Responses {
|
||||||
|
src: Bytes,
|
||||||
|
/// The current position in the source string.
|
||||||
|
pos: usize,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl Responses {
|
||||||
|
/// Returns a new `Responses` iterator.
|
||||||
|
pub fn new(src: Bytes) -> Self {
|
||||||
|
Self { src, pos: 0 }
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Returns a new `Responses` iterator.
|
||||||
|
pub fn new_from_slice(src: &[u8]) -> Self {
|
||||||
|
Self {
|
||||||
|
src: Bytes::copy_from_slice(src),
|
||||||
|
pos: 0,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl Iterator for Responses {
|
||||||
|
type Item = Result<Response, ParseError>;
|
||||||
|
|
||||||
|
fn next(&mut self) -> Option<Self::Item> {
|
||||||
|
if self.pos >= self.src.len() {
|
||||||
|
None
|
||||||
|
} else {
|
||||||
|
Some(parse_response_from_bytes(&self.src, self.pos).map(|resp| {
|
||||||
|
self.pos += resp.span.len();
|
||||||
|
resp
|
||||||
|
}))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[cfg(test)]
mod tests {
    use crate::Spanned;

    use super::*;

    /// Expands to the raw value bytes of the first header named `$name` on
    /// the parsed message `$msg`, panicking if no such header exists.
    macro_rules! first_header {
        ($msg:expr, $name:expr) => {
            $msg.headers_with_name($name)
                .next()
                .unwrap()
                .value
                .as_bytes()
        };
    }

    // A body-less GET followed by a POST carrying a 14-byte body.
    const MULTIPLE_REQUESTS: &[u8] = b"GET / HTTP/1.1\r\nHost: localhost\r\n\r\n\
        POST /hello HTTP/1.1\r\nHost: localhost\r\nContent-Length: 14\r\n\r\n\
        Hello, world!\n";

    // Three responses back to back; only the middle one carries a body.
    const MULTIPLE_RESPONSES: &[u8] = b"HTTP/1.1 200 OK\r\nContent-Length: 0\r\n\r\n\
        HTTP/1.1 200 OK\r\nContent-Length: 14\r\n\r\nHello, world!\n\
        HTTP/1.1 204 OK\r\nContent-Length: 0\r\n\r\n";

    #[test]
    fn test_parse_requests() {
        let parsed: Result<Vec<_>, _> = Requests::new_from_slice(MULTIPLE_REQUESTS).collect();
        let reqs = parsed.unwrap();

        assert_eq!(reqs.len(), 2);

        // First request: GET without a body.
        assert_eq!(reqs[0].request.method.as_str(), "GET");
        assert!(reqs[0].body.is_none());
        assert_eq!(first_header!(reqs[0], "host"), b"localhost");

        // Second request: POST with a Content-Length delimited body.
        assert_eq!(reqs[1].request.method.as_str(), "POST");
        assert_eq!(first_header!(reqs[1], "host"), b"localhost");
        assert_eq!(first_header!(reqs[1], "content-length"), b"14");
        assert_eq!(
            reqs[1].body.as_ref().unwrap().span(),
            b"Hello, world!\n".as_slice()
        );
    }

    #[test]
    fn test_parse_responses() {
        let parsed: Result<Vec<_>, _> = Responses::new_from_slice(MULTIPLE_RESPONSES).collect();
        let resps = parsed.unwrap();

        assert_eq!(resps.len(), 3);

        // First response: empty body.
        assert_eq!(resps[0].status.code.as_str(), "200");
        assert_eq!(first_header!(resps[0], "content-length"), b"0");
        assert!(resps[0].body.is_none());

        // Second response: 14-byte body.
        assert_eq!(resps[1].status.code.as_str(), "200");
        assert_eq!(first_header!(resps[1], "content-length"), b"14");
        assert_eq!(
            resps[1].body.as_ref().unwrap().span(),
            b"Hello, world!\n".as_slice()
        );

        // Third response: empty body again.
        assert_eq!(resps[2].status.code.as_str(), "204");
        assert_eq!(first_header!(resps[2], "content-length"), b"0");
        assert!(resps[2].body.is_none());
    }

    #[test]
    fn test_parse_request_duplicate_headers() {
        let req_bytes = b"GET / HTTP/1.1\r\nHost: localhost\r\nAccept: application/json\r\n\
            Accept: application/xml\r\n\r\n";
        let parsed: Result<Vec<_>, _> = Requests::new_from_slice(req_bytes).collect();
        let reqs = parsed.unwrap();

        assert_eq!(reqs.len(), 1);
        let req = &reqs[0];

        // A header that appears once yields exactly one match.
        let host: Vec<_> = req.headers_with_name("host").collect();
        assert_eq!(host.len(), 1);
        assert_eq!(host[0].value.as_bytes(), b"localhost");

        // A repeated header yields every occurrence, in source order.
        let accept: Vec<_> = req.headers_with_name("accept").collect();
        assert_eq!(accept.len(), 2);
        let accept_values: Vec<_> = accept.iter().map(|h| h.value.as_bytes()).collect();
        assert_eq!(
            accept_values,
            vec!["application/json".as_bytes(), "application/xml".as_bytes()],
        );
    }

    #[test]
    fn test_parse_response_duplicate_headers() {
        let resp_bytes = b"HTTP/1.1 200 OK\r\nSet-Cookie: lang=en; Path=/\r\n\
            Set-Cookie: fang=fen; Path=/\r\nContent-Length: 14\r\n\r\n{\"foo\": \"bar\"}";
        let parsed: Result<Vec<_>, _> = Responses::new_from_slice(resp_bytes).collect();
        let resps = parsed.unwrap();

        assert_eq!(resps.len(), 1);
        let resp = &resps[0];

        // A repeated header yields every occurrence, in source order.
        let cookies: Vec<_> = resp.headers_with_name("set-cookie").collect();
        assert_eq!(cookies.len(), 2);
        let cookie_values: Vec<_> = cookies.iter().map(|h| h.value.as_bytes()).collect();
        assert_eq!(
            cookie_values,
            vec!["lang=en; Path=/".as_bytes(), "fang=fen; Path=/".as_bytes()],
        );

        // A header that appears once yields exactly one match.
        let length: Vec<_> = resp.headers_with_name("content-length").collect();
        assert_eq!(length.len(), 1);
        assert_eq!(length[0].value.as_bytes(), b"14");
    }
}
|
||||||
435
spansy/src/http/span.rs
Normal file
435
spansy/src/http/span.rs
Normal file
@@ -0,0 +1,435 @@
|
|||||||
|
use bytes::Bytes;
|
||||||
|
|
||||||
|
use crate::{
|
||||||
|
helpers::get_span_range,
|
||||||
|
http::{
|
||||||
|
Body, Code, Header, HeaderName, HeaderValue, Method, Reason, Request, RequestLine,
|
||||||
|
Response, Status, Target,
|
||||||
|
},
|
||||||
|
ParseError, Span,
|
||||||
|
};
|
||||||
|
|
||||||
|
const MAX_HEADERS: usize = 128;
|
||||||
|
|
||||||
|
/// Parses an HTTP request.
|
||||||
|
pub fn parse_request(src: &[u8]) -> Result<Request, ParseError> {
|
||||||
|
parse_request_from_bytes(&Bytes::copy_from_slice(src), 0)
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Parses an HTTP request from a `Bytes` buffer starting from the `offset`.
|
||||||
|
pub(crate) fn parse_request_from_bytes(src: &Bytes, offset: usize) -> Result<Request, ParseError> {
|
||||||
|
let mut headers = [httparse::EMPTY_HEADER; MAX_HEADERS];
|
||||||
|
|
||||||
|
let (method, path, head_end) = {
|
||||||
|
let mut request = httparse::Request::new(&mut headers);
|
||||||
|
|
||||||
|
let head_end = match request.parse(&src[offset..]) {
|
||||||
|
Ok(httparse::Status::Complete(head_end)) => head_end + offset,
|
||||||
|
Ok(httparse::Status::Partial) => {
|
||||||
|
return Err(ParseError(format!("incomplete request: {:?}", src)))
|
||||||
|
}
|
||||||
|
Err(err) => return Err(ParseError(err.to_string())),
|
||||||
|
};
|
||||||
|
|
||||||
|
let method = request
|
||||||
|
.method
|
||||||
|
.ok_or_else(|| ParseError("method missing from request".to_string()))?;
|
||||||
|
|
||||||
|
let path = request
|
||||||
|
.path
|
||||||
|
.ok_or_else(|| ParseError("path missing from request".to_string()))?;
|
||||||
|
|
||||||
|
(method, path, head_end)
|
||||||
|
};
|
||||||
|
|
||||||
|
let request_line_end = src[offset..]
|
||||||
|
.windows(2)
|
||||||
|
.position(|w| w == b"\r\n")
|
||||||
|
.expect("request line is terminated with CRLF");
|
||||||
|
let request_line_range = offset..offset + request_line_end + 2;
|
||||||
|
|
||||||
|
let headers = headers
|
||||||
|
.iter()
|
||||||
|
.take_while(|h| *h != &httparse::EMPTY_HEADER)
|
||||||
|
.map(|header| from_header(src, header))
|
||||||
|
.collect();
|
||||||
|
|
||||||
|
// httparse allocates a new buffer to store the method for performance reasons,
|
||||||
|
// so we have to search for the span in the source. This is quick as the method
|
||||||
|
// is at the front.
|
||||||
|
let method = src[offset..]
|
||||||
|
.windows(method.len())
|
||||||
|
.find(|w| *w == method.as_bytes())
|
||||||
|
.expect("method is present");
|
||||||
|
|
||||||
|
let mut request = Request {
|
||||||
|
span: Span::new_bytes(src.clone(), offset..head_end),
|
||||||
|
request: RequestLine {
|
||||||
|
span: Span::new_str(src.clone(), request_line_range),
|
||||||
|
method: Method(Span::new_str(src.clone(), get_span_range(src, method))),
|
||||||
|
target: Target(Span::new_from_str(src.clone(), path)),
|
||||||
|
},
|
||||||
|
headers,
|
||||||
|
body: None,
|
||||||
|
};
|
||||||
|
|
||||||
|
let body_len = request_body_len(&request)?;
|
||||||
|
|
||||||
|
if body_len > 0 {
|
||||||
|
let range = head_end..head_end + body_len;
|
||||||
|
|
||||||
|
if range.end > src.len() {
|
||||||
|
return Err(ParseError(format!(
|
||||||
|
"body range {}..{} exceeds source {}",
|
||||||
|
range.start,
|
||||||
|
range.end,
|
||||||
|
src.len()
|
||||||
|
)));
|
||||||
|
}
|
||||||
|
|
||||||
|
request.span = Span::new_bytes(src.clone(), offset..range.end);
|
||||||
|
|
||||||
|
request.body = Some(Body {
|
||||||
|
span: Span::new_bytes(src.clone(), range),
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
Ok(request)
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Parses an HTTP response.
|
||||||
|
pub fn parse_response(src: &[u8]) -> Result<Response, ParseError> {
|
||||||
|
parse_response_from_bytes(&Bytes::copy_from_slice(src), 0)
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Parses an HTTP response from a `Bytes` buffer starting from the `offset`.
|
||||||
|
pub(crate) fn parse_response_from_bytes(
|
||||||
|
src: &Bytes,
|
||||||
|
offset: usize,
|
||||||
|
) -> Result<Response, ParseError> {
|
||||||
|
let mut headers = [httparse::EMPTY_HEADER; MAX_HEADERS];
|
||||||
|
|
||||||
|
let (reason, code, head_end) = {
|
||||||
|
let mut response = httparse::Response::new(&mut headers);
|
||||||
|
|
||||||
|
let head_end = match response.parse(&src[offset..]) {
|
||||||
|
Ok(httparse::Status::Complete(head_end)) => head_end + offset,
|
||||||
|
Ok(httparse::Status::Partial) => {
|
||||||
|
return Err(ParseError(format!("incomplete response: {:?}", src)))
|
||||||
|
}
|
||||||
|
Err(err) => return Err(ParseError(err.to_string())),
|
||||||
|
};
|
||||||
|
|
||||||
|
let code = response
|
||||||
|
.code
|
||||||
|
.ok_or_else(|| ParseError("code missing from response".to_string()))
|
||||||
|
.map(|c| c.to_string())?;
|
||||||
|
|
||||||
|
let reason = response
|
||||||
|
.reason
|
||||||
|
.ok_or_else(|| ParseError("reason missing from response".to_string()))?;
|
||||||
|
|
||||||
|
(reason, code, head_end)
|
||||||
|
};
|
||||||
|
|
||||||
|
let status_line_end = src[offset..]
|
||||||
|
.windows(2)
|
||||||
|
.position(|w| w == b"\r\n")
|
||||||
|
.expect("status line is terminated with CRLF");
|
||||||
|
let status_line_range = offset..offset + status_line_end + 2;
|
||||||
|
|
||||||
|
let headers = headers
|
||||||
|
.iter()
|
||||||
|
.take_while(|h| *h != &httparse::EMPTY_HEADER)
|
||||||
|
.map(|header| from_header(src, header))
|
||||||
|
.collect();
|
||||||
|
|
||||||
|
// httparse doesn't preserve the response code span, so we find it.
|
||||||
|
let code = src[offset..]
|
||||||
|
.windows(3)
|
||||||
|
.find(|w| *w == code.as_bytes())
|
||||||
|
.expect("code is present");
|
||||||
|
|
||||||
|
let mut response = Response {
|
||||||
|
span: Span::new_bytes(src.clone(), offset..head_end),
|
||||||
|
status: Status {
|
||||||
|
span: Span::new_str(src.clone(), status_line_range),
|
||||||
|
code: Code(Span::new_str(src.clone(), get_span_range(src, code))),
|
||||||
|
reason: Reason(Span::new_from_str(src.clone(), reason)),
|
||||||
|
},
|
||||||
|
headers,
|
||||||
|
body: None,
|
||||||
|
};
|
||||||
|
|
||||||
|
let body_len = response_body_len(&response)?;
|
||||||
|
|
||||||
|
if body_len > 0 {
|
||||||
|
let range = head_end..head_end + body_len;
|
||||||
|
|
||||||
|
if range.end > src.len() {
|
||||||
|
return Err(ParseError(format!(
|
||||||
|
"body range {}..{} exceeds source {}",
|
||||||
|
range.start,
|
||||||
|
range.end,
|
||||||
|
src.len()
|
||||||
|
)));
|
||||||
|
}
|
||||||
|
|
||||||
|
response.span = Span::new_bytes(src.clone(), offset..range.end);
|
||||||
|
|
||||||
|
response.body = Some(Body {
|
||||||
|
span: Span::new_bytes(src.clone(), range),
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
Ok(response)
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Converts a `httparse::Header` to a `Header`.
|
||||||
|
fn from_header(src: &Bytes, header: &httparse::Header) -> Header {
|
||||||
|
let name_range = get_span_range(src, header.name.as_bytes());
|
||||||
|
let value_range = get_span_range(src, header.value);
|
||||||
|
|
||||||
|
let crlf_idx = src[value_range.end..]
|
||||||
|
.windows(2)
|
||||||
|
.position(|b| b == b"\r\n")
|
||||||
|
.expect("CRLF is present in a valid header");
|
||||||
|
|
||||||
|
// Capture the entire header including trailing whitespace and the CRLF.
|
||||||
|
let header_range = name_range.start..value_range.end + crlf_idx + 2;
|
||||||
|
|
||||||
|
Header {
|
||||||
|
span: Span::new_bytes(src.clone(), header_range),
|
||||||
|
name: HeaderName(Span::new_str(src.clone(), name_range)),
|
||||||
|
value: HeaderValue(Span::new_bytes(src.clone(), value_range)),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Calculates the length of the request body according to RFC 9112, section 6.
|
||||||
|
fn request_body_len(request: &Request) -> Result<usize, ParseError> {
|
||||||
|
// The presence of a message body in a request is signaled by a Content-Length
|
||||||
|
// or Transfer-Encoding header field.
|
||||||
|
|
||||||
|
// If a message is received with both a Transfer-Encoding and a Content-Length header field,
|
||||||
|
// the Transfer-Encoding overrides the Content-Length
|
||||||
|
if request
|
||||||
|
.headers_with_name("Transfer-Encoding")
|
||||||
|
.next()
|
||||||
|
.is_some()
|
||||||
|
{
|
||||||
|
Err(ParseError(
|
||||||
|
"Transfer-Encoding not supported yet".to_string(),
|
||||||
|
))
|
||||||
|
} else if let Some(h) = request.headers_with_name("Content-Length").next() {
|
||||||
|
// If a valid Content-Length header field is present without Transfer-Encoding, its decimal value
|
||||||
|
// defines the expected message body length in octets.
|
||||||
|
std::str::from_utf8(h.value.0.as_bytes())?
|
||||||
|
.parse::<usize>()
|
||||||
|
.map_err(|err| ParseError(format!("failed to parse Content-Length value: {err}")))
|
||||||
|
} else {
|
||||||
|
// If this is a request message and none of the above are true, then the message body length is zero
|
||||||
|
Ok(0)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Calculates the length of the response body according to RFC 9112, section 6.
|
||||||
|
fn response_body_len(response: &Response) -> Result<usize, ParseError> {
|
||||||
|
// Any response to a HEAD request and any response with a 1xx (Informational), 204 (No Content), or 304 (Not Modified)
|
||||||
|
// status code is always terminated by the first empty line after the header fields, regardless of the header fields
|
||||||
|
// present in the message, and thus cannot contain a message body or trailer section.
|
||||||
|
match response
|
||||||
|
.status
|
||||||
|
.code
|
||||||
|
.as_str()
|
||||||
|
.parse::<usize>()
|
||||||
|
.expect("code is valid utf-8")
|
||||||
|
{
|
||||||
|
100..=199 | 204 | 304 => return Ok(0),
|
||||||
|
_ => {}
|
||||||
|
}
|
||||||
|
|
||||||
|
if response
|
||||||
|
.headers_with_name("Transfer-Encoding")
|
||||||
|
.next()
|
||||||
|
.is_some()
|
||||||
|
{
|
||||||
|
Err(ParseError(
|
||||||
|
"Transfer-Encoding not supported yet".to_string(),
|
||||||
|
))
|
||||||
|
} else if let Some(h) = response.headers_with_name("Content-Length").next() {
|
||||||
|
// If a valid Content-Length header field is present without Transfer-Encoding, its decimal value
|
||||||
|
// defines the expected message body length in octets.
|
||||||
|
std::str::from_utf8(h.value.0.as_bytes())?
|
||||||
|
.parse::<usize>()
|
||||||
|
.map_err(|err| ParseError(format!("failed to parse Content-Length value: {err}")))
|
||||||
|
} else {
|
||||||
|
// If this is a response message and none of the above are true, then there is no way to
|
||||||
|
// determine the length of the message body except by reading it until the connection is closed.
|
||||||
|
|
||||||
|
// We currently consider this an error because we have no outer context information.
|
||||||
|
Err(ParseError(
|
||||||
|
"A response with a body must contain either a Content-Length or Transfer-Encoding header".to_string(),
|
||||||
|
))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[cfg(test)]
mod tests {
    use crate::Spanned;

    use super::*;

    const TEST_REQUEST: &[u8] = b"\
        GET /home.html HTTP/1.1\r\n\
        Host: developer.mozilla.org\r\n\
        User-Agent: Mozilla/5.0 (Macintosh; Intel Mac OS X 10.9; rv:50.0) Gecko/20100101 Firefox/50.0\r\n\
        Accept: text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.\r\n\
        Accept-Language: en-US,en;q=0.\r\n\
        Accept-Encoding: gzip, deflate, b\r\n\
        Referer: https://developer.mozilla.org/testpage.htm\r\n\
        Connection: keep-alive\r\n\
        Content-Length: 12\r\n\
        Cache-Control: max-age=0\r\n\r\n\
        Hello World!";

    const TEST_RESPONSE: &[u8] = b"\
        HTTP/1.1 200 OK\r\n\
        Date: Mon, 27 Jul 2009 12:28:53 GMT\r\n\
        Server: Apache/2.2.14 (Win32)\r\n\
        Last-Modified: Wed, 22 Jul 2009 19:15:56 GMT\r\n\
        Content-Length: 52\r\n\
        Content-Type: text/html\r\n\
        Connection: Closed\r\n\r\n\
        <html>\n\
        <body>\n\
        <h1>Hello, World!</h1>\n\
        </body>\n\
        </html>";

    const TEST_REQUEST2: &[u8] = b"\
        GET /info.html HTTP/1.1\r\n\
        Host: tlsnotary.org\r\n\
        User-Agent: client\r\n\
        Content-Length: 4\r\n\r\n\
        ping";

    const TEST_RESPONSE2: &[u8] = b"\
        HTTP/1.1 200 OK\r\n\
        Server: server\r\n\
        Content-Length: 4\r\n\
        Content-Type: text/plain\r\n\
        Connection: keep-alive\r\n\r\n\
        pong";

    /// Asserts that `req` is the parsed form of `TEST_REQUEST`.
    fn assert_is_test_request(req: Request) {
        assert_eq!(req.span(), TEST_REQUEST);
        assert_eq!(req.request.method.as_str(), "GET");

        let host = req.headers_with_name("Host").next().unwrap();
        assert_eq!(host.value.span(), b"developer.mozilla.org".as_slice());

        let user_agent = req.headers_with_name("User-Agent").next().unwrap();
        assert_eq!(
            user_agent.value.span(),
            b"Mozilla/5.0 (Macintosh; Intel Mac OS X 10.9; rv:50.0) Gecko/20100101 Firefox/50.0"
                .as_slice()
        );

        assert_eq!(req.body.unwrap().span(), b"Hello World!".as_slice());
    }

    /// Asserts that `res` is the parsed form of `TEST_RESPONSE`.
    fn assert_is_test_response(res: Response) {
        assert_eq!(res.span(), TEST_RESPONSE);
        assert_eq!(res.status.code.as_str(), "200");
        assert_eq!(res.status.reason.as_str(), "OK");

        let server = res.headers_with_name("Server").next().unwrap();
        assert_eq!(server.value.span(), b"Apache/2.2.14 (Win32)".as_slice());

        let connection = res.headers_with_name("Connection").next().unwrap();
        assert_eq!(connection.value.span(), b"Closed".as_slice());

        assert_eq!(
            res.body.unwrap().span(),
            b"<html>\n<body>\n<h1>Hello, World!</h1>\n</body>\n</html>".as_slice()
        );
    }

    #[test]
    fn test_parse_request() {
        assert_is_test_request(parse_request(TEST_REQUEST).unwrap());
    }

    #[test]
    fn test_parse_header_trailing_whitespace() {
        let req = parse_request(b"GET / HTTP/1.1\r\nHost: example.com \r\n\r\n").unwrap();
        let header = req.headers_with_name("Host").next().unwrap();

        assert_eq!(header.span.as_bytes(), b"Host: example.com \r\n".as_slice());
    }

    #[test]
    fn test_parse_response() {
        assert_is_test_response(parse_response(TEST_RESPONSE).unwrap());
    }

    // Make sure the first request is not parsed.
    #[test]
    fn test_parse_request_from_bytes() {
        let mut request = Vec::new();
        request.extend(TEST_REQUEST2);
        request.extend(TEST_REQUEST);
        let request = Bytes::copy_from_slice(&request);

        let req = parse_request_from_bytes(&request, TEST_REQUEST2.len()).unwrap();

        assert_is_test_request(req);
    }

    // Make sure the first response is not parsed.
    #[test]
    fn test_parse_response_from_bytes() {
        let mut response = Vec::new();
        response.extend(TEST_RESPONSE2);
        response.extend(TEST_RESPONSE);
        let response = Bytes::copy_from_slice(&response);

        let res = parse_response_from_bytes(&response, TEST_RESPONSE2.len()).unwrap();

        assert_is_test_response(res);
    }
}
|
||||||
368
spansy/src/http/types.rs
Normal file
368
spansy/src/http/types.rs
Normal file
@@ -0,0 +1,368 @@
|
|||||||
|
use utils::range::{RangeDifference, RangeSet};
|
||||||
|
|
||||||
|
use crate::{Span, Spanned};
|
||||||
|
|
||||||
|
/// An HTTP header name.
|
||||||
|
#[derive(Debug, Clone, PartialEq, Eq)]
|
||||||
|
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
|
||||||
|
pub struct HeaderName(pub(crate) Span<str>);
|
||||||
|
|
||||||
|
impl HeaderName {
|
||||||
|
/// Returns the header name as a string slice.
|
||||||
|
pub fn as_str(&self) -> &str {
|
||||||
|
self.0.as_str()
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Shifts the span range by the given offset.
|
||||||
|
pub fn offset(&mut self, offset: usize) {
|
||||||
|
self.0.offset(offset);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl Spanned<str> for HeaderName {
|
||||||
|
fn span(&self) -> &Span<str> {
|
||||||
|
&self.0
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// An HTTP header value.
|
||||||
|
#[derive(Debug, Clone, PartialEq, Eq)]
|
||||||
|
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
|
||||||
|
pub struct HeaderValue(pub(crate) Span);
|
||||||
|
|
||||||
|
impl HeaderValue {
|
||||||
|
/// Returns the header value as a byte slice.
|
||||||
|
pub fn as_bytes(&self) -> &[u8] {
|
||||||
|
self.0.as_bytes()
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Shifts the span range by the given offset.
|
||||||
|
pub fn offset(&mut self, offset: usize) {
|
||||||
|
self.0.offset(offset);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl Spanned for HeaderValue {
|
||||||
|
fn span(&self) -> &Span {
|
||||||
|
&self.0
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// An HTTP header, including optional whitespace and the trailing CRLF.
|
||||||
|
#[derive(Debug, Clone, PartialEq, Eq)]
|
||||||
|
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
|
||||||
|
pub struct Header {
|
||||||
|
pub(crate) span: Span,
|
||||||
|
/// The header name.
|
||||||
|
pub name: HeaderName,
|
||||||
|
/// The header value.
|
||||||
|
pub value: HeaderValue,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl Header {
|
||||||
|
/// Returns the indices of the header excluding the value.
|
||||||
|
///
|
||||||
|
/// The indices will include any optional whitespace and the CRLF.
|
||||||
|
pub fn without_value(&self) -> RangeSet<usize> {
|
||||||
|
self.span.indices.difference(&self.value.span().indices)
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Shifts the span range by the given offset.
|
||||||
|
pub fn offset(&mut self, offset: usize) {
|
||||||
|
self.span.offset(offset);
|
||||||
|
self.name.offset(offset);
|
||||||
|
self.value.offset(offset);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl Spanned for Header {
|
||||||
|
fn span(&self) -> &Span {
|
||||||
|
&self.span
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// An HTTP request method.
|
||||||
|
#[derive(Debug, Clone, PartialEq, Eq)]
|
||||||
|
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
|
||||||
|
pub struct Method(pub(crate) Span<str>);
|
||||||
|
|
||||||
|
impl Method {
|
||||||
|
/// Returns the method as a string slice.
|
||||||
|
pub fn as_str(&self) -> &str {
|
||||||
|
self.0.as_str()
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Shifts the span range by the given offset.
|
||||||
|
pub fn offset(&mut self, offset: usize) {
|
||||||
|
self.0.offset(offset);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl Spanned<str> for Method {
|
||||||
|
fn span(&self) -> &Span<str> {
|
||||||
|
&self.0
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// An HTTP request target.
|
||||||
|
#[derive(Debug, Clone, PartialEq, Eq)]
|
||||||
|
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
|
||||||
|
pub struct Target(pub(crate) Span<str>);
|
||||||
|
|
||||||
|
impl Target {
|
||||||
|
/// Returns the target as a string slice.
|
||||||
|
pub fn as_str(&self) -> &str {
|
||||||
|
self.0.as_str()
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Shifts the span range by the given offset.
|
||||||
|
pub fn offset(&mut self, offset: usize) {
|
||||||
|
self.0.offset(offset);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl Spanned<str> for Target {
|
||||||
|
fn span(&self) -> &Span<str> {
|
||||||
|
&self.0
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// An HTTP request line, including the trailing CRLF.
|
||||||
|
#[derive(Debug, Clone, PartialEq, Eq)]
|
||||||
|
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
|
||||||
|
pub struct RequestLine {
|
||||||
|
pub(crate) span: Span<str>,
|
||||||
|
|
||||||
|
/// The request method.
|
||||||
|
pub method: Method,
|
||||||
|
/// The request target.
|
||||||
|
pub target: Target,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl RequestLine {
|
||||||
|
/// Returns the indices of the request line excluding the request target.
|
||||||
|
pub fn without_target(&self) -> RangeSet<usize> {
|
||||||
|
self.span.indices.difference(&self.target.0.indices)
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Shifts the span range by the given offset.
|
||||||
|
pub fn offset(&mut self, offset: usize) {
|
||||||
|
self.span.offset(offset);
|
||||||
|
self.method.offset(offset);
|
||||||
|
self.target.offset(offset);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl Spanned<str> for RequestLine {
|
||||||
|
fn span(&self) -> &Span<str> {
|
||||||
|
&self.span
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// An HTTP request.
|
||||||
|
#[derive(Debug, Clone, PartialEq, Eq)]
|
||||||
|
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
|
||||||
|
pub struct Request {
|
||||||
|
pub(crate) span: Span,
|
||||||
|
/// The request line.
|
||||||
|
pub request: RequestLine,
|
||||||
|
/// Request headers.
|
||||||
|
pub headers: Vec<Header>,
|
||||||
|
/// Request body.
|
||||||
|
pub body: Option<Body>,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl Request {
|
||||||
|
/// Returns an iterator of request headers with the given name (case-insensitive).
|
||||||
|
///
|
||||||
|
/// This method returns an iterator because it is valid for HTTP records to contain
|
||||||
|
/// duplicate header names.
|
||||||
|
pub fn headers_with_name<'a>(&'a self, name: &'a str) -> impl Iterator<Item = &'a Header> {
|
||||||
|
self.headers
|
||||||
|
.iter()
|
||||||
|
.filter(|h| h.name.0.as_str().eq_ignore_ascii_case(name))
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Returns the indices of the request excluding the target, headers and body.
|
||||||
|
pub fn without_data(&self) -> RangeSet<usize> {
|
||||||
|
let mut indices = self.span.indices.difference(&self.request.target.0.indices);
|
||||||
|
for header in &self.headers {
|
||||||
|
indices = indices.difference(header.span.indices());
|
||||||
|
}
|
||||||
|
if let Some(body) = &self.body {
|
||||||
|
indices = indices.difference(body.span.indices());
|
||||||
|
}
|
||||||
|
indices
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Shifts the span range by the given offset.
|
||||||
|
pub fn offset(&mut self, offset: usize) {
|
||||||
|
self.span.offset(offset);
|
||||||
|
self.request.offset(offset);
|
||||||
|
for header in &mut self.headers {
|
||||||
|
header.offset(offset);
|
||||||
|
}
|
||||||
|
if let Some(body) = &mut self.body {
|
||||||
|
body.offset(offset);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl Spanned for Request {
|
||||||
|
fn span(&self) -> &Span {
|
||||||
|
&self.span
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// An HTTP response code.
|
||||||
|
#[derive(Debug, Clone, PartialEq, Eq)]
|
||||||
|
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
|
||||||
|
pub struct Code(pub(crate) Span<str>);
|
||||||
|
|
||||||
|
impl Code {
|
||||||
|
/// Returns the response code as a string slice.
|
||||||
|
pub fn as_str(&self) -> &str {
|
||||||
|
self.0.as_str()
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Shifts the span range by the given offset.
|
||||||
|
pub fn offset(&mut self, offset: usize) {
|
||||||
|
self.0.offset(offset);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl Spanned<str> for Code {
|
||||||
|
fn span(&self) -> &Span<str> {
|
||||||
|
&self.0
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// An HTTP response reason phrase.
|
||||||
|
#[derive(Debug, Clone, PartialEq, Eq)]
|
||||||
|
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
|
||||||
|
pub struct Reason(pub(crate) Span<str>);
|
||||||
|
|
||||||
|
impl Reason {
|
||||||
|
/// Returns the response reason phrase as a string slice.
|
||||||
|
pub fn as_str(&self) -> &str {
|
||||||
|
self.0.as_str()
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Shifts the span range by the given offset.
|
||||||
|
pub fn offset(&mut self, offset: usize) {
|
||||||
|
self.0.offset(offset);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl Spanned<str> for Reason {
|
||||||
|
fn span(&self) -> &Span<str> {
|
||||||
|
&self.0
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// An HTTP response status.
|
||||||
|
#[derive(Debug, Clone, PartialEq, Eq)]
|
||||||
|
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
|
||||||
|
pub struct Status {
|
||||||
|
pub(crate) span: Span<str>,
|
||||||
|
|
||||||
|
/// The response code.
|
||||||
|
pub code: Code,
|
||||||
|
/// The reason phrase.
|
||||||
|
pub reason: Reason,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl Status {
|
||||||
|
/// Shifts the span range by the given offset.
|
||||||
|
pub fn offset(&mut self, offset: usize) {
|
||||||
|
self.span.offset(offset);
|
||||||
|
self.code.offset(offset);
|
||||||
|
self.reason.offset(offset);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl Spanned<str> for Status {
|
||||||
|
fn span(&self) -> &Span<str> {
|
||||||
|
&self.span
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// An HTTP response.
|
||||||
|
#[derive(Debug, Clone, PartialEq, Eq)]
|
||||||
|
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
|
||||||
|
pub struct Response {
|
||||||
|
pub(crate) span: Span,
|
||||||
|
/// The response status.
|
||||||
|
pub status: Status,
|
||||||
|
/// Response headers.
|
||||||
|
pub headers: Vec<Header>,
|
||||||
|
/// Response body.
|
||||||
|
pub body: Option<Body>,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl Response {
|
||||||
|
/// Returns an iterator of response headers with the given name (case-insensitive).
|
||||||
|
///
|
||||||
|
/// This method returns an iterator because it is valid for HTTP records to contain
|
||||||
|
/// duplicate header names.
|
||||||
|
pub fn headers_with_name<'a>(&'a self, name: &'a str) -> impl Iterator<Item = &'a Header> {
|
||||||
|
self.headers
|
||||||
|
.iter()
|
||||||
|
.filter(|h| h.name.0.as_str().eq_ignore_ascii_case(name))
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Returns the indices of the response excluding the headers and body.
|
||||||
|
pub fn without_data(&self) -> RangeSet<usize> {
|
||||||
|
let mut indices = self.span.indices.clone();
|
||||||
|
for header in &self.headers {
|
||||||
|
indices = indices.difference(header.span.indices());
|
||||||
|
}
|
||||||
|
if let Some(body) = &self.body {
|
||||||
|
indices = indices.difference(body.span.indices());
|
||||||
|
}
|
||||||
|
indices
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Shifts the span range by the given offset.
|
||||||
|
pub fn offset(&mut self, offset: usize) {
|
||||||
|
self.span.offset(offset);
|
||||||
|
self.status.offset(offset);
|
||||||
|
for header in &mut self.headers {
|
||||||
|
header.offset(offset);
|
||||||
|
}
|
||||||
|
if let Some(body) = &mut self.body {
|
||||||
|
body.offset(offset);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl Spanned for Response {
|
||||||
|
fn span(&self) -> &Span {
|
||||||
|
&self.span
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// An HTTP request or response body.
|
||||||
|
#[derive(Debug, Clone, PartialEq, Eq)]
|
||||||
|
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
|
||||||
|
pub struct Body {
|
||||||
|
pub(crate) span: Span,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl Body {
|
||||||
|
/// Returns the body as a byte slice.
|
||||||
|
pub fn as_bytes(&self) -> &[u8] {
|
||||||
|
self.span.as_bytes()
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Shifts the span range by the given offset.
|
||||||
|
pub fn offset(&mut self, offset: usize) {
|
||||||
|
self.span.offset(offset);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl Spanned for Body {
|
||||||
|
fn span(&self) -> &Span {
|
||||||
|
&self.span
|
||||||
|
}
|
||||||
|
}
|
||||||
41
spansy/src/json/json.pest
Normal file
41
spansy/src/json/json.pest
Normal file
@@ -0,0 +1,41 @@
|
|||||||
|
// pest. The Elegant Parser
|
||||||
|
// Copyright (c) 2018 Dragoș Tiselice
|
||||||
|
//
|
||||||
|
// Licensed under the Apache License, Version 2.0
|
||||||
|
// <LICENSE-APACHE or http://www.apache.org/licenses/LICENSE-2.0> or the MIT
|
||||||
|
// license <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
|
||||||
|
// option. All files in the project carrying such notice may not be copied,
|
||||||
|
// modified, or distributed except according to those terms.
|
||||||
|
|
||||||
|
//! A parser for JSON files.
//!
//! This grammar is an example of a JSON parser.
json = _{ SOI ~ value ~ eoi }
eoi = _{ !ANY }

/// Matches object, e.g.: `{ "foo": "bar" }`
/// Pairs may follow one another with or without a separating comma.
object = { "{" ~ pair ~ (pair)* ~ "}" | "{" ~ "}" }
pair = { quoted_string ~ ":" ~ value ~ (",")? }

array = { "[" ~ value ~ ("," ~ value)* ~ "]" | "[" ~ "]" }

//////////////////////
/// Matches value, e.g.: `"foo"`, `42`, `true`, `null`, `[]`, `{}`.
//////////////////////
value = _{ quoted_string | number | object | array | bool | null }

quoted_string = _{ "\"" ~ string ~ "\"" }
string = @{ (!("\"" | "\\") ~ ANY)* ~ (escape ~ string)? }
escape = @{ "\\" ~ ("\"" | "\\" | "/" | "b" | "f" | "n" | "r" | "t" | unicode) }
unicode = @{ "u" ~ ASCII_HEX_DIGIT{4} }

number = @{ "-"? ~ int ~ ("." ~ ASCII_DIGIT+ ~ exp? | exp)? }
int = @{ "0" | ASCII_NONZERO_DIGIT ~ ASCII_DIGIT* }
exp = @{ ("E" | "e") ~ ("+" | "-")? ~ ASCII_DIGIT+ }

bool = { "true" | "false" }

null = { "null" }

WHITESPACE = _{ " " | "\t" | "\r" | "\n" }
|
||||||
34
spansy/src/json/mod.rs
Normal file
34
spansy/src/json/mod.rs
Normal file
@@ -0,0 +1,34 @@
|
|||||||
|
//! JSON span parsing.
|
||||||
|
//!
|
||||||
|
//! This module provides a JSON parser that can be used to parse span information for each JSON value within
|
||||||
|
//! a source string.
|
||||||
|
//!
|
||||||
|
//! Note that the parser does *not* fully parse values, it simply computes the span of the corresponding
|
||||||
|
//! characters in the source string. Thus, this parser should not be expected to perform any kind of
|
||||||
|
//! validation of the JSON.
|
||||||
|
//!
|
||||||
|
//! # Example
|
||||||
|
//!
|
||||||
|
//! ```
|
||||||
|
//! use spansy::{json, Spanned};
|
||||||
|
//!
|
||||||
|
//! let src = "{\"foo\": {\"bar\": [42, 14]}}";
|
||||||
|
//!
|
||||||
|
//! let value = json::parse_str(src).unwrap();
|
||||||
|
//!
|
||||||
|
//! // We can assert that the value present at the path "foo.bar.1" is the number 14.
|
||||||
|
//! assert_eq!(value.get("foo.bar.1").unwrap().span(), "14");
|
||||||
|
//!
|
||||||
|
//! let bar = value.get("foo.bar").unwrap();
|
||||||
|
//!
|
||||||
|
//! // The span of the `bar` array is 16..24 within the source string.
|
||||||
|
//! assert_eq!(bar.span().indices(), 16..24);
|
||||||
|
//! ```
|
||||||
|
|
||||||
|
mod span;
|
||||||
|
mod types;
|
||||||
|
mod visit;
|
||||||
|
|
||||||
|
pub use span::{parse, parse_slice, parse_str};
|
||||||
|
pub use types::{Array, Bool, JsonKey, JsonValue, KeyValue, Null, Number, Object, String};
|
||||||
|
pub use visit::JsonVisit;
|
||||||
171
spansy/src/json/span.rs
Normal file
171
spansy/src/json/span.rs
Normal file
@@ -0,0 +1,171 @@
|
|||||||
|
use bytes::Bytes;
|
||||||
|
use pest::{iterators::Pair as PestPair, Parser};
|
||||||
|
use types::KeyValue;
|
||||||
|
|
||||||
|
use super::types::{self, JsonValue};
|
||||||
|
|
||||||
|
use crate::{ParseError, Span};
|
||||||
|
|
||||||
|
/// Parser derived by `pest_derive` from the grammar in `json/json.pest`.
#[derive(pest_derive::Parser)]
#[grammar = "json/json.pest"]
struct JsonParser;
|
||||||
|
|
||||||
|
/// Parse a JSON value from a source string.
|
||||||
|
pub fn parse_str(src: &str) -> Result<JsonValue, ParseError> {
|
||||||
|
let src = Bytes::copy_from_slice(src.as_bytes());
|
||||||
|
|
||||||
|
// # Safety
|
||||||
|
// `src` was passed as a string slice, so it is guaranteed to be valid UTF-8.
|
||||||
|
let src_str = unsafe { std::str::from_utf8_unchecked(src.as_ref()) };
|
||||||
|
|
||||||
|
let value = JsonParser::parse(Rule::value, src_str)?
|
||||||
|
.next()
|
||||||
|
.ok_or_else(|| ParseError("no json value is present in source".to_string()))?;
|
||||||
|
|
||||||
|
// Since json.pest grammar prohibits leading characters but allows trailing
|
||||||
|
// characters, we prohibit trailing characters here.
|
||||||
|
if value.as_str().len() != src.len() {
|
||||||
|
return Err(ParseError(
|
||||||
|
"trailing characters are present in source".to_string(),
|
||||||
|
));
|
||||||
|
}
|
||||||
|
|
||||||
|
Ok(JsonValue::from_pair(src.clone(), value))
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Parse a JSON value from a byte slice.
|
||||||
|
pub fn parse_slice(src: &[u8]) -> Result<JsonValue, ParseError> {
|
||||||
|
let src = Bytes::copy_from_slice(src);
|
||||||
|
parse(src)
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Parse a JSON value from source bytes.
|
||||||
|
pub fn parse(src: Bytes) -> Result<JsonValue, ParseError> {
|
||||||
|
let src_str = std::str::from_utf8(&src)?;
|
||||||
|
|
||||||
|
let value = JsonParser::parse(Rule::value, src_str)?
|
||||||
|
.next()
|
||||||
|
.ok_or_else(|| ParseError("no json value is present in source".to_string()))?;
|
||||||
|
|
||||||
|
// Since json.pest grammar prohibits leading characters but allows trailing
|
||||||
|
// characters, we prohibit trailing characters here.
|
||||||
|
if value.as_str().len() != src.len() {
|
||||||
|
return Err(ParseError(
|
||||||
|
"trailing characters are present in source".to_string(),
|
||||||
|
));
|
||||||
|
}
|
||||||
|
|
||||||
|
Ok(JsonValue::from_pair(src.clone(), value))
|
||||||
|
}
|
||||||
|
|
||||||
|
macro_rules! impl_from_pair {
|
||||||
|
($ty:ty, $rule:ident) => {
|
||||||
|
impl $ty {
|
||||||
|
fn from_pair(src: Bytes, pair: PestPair<'_, Rule>) -> Self {
|
||||||
|
assert!(matches!(pair.as_rule(), Rule::$rule));
|
||||||
|
|
||||||
|
Self(Span::new_from_str(src, pair.as_str()))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
impl_from_pair!(types::JsonKey, string);
|
||||||
|
impl_from_pair!(types::Number, number);
|
||||||
|
impl_from_pair!(types::Bool, bool);
|
||||||
|
impl_from_pair!(types::Null, null);
|
||||||
|
impl_from_pair!(types::String, string);
|
||||||
|
|
||||||
|
impl types::KeyValue {
|
||||||
|
fn from_pair(src: Bytes, pair: PestPair<'_, Rule>) -> Self {
|
||||||
|
assert!(matches!(pair.as_rule(), Rule::pair));
|
||||||
|
|
||||||
|
let span = Span::new_from_str(src.clone(), pair.as_str().trim_end());
|
||||||
|
|
||||||
|
let mut pairs = pair.into_inner();
|
||||||
|
|
||||||
|
let key = pairs.next().expect("key is present");
|
||||||
|
let value = pairs.next().expect("value is present");
|
||||||
|
|
||||||
|
Self {
|
||||||
|
span,
|
||||||
|
key: types::JsonKey::from_pair(src.clone(), key),
|
||||||
|
value: types::JsonValue::from_pair(src.clone(), value),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl types::Object {
|
||||||
|
fn from_pair(src: Bytes, pair: PestPair<'_, Rule>) -> Self {
|
||||||
|
assert!(matches!(pair.as_rule(), Rule::object));
|
||||||
|
|
||||||
|
Self {
|
||||||
|
span: Span::new_from_str(src.clone(), pair.as_str()),
|
||||||
|
elems: pair
|
||||||
|
.into_inner()
|
||||||
|
.map(|pair| KeyValue::from_pair(src.clone(), pair))
|
||||||
|
.collect(),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl types::Array {
|
||||||
|
fn from_pair(src: Bytes, pair: PestPair<'_, Rule>) -> Self {
|
||||||
|
assert!(matches!(pair.as_rule(), Rule::array));
|
||||||
|
|
||||||
|
Self {
|
||||||
|
span: Span::new_from_str(src.clone(), pair.as_str()),
|
||||||
|
elems: pair
|
||||||
|
.into_inner()
|
||||||
|
.map(|pair| types::JsonValue::from_pair(src.clone(), pair))
|
||||||
|
.collect(),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl types::JsonValue {
|
||||||
|
fn from_pair(src: Bytes, pair: PestPair<'_, Rule>) -> Self {
|
||||||
|
match pair.as_rule() {
|
||||||
|
Rule::object => Self::Object(types::Object::from_pair(src, pair)),
|
||||||
|
Rule::array => Self::Array(types::Array::from_pair(src, pair)),
|
||||||
|
Rule::string => Self::String(types::String::from_pair(src, pair)),
|
||||||
|
Rule::number => Self::Number(types::Number::from_pair(src, pair)),
|
||||||
|
Rule::bool => Self::Bool(types::Bool::from_pair(src, pair)),
|
||||||
|
Rule::null => Self::Null(types::Null::from_pair(src, pair)),
|
||||||
|
rule => unreachable!("unexpected matched rule: {:?}", rule),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
#[cfg(test)]
mod tests {
    use crate::Spanned;

    use super::*;

    #[test]
    fn test_json_spanner() {
        let src = r#"{"foo": "bar", "baz": 123, "quux": { "a": "b", "c": "d" }, "arr": [1, 2, 3]}"#;

        let value = parse_str(src).unwrap();

        // Each path is paired with the source text its span is expected to cover.
        let expectations = [
            ("foo", "bar"),
            ("baz", "123"),
            ("quux.a", "b"),
            ("arr", "[1, 2, 3]"),
        ];

        for (path, expected) in expectations {
            assert_eq!(value.get(path).unwrap().span(), expected);
        }
    }

    #[test]
    fn test_err_leading_characters() {
        let result = parse_str(" {\"foo\": \"bar\"}");

        assert!(result.is_err());
    }

    #[test]
    fn test_err_trailing_characters() {
        let err = parse_str("{\"foo\": \"bar\"} ").err().unwrap();

        assert_eq!(
            err.to_string(),
            "parsing error: trailing characters are present in source"
        );
    }
}
|
||||||
401
spansy/src/json/types.rs
Normal file
401
spansy/src/json/types.rs
Normal file
@@ -0,0 +1,401 @@
|
|||||||
|
use std::ops::{Index, Range};
|
||||||
|
|
||||||
|
use utils::range::{RangeDifference, RangeSet};
|
||||||
|
|
||||||
|
use crate::{Span, Spanned};
|
||||||
|
|
||||||
|
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
|
||||||
|
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
|
||||||
|
/// A JSON value.
|
||||||
|
pub enum JsonValue {
|
||||||
|
/// A null value.
|
||||||
|
Null(Null),
|
||||||
|
/// A boolean value.
|
||||||
|
Bool(Bool),
|
||||||
|
/// A number value.
|
||||||
|
Number(Number),
|
||||||
|
/// A string value.
|
||||||
|
String(String),
|
||||||
|
/// An array value.
|
||||||
|
Array(Array),
|
||||||
|
/// An object value.
|
||||||
|
Object(Object),
|
||||||
|
}
|
||||||
|
|
||||||
|
impl JsonValue {
|
||||||
|
/// Returns the span corresponding to the value.
|
||||||
|
pub fn into_span(self) -> Span<str> {
|
||||||
|
match self {
|
||||||
|
JsonValue::Null(v) => v.0,
|
||||||
|
JsonValue::Bool(v) => v.0,
|
||||||
|
JsonValue::Number(v) => v.0,
|
||||||
|
JsonValue::String(v) => v.0,
|
||||||
|
JsonValue::Array(v) => v.span,
|
||||||
|
JsonValue::Object(v) => v.span,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Shifts the span range by the given offset.
|
||||||
|
pub fn offset(&mut self, offset: usize) {
|
||||||
|
match self {
|
||||||
|
JsonValue::Null(v) => v.0.offset(offset),
|
||||||
|
JsonValue::Bool(v) => v.0.offset(offset),
|
||||||
|
JsonValue::Number(v) => v.0.offset(offset),
|
||||||
|
JsonValue::String(v) => v.0.offset(offset),
|
||||||
|
JsonValue::Array(v) => {
|
||||||
|
v.span.offset(offset);
|
||||||
|
v.elems.iter_mut().for_each(|v| v.offset(offset))
|
||||||
|
}
|
||||||
|
JsonValue::Object(v) => {
|
||||||
|
v.span.offset(offset);
|
||||||
|
v.elems.iter_mut().for_each(|kv| {
|
||||||
|
kv.span.offset(offset);
|
||||||
|
kv.key.offset(offset);
|
||||||
|
kv.value.offset(offset);
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl Spanned<str> for JsonValue {
|
||||||
|
fn span(&self) -> &Span<str> {
|
||||||
|
match self {
|
||||||
|
JsonValue::Null(v) => v.span(),
|
||||||
|
JsonValue::Bool(v) => v.span(),
|
||||||
|
JsonValue::Number(v) => v.span(),
|
||||||
|
JsonValue::String(v) => v.span(),
|
||||||
|
JsonValue::Array(v) => v.span(),
|
||||||
|
JsonValue::Object(v) => v.span(),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl JsonValue {
|
||||||
|
/// Get a reference to the value using the given path.
|
||||||
|
///
|
||||||
|
/// # Example
|
||||||
|
///
|
||||||
|
/// ```
|
||||||
|
/// use spansy::json::parse_str;
|
||||||
|
/// use spansy::Spanned;
|
||||||
|
///
|
||||||
|
/// let src = "{\"foo\": {\"bar\": [42, 14]}}";
|
||||||
|
///
|
||||||
|
/// let value = parse_str(src).unwrap();
|
||||||
|
///
|
||||||
|
/// assert_eq!(value.get("foo.bar.1").unwrap().span(), "14");
|
||||||
|
/// ```
|
||||||
|
pub fn get(&self, path: &str) -> Option<&JsonValue> {
|
||||||
|
match self {
|
||||||
|
JsonValue::Null(_) => None,
|
||||||
|
JsonValue::Bool(_) => None,
|
||||||
|
JsonValue::Number(_) => None,
|
||||||
|
JsonValue::String(_) => None,
|
||||||
|
JsonValue::Array(v) => v.get(path),
|
||||||
|
JsonValue::Object(v) => v.get(path),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// A key value pair in a JSON object.
|
||||||
|
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
|
||||||
|
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
|
||||||
|
pub struct KeyValue {
|
||||||
|
pub(crate) span: Span<str>,
|
||||||
|
|
||||||
|
/// The key of the pair.
|
||||||
|
pub key: JsonKey,
|
||||||
|
/// The value of the pair.
|
||||||
|
pub value: JsonValue,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl KeyValue {
|
||||||
|
/// Returns the indices of the key value pair, excluding the value.
|
||||||
|
pub fn without_value(&self) -> RangeSet<usize> {
|
||||||
|
self.span.indices.difference(&self.value.span().indices)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
|
||||||
|
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
|
||||||
|
/// A key in a JSON object.
|
||||||
|
pub struct JsonKey(pub(crate) Span<str>);
|
||||||
|
|
||||||
|
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
|
||||||
|
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
|
||||||
|
/// A null value.
|
||||||
|
pub struct Null(pub(crate) Span<str>);
|
||||||
|
|
||||||
|
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
|
||||||
|
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
|
||||||
|
/// A boolean value.
|
||||||
|
pub struct Bool(pub(crate) Span<str>);
|
||||||
|
|
||||||
|
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
|
||||||
|
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
|
||||||
|
/// A number value.
|
||||||
|
pub struct Number(pub(crate) Span<str>);
|
||||||
|
|
||||||
|
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
|
||||||
|
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
|
||||||
|
/// A string value.
|
||||||
|
pub struct String(pub(crate) Span<str>);
|
||||||
|
|
||||||
|
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
|
||||||
|
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
|
||||||
|
/// An array value.
|
||||||
|
pub struct Array {
|
||||||
|
pub(crate) span: Span<str>,
|
||||||
|
/// The elements of the array.
|
||||||
|
pub elems: Vec<JsonValue>,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl Array {
|
||||||
|
/// Get a reference to the value using the given path.
|
||||||
|
pub fn get(&self, path: &str) -> Option<&JsonValue> {
|
||||||
|
let mut path_iter = path.split('.');
|
||||||
|
|
||||||
|
let key = path_iter.next()?;
|
||||||
|
let idx = key.parse::<usize>().ok()?;
|
||||||
|
|
||||||
|
let value = self.elems.get(idx)?;
|
||||||
|
|
||||||
|
if path_iter.next().is_some() {
|
||||||
|
value.get(&path[key.len() + 1..])
|
||||||
|
} else {
|
||||||
|
Some(value)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Returns the indices of the array, excluding the values and separators.
|
||||||
|
pub fn without_values(&self) -> RangeSet<usize> {
|
||||||
|
let start = self
|
||||||
|
.span
|
||||||
|
.indices
|
||||||
|
.min()
|
||||||
|
.expect("array has at least brackets");
|
||||||
|
let end = self
|
||||||
|
.span
|
||||||
|
.indices
|
||||||
|
.max()
|
||||||
|
.expect("array has at least brackets");
|
||||||
|
|
||||||
|
RangeSet::from([start..start + 1, end..end + 1])
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl Index<usize> for Array {
|
||||||
|
type Output = JsonValue;
|
||||||
|
|
||||||
|
/// Returns the value at the given index of the array.
|
||||||
|
///
|
||||||
|
/// # Panics
|
||||||
|
///
|
||||||
|
/// Panics if the index is out of bounds.
|
||||||
|
fn index(&self, index: usize) -> &Self::Output {
|
||||||
|
self.elems.get(index).expect("index is in bounds")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
|
||||||
|
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
|
||||||
|
/// A JSON object value.
|
||||||
|
pub struct Object {
|
||||||
|
pub(crate) span: Span<str>,
|
||||||
|
/// The key value pairs of the object.
|
||||||
|
pub elems: Vec<KeyValue>,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl Object {
|
||||||
|
/// Get a reference to the value using the given path.
|
||||||
|
pub fn get(&self, path: &str) -> Option<&JsonValue> {
|
||||||
|
let mut path_iter = path.split('.');
|
||||||
|
|
||||||
|
let key = path_iter.next()?;
|
||||||
|
|
||||||
|
let KeyValue { value, .. } = self.elems.iter().find(|kv| kv.key == key)?;
|
||||||
|
|
||||||
|
if path_iter.next().is_some() {
|
||||||
|
value.get(&path[key.len() + 1..])
|
||||||
|
} else {
|
||||||
|
Some(value)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Returns the indices of the object, excluding the key value pairs.
|
||||||
|
pub fn without_pairs(&self) -> RangeSet<usize> {
|
||||||
|
let mut indices = self.span.indices.clone();
|
||||||
|
for kv in &self.elems {
|
||||||
|
indices = indices.difference(&kv.span.indices);
|
||||||
|
}
|
||||||
|
indices
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl Index<&str> for Object {
|
||||||
|
type Output = JsonValue;
|
||||||
|
|
||||||
|
/// Returns the value at the given key of the object.
|
||||||
|
///
|
||||||
|
/// # Panics
|
||||||
|
///
|
||||||
|
/// Panics if the key is not present.
|
||||||
|
fn index(&self, key: &str) -> &Self::Output {
|
||||||
|
self.get(key).expect("key is present")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
macro_rules! impl_type {
|
||||||
|
($ty:ident, $span:tt) => {
|
||||||
|
impl $ty {
|
||||||
|
/// Returns the span corresponding to the value.
|
||||||
|
pub fn into_span(self) -> Span<str> {
|
||||||
|
self.$span
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Shifts the span range by the given offset.
|
||||||
|
pub fn offset(&mut self, offset: usize) {
|
||||||
|
self.$span.offset(offset);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl Spanned<str> for $ty {
|
||||||
|
fn span(&self) -> &Span<str> {
|
||||||
|
&self.$span
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl PartialEq<str> for $ty {
|
||||||
|
fn eq(&self, other: &str) -> bool {
|
||||||
|
self.$span == other
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl PartialEq<$ty> for str {
|
||||||
|
fn eq(&self, other: &$ty) -> bool {
|
||||||
|
self == &other.$span
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl PartialEq<&str> for $ty {
|
||||||
|
fn eq(&self, other: &&str) -> bool {
|
||||||
|
self.$span == *other
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl PartialEq<$ty> for &str {
|
||||||
|
fn eq(&self, other: &$ty) -> bool {
|
||||||
|
*self == &other.$span
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl PartialEq<Range<usize>> for $ty {
|
||||||
|
fn eq(&self, other: &Range<usize>) -> bool {
|
||||||
|
&self.$span == other
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl PartialEq<$ty> for Range<usize> {
|
||||||
|
fn eq(&self, other: &$ty) -> bool {
|
||||||
|
self == &other.$span
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl PartialEq<Span<str>> for $ty {
|
||||||
|
fn eq(&self, other: &Span<str>) -> bool {
|
||||||
|
&self.$span == other
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl PartialEq<$ty> for Span<str> {
|
||||||
|
fn eq(&self, other: &$ty) -> bool {
|
||||||
|
self == &other.$span
|
||||||
|
}
|
||||||
|
}
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
impl_type!(JsonKey, 0);
|
||||||
|
impl_type!(Null, 0);
|
||||||
|
impl_type!(Bool, 0);
|
||||||
|
impl_type!(Number, 0);
|
||||||
|
impl_type!(String, 0);
|
||||||
|
impl_type!(Array, span);
|
||||||
|
impl_type!(Object, span);
|
||||||
|
impl_type!(KeyValue, span);
|
||||||
|
|
||||||
|
#[cfg(test)]
mod tests {
    use utils::range::IndexRanges;

    use crate::json::parse_str;

    use super::*;

    /// A top-level key resolves to its value.
    #[test]
    fn test_obj_index() {
        let src = "{\"foo\": \"bar\"}";

        let value = parse_str(src).unwrap();

        assert_eq!(value.get("foo").unwrap().span(), "bar");
    }

    /// A numeric path segment indexes into an array.
    #[test]
    fn test_array_index() {
        let src = "{\"foo\": [42, 14]}";

        let value = parse_str(src).unwrap();

        assert_eq!(value.get("foo.1").unwrap().span(), "14");
    }

    /// Paths may mix object keys and array indices.
    #[test]
    fn test_nested_index() {
        let src = "{\"foo\": {\"bar\": [42, 14]}}";

        let value = parse_str(src).unwrap();

        assert_eq!(value.get("foo.bar.1").unwrap().span(), "14");
    }

    /// Removing the value from a pair keeps the key, separator and quotes.
    #[test]
    fn test_key_value_without_value() {
        let src = "{\"foo\": \"bar\"\n}";

        let JsonValue::Object(value) = parse_str(src).unwrap() else {
            panic!("expected object");
        };

        let indices = value.elems[0].without_value();

        assert_eq!(src.index_ranges(&indices), "\"foo\": \"\"");
    }

    /// Removing the values from an array keeps only the brackets.
    #[test]
    fn test_array_without_values() {
        let src = "[42, 14]";

        let JsonValue::Array(value) = parse_str(src).unwrap() else {
            // The source is an array, so the failure message must say so.
            panic!("expected array");
        };

        let indices = value.without_values();

        assert_eq!(src.index_ranges(&indices), "[]");
    }

    /// Removing the pairs from an object keeps the braces and whitespace.
    #[test]
    fn test_object_without_pairs() {
        let src = "{\"foo\": \"bar\"\n}";

        let JsonValue::Object(value) = parse_str(src).unwrap() else {
            panic!("expected object");
        };

        let indices = value.without_pairs();

        assert_eq!(src.index_ranges(&indices), "{\n}");
    }
}
|
||||||
84
spansy/src/json/visit.rs
Normal file
84
spansy/src/json/visit.rs
Normal file
@@ -0,0 +1,84 @@
|
|||||||
|
use super::{types, types::JsonValue};
|
||||||
|
|
||||||
|
/// A visitor for JSON values.
|
||||||
|
///
|
||||||
|
/// # Example
|
||||||
|
///
|
||||||
|
/// ```
|
||||||
|
/// use spansy::json::{parse_str, Number, JsonVisit};
|
||||||
|
/// use spansy::Spanned;
|
||||||
|
///
|
||||||
|
/// struct DigitReplacer<'a, 'b> {
|
||||||
|
/// src: &'a mut String,
|
||||||
|
/// digit: &'b str,
|
||||||
|
/// }
|
||||||
|
///
|
||||||
|
/// impl<'a> JsonVisit for DigitReplacer<'a, '_> {
|
||||||
|
/// fn visit_number(&mut self, node: &Number) {
|
||||||
|
/// let span = node.span();
|
||||||
|
/// for range in span.indices().iter_ranges() {
|
||||||
|
/// let replacement = self.digit.repeat(range.len());
|
||||||
|
/// self.src.replace_range(range, &replacement);
|
||||||
|
/// }
|
||||||
|
/// }
|
||||||
|
/// }
|
||||||
|
///
|
||||||
|
/// let src = "{\"foo\": [42, 69]}";
|
||||||
|
///
|
||||||
|
/// let value = parse_str(src).unwrap();
|
||||||
|
///
|
||||||
|
/// let mut new = src.to_string();
|
||||||
|
///
|
||||||
|
/// // Replace the digits of all numbers with 9.
|
||||||
|
/// DigitReplacer { src: &mut new, digit: "9" }.visit_value(&value);
|
||||||
|
///
|
||||||
|
/// assert_eq!(new, "{\"foo\": [99, 99]}");
|
||||||
|
/// ```
|
||||||
|
pub trait JsonVisit {
|
||||||
|
/// Visit a key value pair in a JSON object.
|
||||||
|
fn visit_key_value(&mut self, node: &types::KeyValue) {
|
||||||
|
self.visit_key(&node.key);
|
||||||
|
self.visit_value(&node.value);
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Visit a key in a JSON object.
|
||||||
|
fn visit_key(&mut self, _node: &types::JsonKey) {}
|
||||||
|
|
||||||
|
/// Visit a JSON value.
|
||||||
|
fn visit_value(&mut self, node: &JsonValue) {
|
||||||
|
match node {
|
||||||
|
JsonValue::Null(value) => self.visit_null(value),
|
||||||
|
JsonValue::Bool(value) => self.visit_bool(value),
|
||||||
|
JsonValue::Number(value) => self.visit_number(value),
|
||||||
|
JsonValue::String(value) => self.visit_string(value),
|
||||||
|
JsonValue::Array(value) => self.visit_array(value),
|
||||||
|
JsonValue::Object(value) => self.visit_object(value),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Visit an array value.
|
||||||
|
fn visit_array(&mut self, node: &types::Array) {
|
||||||
|
for elem in &node.elems {
|
||||||
|
self.visit_value(elem);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Visit an object value.
|
||||||
|
fn visit_object(&mut self, node: &types::Object) {
|
||||||
|
for kv in &node.elems {
|
||||||
|
self.visit_key_value(kv);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Visit a null value.
|
||||||
|
fn visit_null(&mut self, _node: &types::Null) {}
|
||||||
|
|
||||||
|
/// Visit a boolean value.
|
||||||
|
fn visit_bool(&mut self, _node: &types::Bool) {}
|
||||||
|
|
||||||
|
/// Visit a number value.
|
||||||
|
fn visit_number(&mut self, _node: &types::Number) {}
|
||||||
|
|
||||||
|
/// Visit a string value.
|
||||||
|
fn visit_string(&mut self, _node: &types::String) {}
|
||||||
|
}
|
||||||
305
spansy/src/lib.rs
Normal file
305
spansy/src/lib.rs
Normal file
@@ -0,0 +1,305 @@
|
|||||||
|
//! Parsing span information.
|
||||||
|
|
||||||
|
#![deny(missing_docs, unreachable_pub, unused_must_use)]
|
||||||
|
#![deny(clippy::all)]
|
||||||
|
|
||||||
|
use std::{fmt::Debug, marker::PhantomData, ops::Range};
|
||||||
|
|
||||||
|
use bytes::Bytes;
|
||||||
|
|
||||||
|
pub(crate) mod helpers;
|
||||||
|
pub mod http;
|
||||||
|
pub mod json;
|
||||||
|
|
||||||
|
use utils::range::RangeSet;
|
||||||
|
|
||||||
|
/// A parsing error.
|
||||||
|
#[derive(Debug, thiserror::Error)]
|
||||||
|
#[error("parsing error: {0}")]
|
||||||
|
pub struct ParseError(String);
|
||||||
|
|
||||||
|
impl<R: pest::RuleType> From<pest::error::Error<R>> for ParseError {
|
||||||
|
fn from(value: pest::error::Error<R>) -> Self {
|
||||||
|
Self(value.to_string())
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl From<std::str::Utf8Error> for ParseError {
|
||||||
|
fn from(value: std::str::Utf8Error) -> Self {
|
||||||
|
Self(value.to_string())
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// A spanned value.
|
||||||
|
pub trait Spanned<T: ?Sized = [u8]> {
|
||||||
|
/// Get a reference to the span of the value.
|
||||||
|
fn span(&self) -> &Span<T>;
|
||||||
|
}
|
||||||
|
|
||||||
|
/// A span of a source string.
|
||||||
|
#[derive(PartialEq, Eq, Hash)]
|
||||||
|
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
|
||||||
|
pub struct Span<T: ?Sized = [u8]> {
|
||||||
|
/// The original source bytes from when the span was parsed.
|
||||||
|
pub(crate) data: Bytes,
|
||||||
|
/// The set of indices within the source data.
|
||||||
|
pub(crate) indices: RangeSet<usize>,
|
||||||
|
_pd: PhantomData<T>,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl Clone for Span<[u8]> {
|
||||||
|
fn clone(&self) -> Self {
|
||||||
|
Self {
|
||||||
|
data: self.data.clone(),
|
||||||
|
indices: self.indices.clone(),
|
||||||
|
_pd: PhantomData,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl Clone for Span<str> {
|
||||||
|
fn clone(&self) -> Self {
|
||||||
|
Self {
|
||||||
|
data: self.data.clone(),
|
||||||
|
indices: self.indices.clone(),
|
||||||
|
_pd: PhantomData,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl Debug for Span<[u8]> {
|
||||||
|
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||||
|
f.debug_struct("Span")
|
||||||
|
.field("span", &self.as_bytes())
|
||||||
|
.field("indices", &self.indices)
|
||||||
|
.finish()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl Debug for Span<str> {
|
||||||
|
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||||
|
f.debug_struct("Span")
|
||||||
|
.field("span", &self.as_str())
|
||||||
|
.field("indices", &self.indices)
|
||||||
|
.finish()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<T: ?Sized> Span<T> {
|
||||||
|
/// Returns a reference to the span data.
|
||||||
|
pub fn data(&self) -> &[u8] {
|
||||||
|
self.data.as_ref()
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Converts the span into bytes.
|
||||||
|
pub fn to_bytes(self) -> Bytes {
|
||||||
|
self.data
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Returns the indices within the source data.
|
||||||
|
pub fn indices(&self) -> &RangeSet<usize> {
|
||||||
|
&self.indices
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Returns the length of the span in bytes.
|
||||||
|
///
|
||||||
|
/// Just like `str::len()`, this is not necessarily the number of characters.
|
||||||
|
pub fn len(&self) -> usize {
|
||||||
|
self.indices.len()
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Returns `true` if the span is empty.
|
||||||
|
pub fn is_empty(&self) -> bool {
|
||||||
|
self.indices.is_empty()
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Shifts the span indices by the given offset.
|
||||||
|
///
|
||||||
|
/// # Panics
|
||||||
|
///
|
||||||
|
/// Panics if the offset causes the indices to overflow `usize::MAX`.
|
||||||
|
pub fn offset(&mut self, offset: usize) {
|
||||||
|
self.indices.shift_right(&offset);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl Span<str> {
|
||||||
|
/// Create a new string span.
|
||||||
|
///
|
||||||
|
/// # Panics
|
||||||
|
///
|
||||||
|
/// Panics if the given range is not within the source bytes, or
|
||||||
|
/// if the span is not a valid UTF-8 string.
|
||||||
|
pub(crate) fn new_str(src: Bytes, range: Range<usize>) -> Self {
|
||||||
|
assert!(
|
||||||
|
std::str::from_utf8(&src[range.clone()]).is_ok(),
|
||||||
|
"span is not a valid UTF-8 string"
|
||||||
|
);
|
||||||
|
|
||||||
|
Self {
|
||||||
|
data: src.slice(range.clone()),
|
||||||
|
indices: range.into(),
|
||||||
|
_pd: PhantomData,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Create a new string span from a string slice.
|
||||||
|
///
|
||||||
|
/// # Panics
|
||||||
|
///
|
||||||
|
/// Panics if the given slice is not within the source bytes.
|
||||||
|
pub(crate) fn new_from_str(src: Bytes, span: &str) -> Self {
|
||||||
|
let range = helpers::get_span_range(src.as_ref(), span.as_bytes());
|
||||||
|
|
||||||
|
Self {
|
||||||
|
data: src.slice(range.clone()),
|
||||||
|
indices: range.into(),
|
||||||
|
_pd: PhantomData,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Converts this type to a string slice.
|
||||||
|
pub fn as_str(&self) -> &str {
|
||||||
|
self.as_ref()
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Returns the corresponding byte span.
|
||||||
|
pub fn to_byte_span(&self) -> Span<[u8]> {
|
||||||
|
self.into()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl AsRef<str> for Span<str> {
|
||||||
|
fn as_ref(&self) -> &str {
|
||||||
|
// # Safety
|
||||||
|
// The span is guaranteed to be a valid UTF-8 string because it is not
|
||||||
|
// possible to create a `Span<str>` from a non-UTF-8 string.
|
||||||
|
unsafe { std::str::from_utf8_unchecked(&self.data) }
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl AsRef<[u8]> for Span<str> {
|
||||||
|
fn as_ref(&self) -> &[u8] {
|
||||||
|
self.data.as_ref()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl Span<[u8]> {
|
||||||
|
/// Create a new byte span.
|
||||||
|
///
|
||||||
|
/// # Panics
|
||||||
|
///
|
||||||
|
/// Panics if the given range is not within the source bytes.
|
||||||
|
pub(crate) fn new_bytes(src: Bytes, range: Range<usize>) -> Self {
|
||||||
|
assert!(src.len() >= range.end, "span is not within source bytes");
|
||||||
|
|
||||||
|
Self {
|
||||||
|
data: src.slice(range.clone()),
|
||||||
|
indices: range.into(),
|
||||||
|
_pd: PhantomData,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Converts this type to a byte slice.
|
||||||
|
pub fn as_bytes(&self) -> &[u8] {
|
||||||
|
self.as_ref()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl AsRef<[u8]> for Span<[u8]> {
|
||||||
|
fn as_ref(&self) -> &[u8] {
|
||||||
|
self.data.as_ref()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl From<Span<str>> for Span<[u8]> {
|
||||||
|
fn from(span: Span<str>) -> Self {
|
||||||
|
Self {
|
||||||
|
data: span.data,
|
||||||
|
indices: span.indices,
|
||||||
|
_pd: PhantomData,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl From<&Span<str>> for Span<[u8]> {
|
||||||
|
fn from(span: &Span<str>) -> Self {
|
||||||
|
Self {
|
||||||
|
data: span.data.clone(),
|
||||||
|
indices: span.indices.clone(),
|
||||||
|
_pd: PhantomData,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl PartialEq<Span> for [u8] {
|
||||||
|
fn eq(&self, other: &Span) -> bool {
|
||||||
|
self == other.as_ref()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl PartialEq<[u8]> for Span {
|
||||||
|
fn eq(&self, other: &[u8]) -> bool {
|
||||||
|
self.as_ref() == other
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl PartialEq<&[u8]> for Span {
|
||||||
|
fn eq(&self, other: &&[u8]) -> bool {
|
||||||
|
self.as_ref() == *other
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl PartialEq<[u8]> for &Span {
|
||||||
|
fn eq(&self, other: &[u8]) -> bool {
|
||||||
|
self.as_ref() == other
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl PartialEq<Span<str>> for str {
|
||||||
|
fn eq(&self, other: &Span<str>) -> bool {
|
||||||
|
self == other.as_str()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl PartialEq<str> for Span<str> {
|
||||||
|
fn eq(&self, other: &str) -> bool {
|
||||||
|
self.as_str() == other
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl PartialEq<&str> for Span<str> {
|
||||||
|
fn eq(&self, other: &&str) -> bool {
|
||||||
|
self.as_str() == *other
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl PartialEq<str> for &Span<str> {
|
||||||
|
fn eq(&self, other: &str) -> bool {
|
||||||
|
self.as_str() == other
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<T: ?Sized> PartialEq<Range<usize>> for Span<T> {
|
||||||
|
fn eq(&self, other: &Range<usize>) -> bool {
|
||||||
|
&self.indices == other
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<T: ?Sized> PartialEq<Span<T>> for Range<usize> {
|
||||||
|
fn eq(&self, other: &Span<T>) -> bool {
|
||||||
|
other == self
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<T: ?Sized> PartialEq<Range<usize>> for &Span<T> {
|
||||||
|
fn eq(&self, other: &Range<usize>) -> bool {
|
||||||
|
*self == other
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<T: ?Sized> PartialEq<Span<T>> for &Range<usize> {
|
||||||
|
fn eq(&self, other: &Span<T>) -> bool {
|
||||||
|
other == *self
|
||||||
|
}
|
||||||
|
}
|
||||||
Reference in New Issue
Block a user