[wip] search with tantivy

2023-06-01 19:20:26 +08:00
parent aaeaaf4dde
commit 19491d43f5
12 changed files with 480 additions and 158 deletions

.gitignore (3 changes)

@@ -5,4 +5,5 @@
 .rustc_info.json
 .rustdoc_fingerprint.json
 config.toml
-/snapshots
+/snapshots
+/tantivy

src/app_router.rs

@@ -11,6 +11,7 @@ use crate::{
},
meta_handler::{handler_404, home, style},
notification::notification,
search::search,
solo::{solo, solo_delete, solo_like, solo_list, solo_post},
upload::{gallery, upload, upload_pic_post, upload_post},
user::{
@@ -104,6 +105,7 @@ pub async fn router(db: Db) -> Router {
.route("/feed/star/:item_id", get(feed_star))
.route("/feed/subscribe/:uid/:item_id", get(feed_subscribe))
.route("/feed/read/:item_id", get(feed_read))
.route("/search", get(search))
.with_state(db);
let mut router_static = Router::new()

src/config.rs

@@ -15,6 +15,7 @@ pub struct Config {
pub(crate) avatars_path: String,
pub(crate) inn_icons_path: String,
pub(crate) upload_path: String,
pub(crate) tantivy_path: String,
pub(crate) serve_dir: Vec<(String, String, String)>,
cert: String,
key: String,
@@ -40,6 +41,7 @@ impl Config {
check_path(&config.avatars_path);
check_path(&config.inn_icons_path);
check_path(&config.upload_path);
check_path(&config.tantivy_path);
config
}
@@ -59,9 +61,10 @@ impl Default for Config {
         Config {
             db: "freedit.db".into(),
             addr: "127.0.0.1:3001".into(),
-            avatars_path: "./static/imgs/avatars".into(),
-            inn_icons_path: "./static/imgs/inn_icons".into(),
-            upload_path: "./static/imgs/upload".into(),
+            avatars_path: "static/imgs/avatars".into(),
+            inn_icons_path: "static/imgs/inn_icons".into(),
+            upload_path: "static/imgs/upload".into(),
+            tantivy_path: "tantivy".into(),
             serve_dir: vec![],
             cert: "".into(),
             key: "".into(),
@@ -74,6 +77,8 @@ fn check_path(path_str: &str) {
let path = Path::new(path_str);
if !path.exists() {
fs::create_dir_all(path).unwrap();
info!("create path: {}", path_str);
} else {
info!("{path_str} is ok");
}
info!("static path {path_str}");
}
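Worth noting why the new `tantivy_path` goes through `check_path` like the image directories: `MmapDirectory::open` in `Tan::get_index` (later in this diff) returns an error on a missing directory rather than creating it. A minimal sketch of that interaction, assuming nothing beyond what the diff already uses:

```rust
use std::{fs, path::Path};
use tantivy::directory::MmapDirectory;

fn open_index_dir(dir: &str) -> tantivy::Result<MmapDirectory> {
    // Mirrors check_path: MmapDirectory::open fails if the directory
    // does not exist, so create it up front.
    if !Path::new(dir).exists() {
        fs::create_dir_all(dir).expect("cannot create index dir");
    }
    Ok(MmapDirectory::open(dir)?)
}
```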

src/controller/feed.rs

@@ -632,29 +632,29 @@ pub(super) async fn update(
     let content = CLIENT.get(url).send().await?.bytes().await?;
     let item_links_tree = db.open_tree("item_links")?;
+    let tan_tree = db.open_tree("tan")?;
     let mut item_ids = vec![];
     let feed = match rss::Channel::read_from(&content[..]) {
         Ok(rss) => {
             for item in rss.items.into_iter().take(n) {
                 let source_item: SourceItem = item.try_into()?;
-                let item_id = if let Some(v) = item_links_tree.get(&source_item.link)? {
-                    ivec_to_u32(&v)
-                } else {
-                    incr_id(db, "items_count")?
-                };
-                let item = Item {
-                    link: source_item.link,
-                    title: source_item.title,
-                    feed_title: rss.title.clone(),
-                    updated: source_item.updated,
-                    content: source_item.content,
-                };
-                item_links_tree.insert(&item.link, u32_to_ivec(item_id))?;
-                set_one(db, "items", item_id, &item)?;
-                item_ids.push((item_id, item.updated));
+                if let None = item_links_tree.get(&source_item.link)? {
+                    let item_id = incr_id(db, "items_count")?;
+                    let item = Item {
+                        link: source_item.link,
+                        title: source_item.title,
+                        feed_title: rss.title.clone(),
+                        updated: source_item.updated,
+                        content: source_item.content,
+                    };
+                    item_links_tree.insert(&item.link, u32_to_ivec(item_id))?;
+                    set_one(db, "items", item_id, &item)?;
+                    item_ids.push((item_id, item.updated));
+                    tan_tree.insert(format!("item{}", item_id), &[])?;
+                }
             }
             Feed {
@@ -666,22 +666,22 @@
         Ok(atom) => {
             for entry in atom.entries.into_iter().take(n) {
                 let source_item: SourceItem = entry.into();
-                let item_id = if let Some(v) = item_links_tree.get(&source_item.link)? {
-                    ivec_to_u32(&v)
-                } else {
-                    incr_id(db, "items_count")?
-                };
-                let item = Item {
-                    link: source_item.link,
-                    title: source_item.title,
-                    feed_title: atom.title.to_string(),
-                    updated: source_item.updated,
-                    content: source_item.content,
-                };
-                item_links_tree.insert(&item.link, u32_to_ivec(item_id))?;
-                set_one(db, "items", item_id, &item)?;
-                item_ids.push((item_id, item.updated));
+                if let None = item_links_tree.get(&source_item.link)? {
+                    let item_id = incr_id(db, "items_count")?;
+                    let item = Item {
+                        link: source_item.link,
+                        title: source_item.title,
+                        feed_title: atom.title.to_string(),
+                        updated: source_item.updated,
+                        content: source_item.content,
+                    };
+                    item_links_tree.insert(&item.link, u32_to_ivec(item_id))?;
+                    set_one(db, "items", item_id, &item)?;
+                    item_ids.push((item_id, item.updated));
+                    tan_tree.insert(format!("item{}", item_id), &[])?;
+                }
             }
             Feed {
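Both arms of this change (RSS above, Atom here) flip the write path from "upsert every fetched item" to "store only items whose link has not been seen before", and each new item id is also written into the `tan` tree so the watcher in `src/main.rs` (end of this diff) indexes it. One small style note: `if let None = …` compiles, but clippy prefers `is_none()`; an equivalent spelling of the guard:

```rust
// Equivalent, clippy-friendly form of the guard used in both arms
// (fragment; lives inside the fetch loop shown above):
if item_links_tree.get(&source_item.link)?.is_none() {
    // first sight of this link: allocate an id, persist the item,
    // then enqueue "item{id}" in the "tan" tree for indexing
}
```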

src/controller/inn.rs

@@ -709,6 +709,12 @@ pub(crate) async fn edit_post_post(
User::update_stats(&db, claim.uid, "post")?;
claim.update_last_write(&db)?;
if inn.inn_type.as_str() != "Private" {
let is_update: &[u8] = if old_pid == 0 { &[] } else { &[0] };
db.open_tree("tan")?
.insert(format!("post{}", pid), is_update)?;
}
let target = format!("/post/{iid}/{pid}");
Ok(Redirect::to(&target))
}
@@ -1575,6 +1581,12 @@ pub(crate) async fn comment_post(
User::update_stats(&db, claim.uid, "comment")?;
claim.update_last_write(&db)?;
let inn: Inn = get_one(&db, "inns", iid)?;
if inn.inn_type != "Private" {
db.open_tree("tan")?
.insert(format!("comt{}#{}", pid, cid), &[])?;
}
let target = format!("/post/{iid}/{pid}");
Ok(Redirect::to(&target))
}
@@ -1643,7 +1655,10 @@ pub(crate) async fn comment_delete(
     inn_add_index(&db, iid, pid, timestamp as u32, visibility)?;
-    let target = format!("/post/{iid}/{pid}");
+    db.open_tree("tan")?
+        .remove(format!("comt{}#{}", pid, cid))?;
+    let target = format!("/post/{pid}/{cid}");
     Ok(Redirect::to(&target))
 }
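Across these hunks the `tan` tree works as a write-ahead queue for the indexer: the key encodes content type plus id, and the value length encodes the operation (the reader is the `watch_prefix` loop in `src/main.rs` at the end of this diff). A sketch of the convention, using only calls that appear in the diff:

```rust
use sled::Db;

// Sketch of the queue convention used across these hunks (names from the diff).
fn queue_for_index(db: &Db, pid: u32, cid: u32) -> sled::Result<()> {
    let tan = db.open_tree("tan")?;
    // Empty value: brand-new content, the watcher adds it to the index.
    tan.insert(format!("post{}", pid), &[])?;
    // One-byte value: edited content, the watcher deletes the old doc then re-adds.
    let is_update: &[u8] = &[0];
    tan.insert(format!("post{}", pid), is_update)?;
    // Key removed: the watcher deletes the doc from the index.
    tan.remove(format!("comt{}#{}", pid, cid))?;
    Ok(())
}
```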

src/controller/mod.rs

@@ -14,7 +14,8 @@
 //! | "user_stats" | `timestamp_uid_type` | N |
 //! | "user_uploads" | `uid#img_id` | `image_hash.ext` |
 //! | default | "imgs_count" | N |
-//! | home_pages | `uid` | `u8` |
+//! | "home_pages" | `uid` | `u8` |
+//! | "tan" | `content_type#id` | `&[]` or `&[0]` |
 //!
 //! ### notification
 //! | tree | key | value |
@@ -107,21 +108,24 @@
 pub mod db_utils;
 pub mod feed;
 pub mod meta_handler;
 pub mod notification;
+pub mod tantivy;
 pub(super) mod admin;
 pub(super) mod inn;
+pub(super) mod search;
 pub(super) mod solo;
 pub(super) mod upload;
 pub(super) mod user;
 mod fmt;
-mod tantity;
 use self::db_utils::{
     get_ids_by_prefix, get_one, incr_id, ivec_to_u32, u32_to_ivec, u8_slice_to_u32,
 };
-use self::fmt::md2html;
+use self::fmt::{md2html, ts_to_date};
+use self::tantivy::{ToDoc, FIELDS};
 use crate::{controller::meta_handler::into_response, error::AppError};
+use ::tantivy::Document;
 use bincode::config::standard;
 use bincode::{Decode, Encode};
 use chrono::{Days, Utc};
@@ -223,6 +227,18 @@ struct Solo {
replies: Vec<u32>,
}
impl ToDoc for Solo {
fn to_doc(&self, _id: Option<u32>) -> Document {
let mut doc = Document::default();
doc.add_text(FIELDS.id, format!("solo{}", self.sid));
doc.add_text(FIELDS.title, &self.content);
doc.add_text(FIELDS.time, ts_to_date(self.created_at));
doc.add_u64(FIELDS.uid, self.uid as u64);
doc.add_text(FIELDS.content_type, "solo");
doc
}
}
#[derive(Encode, Decode, Serialize, Debug)]
struct Inn {
iid: u32,
@@ -289,6 +305,19 @@ pub struct Post {
pub status: PostStatus,
}
impl ToDoc for Post {
fn to_doc(&self, _id: Option<u32>) -> Document {
let mut doc = Document::default();
doc.add_text(FIELDS.id, format!("post{}", self.pid));
doc.add_text(FIELDS.title, &self.title);
doc.add_text(FIELDS.time, ts_to_date(self.created_at));
doc.add_u64(FIELDS.uid, self.uid as u64);
doc.add_text(FIELDS.content, &self.content);
doc.add_text(FIELDS.content_type, "post");
doc
}
}
/// Form data: `/inn/:iid/post/:pid` post create/edit page
#[derive(Debug, Default, Deserialize, Validate, Encode, Decode)]
pub(super) struct FormPost {
@@ -314,6 +343,18 @@ struct Comment {
is_hidden: bool,
}
impl ToDoc for Comment {
fn to_doc(&self, _id: Option<u32>) -> Document {
let mut doc = Document::default();
doc.add_text(FIELDS.id, format!("comt{}#{}", self.pid, self.cid));
doc.add_text(FIELDS.title, &self.content);
doc.add_text(FIELDS.time, ts_to_date(self.created_at));
doc.add_u64(FIELDS.uid, self.uid as u64);
doc.add_text(FIELDS.content_type, "comt");
doc
}
}
#[derive(Encode, Decode, Debug)]
struct Feed {
link: String,
@@ -329,6 +370,18 @@ struct Item {
content: String,
}
impl ToDoc for Item {
fn to_doc(&self, id: Option<u32>) -> Document {
let mut doc = Document::default();
doc.add_text(FIELDS.id, format!("item{}", id.unwrap()));
doc.add_text(FIELDS.title, &self.title);
doc.add_text(FIELDS.time, ts_to_date(self.updated));
doc.add_text(FIELDS.content, &self.content);
doc.add_text(FIELDS.content_type, "item");
doc
}
}
/// Go to source code to see default value: [SiteConfig::default()]
#[derive(Serialize, Deserialize, Encode, Decode, Validate, Debug)]
pub(super) struct SiteConfig {
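A note on the `ToDoc` signature used by the four impls above: the `id: Option<u32>` parameter exists only for `Item`, which, unlike `Post`, `Solo`, and `Comment`, does not store its own id, so `add_doc` passes the sled key in. `Solo` and `Comment` also put their body text into the `title` field, presumably so the boosted `title` field covers short-form content. The call sites, as `add_doc` in `src/controller/tantivy.rs` uses them (fragment):

```rust
let doc = post.to_doc(None);          // Post carries its own id (pid)
let doc = item.to_doc(Some(item_id)); // Item has no id field: supply it
```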

src/controller/search.rs (new file, 36 lines)

@@ -0,0 +1,36 @@
use axum::{extract::Query, response::IntoResponse};
use serde::Deserialize;
use tantivy::{collector::TopDocs, DocAddress, Score};
use crate::error::AppError;
use super::tantivy::SEARCHER;
#[derive(Debug, Deserialize)]
pub(crate) struct ParamsSearch {
search: String,
offset: Option<usize>,
}
pub(crate) async fn search(Query(input): Query<ParamsSearch>) -> impl IntoResponse {
let offset = input.offset.unwrap_or_default();
let search = input.search.trim();
if !search.is_empty() {
let Ok(query) = SEARCHER.query_parser.parse_query(search) else {
return AppError::Custom("Please remove special chars".to_owned()).into_response();
};
let searcher = SEARCHER.reader.searcher();
let top_docs: Vec<(Score, DocAddress)> = searcher
.search(&query, &TopDocs::with_limit(20).and_offset(offset))
.unwrap_or_default();
for (_score, doc_address) in top_docs {
let retrieved_doc = searcher.doc(doc_address).unwrap();
println!("{}", SEARCHER.schema.to_json(&retrieved_doc));
}
}
return ().into_response();
}
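As the [wip] marker suggests, the handler only prints hits to stdout for now. For reference, mapping a hit back to its sled record would start from the stored `id` field; a hypothetical follow-up (not in this commit), assuming `FIELDS` is imported as in the sibling modules:

```rust
// retrieved_doc is the tantivy::Document fetched above
if let Some(id) = retrieved_doc
    .get_first(FIELDS.id)
    .and_then(|v| v.as_text())
{
    // id is "post42", "comt7#3", "solo9" or "item15" — the same prefix
    // scheme add_doc uses, so the full record can be loaded from sled.
    println!("hit: {id}");
}
```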

src/controller/solo.rs

@@ -470,6 +470,10 @@ pub(crate) async fn solo_post(
User::update_stats(&db, claim.uid, "solo")?;
claim.update_last_write(&db)?;
if visibility == 0 {
db.open_tree("tan")?.insert(format!("solo{}", sid), &[])?;
}
let target = if input.reply_to > 0 {
format!("/solo/{}", input.reply_to)
} else {
@@ -554,6 +558,8 @@ pub(crate) async fn solo_delete(
let k = [&u32_to_ivec(claim.uid), &sid_ivec].concat();
db.open_tree("user_solos")?.remove(k)?;
db.open_tree("tan")?.remove(format!("solo{}", sid))?;
let target = format!("/solo/user/{}", solo.uid);
Ok(Redirect::to(&target))
}

src/controller/tantity.rs (deleted, 117 lines)

@@ -1,117 +0,0 @@
use std::collections::HashSet;
use jieba_rs::{Jieba, TokenizeMode};
use once_cell::sync::Lazy;
use rust_stemmers::{Algorithm, Stemmer};
use tantivy::tokenizer::{
BoxTokenStream, RemoveLongFilter, TextAnalyzer, Token, TokenStream, Tokenizer,
};
use unicode_segmentation::UnicodeSegmentation;
use whichlang::detect_language;
const MULTI_LINGO_TOKENIZER: &str = "multi_lingo_tokenizer";
fn get_tokenizer() -> TextAnalyzer {
TextAnalyzer::from(MultiLingoTokenizer).filter(RemoveLongFilter::limit(30))
}
#[derive(Clone)]
struct MultiLingoTokenizer;
impl Tokenizer for MultiLingoTokenizer {
fn token_stream<'a>(&self, text: &'a str) -> BoxTokenStream<'a> {
if text.is_empty() {
return BoxTokenStream::from(MultiLingoTokenStream {
tokens: vec![],
index: 0,
});
}
let tokens = pre_tokenize_text(text);
BoxTokenStream::from(MultiLingoTokenStream { tokens, index: 0 })
}
}
struct MultiLingoTokenStream {
tokens: Vec<Token>,
index: usize,
}
impl TokenStream for MultiLingoTokenStream {
fn advance(&mut self) -> bool {
if self.index < self.tokens.len() {
self.index += 1;
true
} else {
false
}
}
fn token(&self) -> &Token {
&self.tokens[self.index - 1]
}
fn token_mut(&mut self) -> &mut Token {
&mut self.tokens[self.index - 1]
}
}
static JIEBA: Lazy<Jieba> = Lazy::new(Jieba::new);
static STEMMER_ENG: Lazy<Stemmer> = Lazy::new(|| Stemmer::create(Algorithm::English));
fn pre_tokenize_text(text: &str) -> Vec<Token> {
let mut tokens = Vec::new();
match detect_language(text) {
whichlang::Lang::Eng => {
for (idx, (offset, word)) in text.unicode_word_indices().enumerate() {
let word = word.to_lowercase();
if !STOP_WORDS_ENG.contains(&word) {
tokens.push(Token {
offset_from: offset,
offset_to: offset + word.len(),
position: idx,
text: STEMMER_ENG.stem(&word).to_string(),
position_length: 1,
});
}
}
}
whichlang::Lang::Cmn => {
let text = fast2s::convert(text);
let orig_tokens = JIEBA.tokenize(&text, TokenizeMode::Search, true);
let mut indices = text.char_indices().collect::<Vec<_>>();
indices.push((text.len(), '\0'));
for token in orig_tokens {
if !STOP_WORDS_CMN.contains(token.word) {
tokens.push(Token {
offset_from: indices[token.start].0,
offset_to: indices[token.end].0,
position: token.start,
text: token.word.to_lowercase(),
position_length: 1,
});
}
}
}
_ => todo!(),
}
tokens
}
static STOP_WORDS_ENG: Lazy<HashSet<String>> = Lazy::new(|| {
stop_words::get(stop_words::LANGUAGE::English)
.into_iter()
.collect()
});
static STOP_WORDS_CMN: Lazy<HashSet<String>> = Lazy::new(|| {
let mut set: HashSet<_> = stop_words::get(stop_words::LANGUAGE::Chinese)
.into_iter()
.collect();
set.insert(" ".to_string());
set
});

src/controller/tantivy.rs (new file, 284 lines)

@@ -0,0 +1,284 @@
use std::collections::HashSet;
use bincode::config::standard;
use jieba_rs::{Jieba, TokenizeMode};
use once_cell::sync::Lazy;
use rust_stemmers::{Algorithm, Stemmer};
use sled::Db;
use tantivy::{
directory::MmapDirectory,
query::QueryParser,
schema::{
Field, IndexRecordOption, Schema, SchemaBuilder, TextFieldIndexing, TextOptions, FAST,
INDEXED, STORED, STRING,
},
tokenizer::{BoxTokenStream, Token, TokenStream, Tokenizer},
Document, Index, IndexReader, IndexWriter, Term,
};
use unicode_segmentation::UnicodeSegmentation;
use whichlang::detect_language;
use crate::{config::CONFIG, error::AppError};
use super::{
db_utils::{get_one, u32_to_ivec},
Comment, Item, Post, Solo,
};
pub(super) trait ToDoc {
fn to_doc(&self, id: Option<u32>) -> Document;
}
pub(super) static SEARCHER: Lazy<Searcher> = Lazy::new(|| Tan::get_searcher().unwrap());
pub(super) static FIELDS: Lazy<Fields> = Lazy::new(|| Tan::set_schema().1);
pub struct Tan {
writer: IndexWriter,
}
pub(super) struct Searcher {
pub(super) reader: IndexReader,
pub(super) query_parser: QueryParser,
pub(super) schema: Schema,
}
pub(super) struct Fields {
pub(super) id: Field,
pub(super) title: Field,
pub(super) time: Field,
pub(super) uid: Field,
pub(super) content: Field,
pub(super) content_type: Field,
}
impl Tan {
fn set_schema() -> (Schema, Fields) {
let mut schema_builder = SchemaBuilder::default();
let text_indexing = TextFieldIndexing::default()
.set_tokenizer(MULTI_LINGO_TOKENIZER)
.set_index_option(IndexRecordOption::WithFreqsAndPositions);
let text_options_stored = TextOptions::default()
.set_indexing_options(text_indexing.clone())
.set_stored();
let text_options_nostored = TextOptions::default().set_indexing_options(text_indexing);
let id = schema_builder.add_text_field("id", STRING | STORED);
let title = schema_builder.add_text_field("title", text_options_stored);
let time = schema_builder.add_text_field("time", STORED);
let uid = schema_builder.add_u64_field("uid", STORED | INDEXED);
let content = schema_builder.add_text_field("content", text_options_nostored);
let content_type = schema_builder.add_text_field("content_type", FAST | STRING);
let fields = Fields {
id,
title,
time,
uid,
content,
content_type,
};
let schema = schema_builder.build();
(schema, fields)
}
fn get_index() -> tantivy::Result<Index> {
let (schema, _) = Tan::set_schema();
let index = tantivy::Index::open_or_create(
MmapDirectory::open(&CONFIG.tantivy_path).unwrap(),
schema,
)?;
let tokenizer = MultiLingoTokenizer {};
index
.tokenizers()
.register(MULTI_LINGO_TOKENIZER, tokenizer);
Ok(index)
}
pub fn init() -> tantivy::Result<Self> {
let index = Tan::get_index()?;
let writer = index.writer(50 * 1024 * 1024)?;
Ok(Tan { writer })
}
fn get_searcher() -> tantivy::Result<Searcher> {
let (schema, _) = Tan::set_schema();
let index = Tan::get_index()?;
let reader = index.reader().unwrap();
let mut query_parser = QueryParser::for_index(&index, vec![FIELDS.title, FIELDS.content]);
query_parser.set_conjunction_by_default();
query_parser.set_field_boost(FIELDS.title, 2.);
Ok(Searcher {
reader,
query_parser,
schema,
})
}
/// id should be `post123`, `comt45#1`, `solo123` or `item123`
///
/// It only adds the doc to tantivy; it does not commit.
pub fn add_doc(&mut self, id: String, db: Db) -> Result<(), AppError> {
let content_type = &id[0..4];
let ids: Vec<_> = id[4..].split("#").collect();
let id1: u32 = ids[0].parse().unwrap();
let doc = match content_type {
"post" => {
let post: Post = get_one(&db, "posts", id1)?;
post.to_doc(None)
}
"comt" => {
let id2: u32 = ids[1].parse().unwrap();
let k = [&u32_to_ivec(id1), &u32_to_ivec(id2)].concat();
let v = db
.open_tree("post_comments")?
.get(k)?
.ok_or(AppError::NotFound)?;
let (comment, _): (Comment, usize) = bincode::decode_from_slice(&v, standard())?;
comment.to_doc(None)
}
"solo" => {
let solo: Solo = get_one(&db, "solos", id1)?;
solo.to_doc(None)
}
"item" => {
let item: Item = get_one(&db, "items", id1)?;
item.to_doc(Some(id1))
}
_ => unreachable!(),
};
self.writer.add_document(doc)?;
Ok(())
}
pub fn del_index(&mut self, id: &str) -> tantivy::Result<()> {
self.writer
.delete_term(Term::from_field_text(FIELDS.id, id));
Ok(())
}
pub fn commit(&mut self) -> tantivy::Result<()> {
self.writer.commit()?;
Ok(())
}
}
const MULTI_LINGO_TOKENIZER: &str = "multi_lingo_tokenizer";
#[derive(Clone)]
struct MultiLingoTokenizer;
impl Tokenizer for MultiLingoTokenizer {
fn token_stream<'a>(&self, text: &'a str) -> BoxTokenStream<'a> {
if text.is_empty() {
return BoxTokenStream::from(MultiLingoTokenStream {
tokens: vec![],
index: 0,
});
}
let tokens = pre_tokenize_text(text);
BoxTokenStream::from(MultiLingoTokenStream { tokens, index: 0 })
}
}
struct MultiLingoTokenStream {
tokens: Vec<Token>,
index: usize,
}
impl TokenStream for MultiLingoTokenStream {
fn advance(&mut self) -> bool {
if self.index < self.tokens.len() {
self.index += 1;
true
} else {
false
}
}
fn token(&self) -> &Token {
&self.tokens[self.index - 1]
}
fn token_mut(&mut self) -> &mut Token {
&mut self.tokens[self.index - 1]
}
}
static JIEBA: Lazy<Jieba> = Lazy::new(Jieba::new);
static STEMMER_ENG: Lazy<Stemmer> = Lazy::new(|| Stemmer::create(Algorithm::English));
fn pre_tokenize_text(text: &str) -> Vec<Token> {
let mut tokens = Vec::with_capacity(text.len() / 4);
match detect_language(text) {
whichlang::Lang::Eng => {
for (idx, (offset, word)) in text.unicode_word_indices().enumerate() {
let word = word.to_lowercase();
if !STOP_WORDS_ENG.contains(&word) && word.len() <= 30 {
tokens.push(Token {
offset_from: offset,
offset_to: offset + word.len(),
position: idx,
text: STEMMER_ENG.stem(&word).to_string(),
position_length: 1,
});
}
}
}
whichlang::Lang::Cmn => {
let text = fast2s::convert(text);
let orig_tokens = JIEBA.tokenize(&text, TokenizeMode::Search, true);
let mut indices = text.char_indices().collect::<Vec<_>>();
indices.push((text.len(), '\0'));
for token in orig_tokens {
if !STOP_WORDS_CMN.contains(token.word) && token.word.len() <= 30 {
tokens.push(Token {
offset_from: indices[token.start].0,
offset_to: indices[token.end].0,
position: token.start,
text: token.word.to_lowercase(),
position_length: 1,
});
}
}
}
_ => {
for (idx, (offset, word)) in text.unicode_word_indices().enumerate() {
let word = word.to_lowercase();
if word.len() <= 30 {
tokens.push(Token {
offset_from: offset,
offset_to: offset + word.len(),
position: idx,
text: word,
position_length: 1,
});
}
}
}
}
tokens
}
static STOP_WORDS_ENG: Lazy<HashSet<String>> = Lazy::new(|| {
stop_words::get(stop_words::LANGUAGE::English)
.into_iter()
.collect()
});
static STOP_WORDS_CMN: Lazy<HashSet<String>> = Lazy::new(|| {
let mut set: HashSet<_> = stop_words::get(stop_words::LANGUAGE::Chinese)
.into_iter()
.collect();
set.insert(" ".to_string());
set
});
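Putting the module together: one `Tan` value owns the single `IndexWriter` (tantivy allows only one writer per index), while the `SEARCHER` static serves reads through its own `IndexReader`. A hedged usage sketch with the names from this file:

```rust
use sled::Db;

// Hypothetical driver showing the writer lifecycle (names from this file).
fn reindex_one(db: &Db) -> Result<(), Box<dyn std::error::Error>> {
    let mut tan = Tan::init()?;                    // open or create the index at CONFIG.tantivy_path
    tan.add_doc("post42".to_owned(), db.clone())?; // load from sled, convert via ToDoc, add
    tan.del_index("comt7#3")?;                     // queue a delete_term on the stored id field
    tan.commit()?;                                 // publish; the reader reloads on commit
    Ok(())
}
```

Note that `add_doc` trusts the key format (`ids[0].parse().unwrap()`), so a malformed key in the `tan` tree would panic the indexing task.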

src/error.rs

@@ -15,6 +15,8 @@ pub enum AppError {
IoError(#[from] std::io::Error),
#[error("You must join inn first")]
NoJoinedInn,
#[error(transparent)]
TantivyError(#[from] tantivy::TantivyError),
// 4XX
#[error("Captcha Error")]

src/main.rs

@@ -4,7 +4,9 @@ use chrono::Utc;
 use freedit::{
     app_router::router,
     config::CONFIG,
-    controller::{db_utils::clear_invalid, feed::cron_feed, meta_handler::shutdown_signal},
+    controller::{
+        db_utils::clear_invalid, feed::cron_feed, meta_handler::shutdown_signal, tantivy::Tan,
+    },
     error::AppError,
     CURRENT_SHA256, GIT_COMMIT, VERSION,
 };
@@ -58,7 +60,40 @@ async fn main() -> Result<(), AppError> {
if let Err(e) = clear_invalid(&db2, "user_stats").await {
error!(%e);
}
-        sleep_seconds(3600 * 8).await;
+        sleep_seconds(3600 * 4).await;
}
});
let db2 = db.clone();
tokio::spawn(async move {
// TODO:
// 1. inn feed search
// 2. rebuild whole search
let mut tan = Tan::init().unwrap();
let mut subscriber = db2.open_tree("tan").unwrap().watch_prefix(vec![]);
while let Some(event) = (&mut subscriber).await {
let db2 = db2.clone();
let (k, op_type) = match event {
sled::Event::Insert { key, value } => {
if value.len() == 1 {
(key, "update")
} else {
(key, "add")
}
}
sled::Event::Remove { key } => (key, "delete"),
};
let id = String::from_utf8_lossy(&k);
if op_type == "delete" || op_type == "update" {
tan.del_index(&id).unwrap();
}
if op_type == "update" || op_type == "add" {
tan.add_doc(id.into(), db2).unwrap();
}
tan.commit().unwrap();
}
});
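This loop is what decouples request handlers from the index: handlers never touch the `IndexWriter`, they only write keys into the `tan` tree, and this single task maps each sled event to `del_index`/`add_doc`. Updates are handled as delete-then-add because tantivy has no in-place document update. A sketch of the producing side, from anywhere in the app:

```rust
use sled::Db;

// Producer side (hypothetical helper): a handler does no locking and holds
// no IndexWriter — it only writes a key; the watch_prefix loop above does the rest.
fn queue_solo_for_index(db: &Db, sid: u32) -> sled::Result<()> {
    db.open_tree("tan")?.insert(format!("solo{}", sid), &[])?;
    Ok(())
}
```

Committing after every single event is the simplest correct choice for a [wip] commit; batching several events per commit would be the obvious next step, alongside the rebuild TODO noted above.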