From 427a6cb597011e3d7165d5858d3bf9e8119aef6b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Adrian=20Wo=C5=BAniak?= Date: Tue, 29 Oct 2024 17:19:10 +0100 Subject: [PATCH] Search not working --- .gitignore | 1 + src/actors/search.rs | 96 ++++++++++++++++++++++++++++++++++++++++---- src/main.rs | 57 +++++++++++++++++++++++++- 3 files changed, 145 insertions(+), 9 deletions(-) diff --git a/.gitignore b/.gitignore index 008694f..298a6a2 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,4 @@ /target /migration/target node_modules +/indices diff --git a/src/actors/search.rs b/src/actors/search.rs index 4f7bb61..b73a12e 100644 --- a/src/actors/search.rs +++ b/src/actors/search.rs @@ -6,6 +6,26 @@ use tantivy::query::QueryParser; use tantivy::{doc, Index, IndexWriter, ReloadPolicy, Searcher}; use tantivy::{schema::*, TantivyError}; +#[derive(Debug, Clone, derive_more::Deref)] +pub struct Search(pub Addr); + +#[derive(Debug, Clone)] +pub struct RecipeRecord { + pub id: u64, + pub title: String, + pub summary: Option, +} + +impl From for RecipeRecord { + fn from(value: crate::entities::recipies::Model) -> Self { + Self { + id: value.id as u64, + title: value.title, + summary: value.summary, + } + } +} + pub struct Inner { writer: IndexWriter, schema: Schema, @@ -18,13 +38,17 @@ pub struct SearchEngine(Arc>); impl SearchEngine { pub fn build() -> Result { let index_path = std::path::Path::new("./indices"); + std::fs::create_dir_all(&index_path).expect("Failed to create indices directory"); + let mut schema_builder = Schema::builder(); schema_builder.add_u64_field("id", INDEXED); schema_builder.add_text_field("title", TEXT); schema_builder.add_text_field("summary", TEXT); let schema = schema_builder.build(); - let index = Index::create_in_dir(&index_path, schema.clone())?; + let index = Index::create_in_dir(&index_path, schema.clone()) + .or_else(|_| Index::open_in_dir(&index_path)) + .expect("Failed to construct indices directory"); let index_writer: IndexWriter = 
index.writer(50_000_000)?; let reader = index @@ -50,9 +74,7 @@ impl actix::Actor for SearchEngine { #[derive(Debug, Message)] #[rtype(result = "Result")] pub struct CreateRecipe { - id: u64, - title: String, - summary: String, + pub record: RecipeRecord, } impl Handler for SearchEngine { @@ -68,10 +90,11 @@ impl Handler for SearchEngine { let title = shared.schema.get_field("summary").unwrap(); let summary = shared.schema.get_field("summary").unwrap(); + let msg = msg.record; let n = shared.writer.add_document(doc! { id => msg.id, title => msg.title, - summary => msg.summary, + summary => msg.summary.unwrap_or_default(), })?; shared.writer.commit()?; @@ -85,7 +108,7 @@ impl Handler for SearchEngine { #[derive(Debug, Message)] #[rtype(result = "Result,TantivyError>")] pub struct Find { - query: String, + pub query: String, } impl Handler for SearchEngine { @@ -102,19 +125,38 @@ impl Handler for SearchEngine { let summary = shared.schema.get_field("summary").unwrap(); let query_parser = QueryParser::for_index(&shared.index, vec![title, summary]); - let query = query_parser.parse_query(&msg.query)?; + let query = msg + .query + .split_whitespace() + .map(|piece| { + piece + .chars() + .filter(|c| c.is_alphabetic()) + .collect::() + }) + .filter(|s| !s.trim().is_empty()) + .collect::>() + .join(" OR "); + tracing::debug!("Query is: {query:?}"); + let query = query_parser.parse_query(&query)?; let rows = shared.searcher.search(&query, &TopDocs::with_limit(100))?; let ids = rows .into_iter() .filter_map(|row| { + tracing::debug!("tantivy row: {row:?}"); let doc: Option = shared.searcher.doc(row.1).ok(); doc }) .fold(Vec::with_capacity(1_000), |agg, doc| { + let json = doc.to_json(&shared.schema); + tracing::debug!("tantivy doc: {doc:?} {json:?}"); doc.get_all(id) - .filter_map(|id| id.as_u64()) + .filter_map(|id| { + tracing::debug!("tantivy id: {id:?}"); + id.as_u64() + }) .fold(agg, |mut agg, id| { agg.push(id); agg @@ -127,3 +169,41 @@ impl 
Handler for SearchEngine { ) } } + +#[derive(Debug, Message)] +#[rtype(result = "Result<(), TantivyError>")] +pub struct Refresh { + pub records: Vec, +} + +impl Handler for SearchEngine { + type Result = actix::ResponseActFuture>; + + fn handle(&mut self, msg: Refresh, _ctx: &mut Self::Context) -> Self::Result { + let inner = self.0.clone(); + Box::pin( + async move { + let mut shared = inner.lock().unwrap(); + + let id = shared.schema.get_field("id").unwrap(); + let title = shared.schema.get_field("title").unwrap(); + let summary = shared.schema.get_field("summary").unwrap(); + + shared.writer.delete_all_documents()?; + + for msg in msg.records { + tracing::debug!("creating search index for {msg:?}"); + let _n = shared.writer.add_document(doc! { + id => msg.id, + title => msg.title, + summary => msg.summary.unwrap_or_default(), + })?; + } + shared.writer.commit()?; + + Ok(()) + } + .into_actor(self), + ) + } +} diff --git a/src/main.rs b/src/main.rs index dcbfbba..e5cb35c 100644 --- a/src/main.rs +++ b/src/main.rs @@ -1,3 +1,4 @@ +use actix::Actor; use actix_files::Files; use actix_identity::IdentityMiddleware; use actix_session::{storage::RedisSessionStore, SessionMiddleware}; @@ -15,6 +16,30 @@ pub mod types; const SESSION_KEY: &'static str = "session"; +#[derive(serde::Deserialize)] +struct Q { + q: String, +} +#[actix_web::get("/s")] +async fn test_search( + q: actix_web::web::Query, + search: Data, +) -> actix_web::HttpResponse { + let res = search + .send(crate::actors::search::Find { + query: q.into_inner().q, + }) + .await; + tracing::debug!("search res: {res:?}"); + actix_web::HttpResponse::Ok().body( + serde_json::to_string(&match res { + Ok(Ok(res)) => res, + _ => Vec::new(), + }) + .unwrap_or_default(), + ) +} + #[actix_web::main] async fn main() { let _ = tracing_subscriber::fmt::init(); @@ -55,10 +80,38 @@ async fn main() { } }; drop(secret); - tracing::info!("{:?}", secret_key.master()); + tracing::debug!("session secret key loaded"); let 
redis_store = RedisSessionStore::new(redis_url.as_str()).await.unwrap(); + let search = { + let search_addr = crate::actors::search::SearchEngine::build() + .unwrap() + .start(); + use crate::actors::search::*; + use sea_orm::prelude::*; + let records = entities::prelude::Recipies::find() + .all(&db) + .await + .unwrap_or_default() + .into_iter() + .map(|recipe| RecipeRecord::from(recipe)) + .collect::>(); + let title = records + .first() + .map(|rec| rec.title.clone()) + .unwrap_or_else(|| "test".into()); + let _ = search_addr.send(Refresh { records }).await; + let res = search_addr + .send(Find { query: title }) + .await + .expect("Search can't fail") + .expect("Must find something"); + tracing::debug!("Test search result: {res:?}"); + Search(search_addr) + }; + // Transform to data + let search = Data::new(search); let admins = Data::new(admins); let db = Data::new(db); let redis = Data::new(redis); @@ -80,7 +133,9 @@ async fn main() { .app_data(admins.clone()) .app_data(db.clone()) .app_data(redis.clone()) + .app_data(search.clone()) .service(Files::new("/pages", "./pages")) + .service(test_search) .configure(routes::configure) }) .bind(&bind)