diff --git a/crates/store/src/search/query.rs b/crates/store/src/search/query.rs index b0dfc753..0b3e4c9a 100644 --- a/crates/store/src/search/query.rs +++ b/crates/store/src/search/query.rs @@ -53,214 +53,223 @@ impl Store { .details("Account ID must be specified before other filters")); } - #[cfg(feature = "test_mode")] - { - if query.filters.len() == 1 { - state.bm = Some(mask.clone()); - } - } + let mut results; - let mut filters = query.filters.into_iter().peekable(); - while let Some(filter) = filters.next() { - let mut result = match filter { - SearchFilter::Operator { field, op, value } => { - if matches!(field, SearchField::AccountId) { - continue; - } + if query.filters.len() > 1 { + let mut filters = query.filters.into_iter().peekable(); + while let Some(filter) = filters.next() { + let mut result = match filter { + SearchFilter::Operator { field, op, value } => { + if matches!(field, SearchField::AccountId) { + continue; + } - if field.is_text() - && matches!(op, SearchOperator::Contains | SearchOperator::Equal) - { - let (value, language) = match value { - SearchValue::Text { value, language } => (value, language), - _ => { - return Err(trc::StoreEvent::UnexpectedError - .into_err() - .details("Expected text value for text field")); - } - }; - - if op == SearchOperator::Equal { - bitmaps - .merge_bitmaps( - self, - query.index, - account_id, - language - .tokenize_text(&value, MAX_TOKEN_LENGTH) - .map(|token| CheekyHash::new(token.word.as_bytes())), - field.u8_id(), - false, - ) - .await? - } else { - let mut result = RoaringBitmap::new(); - for token in Stemmer::new(&value, language, MAX_TOKEN_LENGTH) { - let mut tokens = Vec::with_capacity(3); - tokens.push(CheekyHash::new(token.word.as_bytes())); - tokens.push(CheekyHash::new(format!("{}*", token.word).as_bytes())); - if let Some(stemmed_word) = token.stemmed_word { - tokens.push(CheekyHash::new( - format!("{stemmed_word}*").as_bytes(), - )); + if field.is_text() + && matches!(op, SearchOperator::Contains | SearchOperator::Equal) + { + let (value, language) = match value { + SearchValue::Text { value, language } => (value, language), + _ => { + return Err(trc::StoreEvent::UnexpectedError + .into_err() + .details("Expected text value for text field")); } - let union = bitmaps + }; + + if op == SearchOperator::Equal { + bitmaps .merge_bitmaps( self, query.index, account_id, - tokens.into_iter(), + language + .tokenize_text(&value, MAX_TOKEN_LENGTH) + .map(|token| CheekyHash::new(token.word.as_bytes())), + field.u8_id(), + false, + ) + .await? + } else { + let mut result = RoaringBitmap::new(); + for token in Stemmer::new(&value, language, MAX_TOKEN_LENGTH) { + let mut tokens = Vec::with_capacity(3); + tokens.push(CheekyHash::new(token.word.as_bytes())); + tokens.push(CheekyHash::new( + format!("{}*", token.word).as_bytes(), + )); + if let Some(stemmed_word) = token.stemmed_word { + tokens.push(CheekyHash::new( + format!("{stemmed_word}*").as_bytes(), + )); + } + let union = bitmaps + .merge_bitmaps( + self, + query.index, + account_id, + tokens.into_iter(), + field.u8_id(), + true, + ) + .await?; + if let Some(union) = union { + if result.is_empty() { + result = union; + } else { + result.bitand_assign(&union); + if result.is_empty() { + break; + } + } + } else { + result.clear(); + break; + } + } + if !result.is_empty() { + Some(result) + } else { + None + } + } + } else if field.is_json() { + let (key, value) = match value { + SearchValue::KeyValues(kv) => kv.into_iter().next().unwrap(), + _ => { + return Err(trc::StoreEvent::UnexpectedError + .into_err() + .details("Expected text value for text field")); + } + }; + + if !value.is_empty() { + bitmaps + .merge_bitmaps( + self, + query.index, + account_id, + SpaceTokenizer::new(value.as_str(), MAX_TOKEN_LENGTH).map( + |value| { + CheekyHash::new(format!("{key} {value}").as_bytes()) + }, + ), field.u8_id(), true, ) - .await?; - if let Some(union) = union { - if result.is_empty() { - result = union; - } else { - result.bitand_assign(&union); - if result.is_empty() { - break; - } - } - } else { - result.clear(); - break; - } - } - if !result.is_empty() { - Some(result) + .await? } else { - None + bitmaps + .merge_bitmaps( + self, + query.index, + account_id, + [CheekyHash::new(key.as_bytes())].into_iter(), + field.u8_id(), + false, + ) + .await? } - } - } else if field.is_json() { - let (key, value) = match value { - SearchValue::KeyValues(kv) => kv.into_iter().next().unwrap(), - _ => { - return Err(trc::StoreEvent::UnexpectedError - .into_err() - .details("Expected text value for text field")); - } - }; + } else if field.is_indexed() { + let value = match value { + SearchValue::Text { value, .. } => { + let mut value = value.into_bytes(); + value.truncate(SEARCH_INDEX_MAX_FIELD_LEN); + value + } + SearchValue::Int(v) => (v as u64).to_be_bytes().to_vec(), + SearchValue::Uint(v) => v.to_be_bytes().to_vec(), + SearchValue::Boolean(v) => vec![v as u8], + SearchValue::KeyValues(_) => { + return Err(trc::StoreEvent::UnexpectedError + .into_err() + .details("Expected non key-value for non-text field")); + } + }; - if !value.is_empty() { - bitmaps - .merge_bitmaps( - self, - query.index, - account_id, - SpaceTokenizer::new(value.as_str(), MAX_TOKEN_LENGTH).map( - |value| { - CheekyHash::new(format!("{key} {value}").as_bytes()) - }, - ), - field.u8_id(), - true, - ) - .await? - } else { - bitmaps - .merge_bitmaps( - self, - query.index, - account_id, - [CheekyHash::new(key.as_bytes())].into_iter(), - field.u8_id(), - false, - ) - .await? - } - } else if field.is_indexed() { - let value = match value { - SearchValue::Text { value, .. } => { - let mut value = value.into_bytes(); - value.truncate(SEARCH_INDEX_MAX_FIELD_LEN); - value - } - SearchValue::Int(v) => (v as u64).to_be_bytes().to_vec(), - SearchValue::Uint(v) => v.to_be_bytes().to_vec(), - SearchValue::Boolean(v) => vec![v as u8], - SearchValue::KeyValues(_) => { - return Err(trc::StoreEvent::UnexpectedError - .into_err() - .details("Expected non key-value for non-text field")); - } - }; - - range_to_bitmap(self, query.index, account_id, field.u8_id(), &value, op) + range_to_bitmap( + self, + query.index, + account_id, + field.u8_id(), + &value, + op, + ) .await? - } else { - return Err(trc::StoreEvent::UnexpectedError - .into_err() - .details(format!("Field {field:?} is not indexed"))); - } - } - SearchFilter::DocumentSet(bitmap) => Some(bitmap), - op @ (SearchFilter::And | SearchFilter::Or | SearchFilter::Not) => { - stack.push(state); - state = State { op, bm: None }; - continue; - } - SearchFilter::End => { - if let Some(prev_state) = stack.pop() { - let bm = state.bm; - state = prev_state; - bm - } else { - break; - } - } - }; - - // Apply logical operation - if let Some(dest) = &mut state.bm { - match state.op { - SearchFilter::And => { - if let Some(result) = result { - dest.bitand_assign(result); } else { - dest.clear(); + return Err(trc::StoreEvent::UnexpectedError + .into_err() + .details(format!("Field {field:?} is not indexed"))); } } - SearchFilter::Or => { - if let Some(result) = result { - dest.bitor_assign(result); + SearchFilter::DocumentSet(bitmap) => Some(bitmap), + op @ (SearchFilter::And | SearchFilter::Or | SearchFilter::Not) => { + stack.push(state); + state = State { op, bm: None }; + continue; + } + SearchFilter::End => { + if let Some(prev_state) = stack.pop() { + let bm = state.bm; + state = prev_state; + bm + } else { + break; } } - SearchFilter::Not => { - if let Some(mut result) = result { - result.bitxor_assign(&mask); - dest.bitand_assign(result); + }; + + // Apply logical operation + if let Some(dest) = &mut state.bm { + match state.op { + SearchFilter::And => { + if let Some(result) = result { + dest.bitand_assign(result); + } else { + dest.clear(); + } } + SearchFilter::Or => { + if let Some(result) = result { + dest.bitor_assign(result); + } + } + SearchFilter::Not => { + if let Some(mut result) = result { + result.bitxor_assign(&mask); + dest.bitand_assign(result); + } + } + _ => unreachable!(), } - _ => unreachable!(), + } else if let Some(result_) = &mut result { + if let SearchFilter::Not = state.op { + result_.bitxor_assign(&mask); + } + state.bm = result; + } else if let SearchFilter::Not = state.op { + state.bm = Some(mask.clone()); + } else { + state.bm = Some(RoaringBitmap::new()); } - } else if let Some(result_) = &mut result { - if let SearchFilter::Not = state.op { - result_.bitxor_assign(&mask); + + // And short circuit + if matches!(state.op, SearchFilter::And) && state.bm.as_ref().unwrap().is_empty() { + while let Some(filter) = filters.peek() { + if matches!(filter, SearchFilter::End) { + break; + } else { + filters.next(); + } + } } - state.bm = result; - } else if let SearchFilter::Not = state.op { - state.bm = Some(mask.clone()); - } else { - state.bm = Some(RoaringBitmap::new()); } - // And short circuit - if matches!(state.op, SearchFilter::And) && state.bm.as_ref().unwrap().is_empty() { - while let Some(filter) = filters.peek() { - if matches!(filter, SearchFilter::End) { - break; - } else { - filters.next(); - } - } - } + results = state.bm.unwrap_or_default(); + results.bitand_assign(&mask); + } else { + results = mask; } - let mut results = state.bm.unwrap_or_default(); - results.bitand_assign(&mask); if results.len() > 1 && !query.comparators.is_empty() { let mut comparators = Vec::with_capacity(query.comparators.len()); for comparator in query.comparators {