Spam filter and index configuration updates

This commit is contained in:
mdecimus
2025-12-15 16:30:27 +01:00
parent fd3736252d
commit 472bddf733
9 changed files with 100 additions and 45 deletions

View File

@@ -311,7 +311,7 @@ jobs:
- name: Build - name: Build
run: | run: |
rustup target add ${{matrix.target}} rustup target add ${{matrix.target}}
cargo build --release --target ${{matrix.target}} -p stalwart --no-default-features --features "sqlite postgres mysql rocks elastic s3 redis azure nats enterprise" cargo build --release --target ${{matrix.target}} -p stalwart --no-default-features --features "sqlite postgres mysql rocks s3 redis azure nats enterprise"
cargo build --release --target ${{matrix.target}} -p stalwart-cli cargo build --release --target ${{matrix.target}} -p stalwart-cli
mkdir -p artifacts mkdir -p artifacts
mv ./target/${{matrix.target}}/release/stalwart.exe ./artifacts/stalwart.exe mv ./target/${{matrix.target}}/release/stalwart.exe ./artifacts/stalwart.exe
@@ -349,14 +349,14 @@ jobs:
# Get latest FoundationDB installer # Get latest FoundationDB installer
curl --retry 5 -Lso foundationdb.pkg "$(gh api -X GET /repos/apple/foundationdb/releases --jq '.[] | select(.prerelease == false) | .assets[] | select(.name | test("${{startsWith(matrix.target, 'x86') && 'x86_64' || 'arm64'}}" + ".pkg$")) | .browser_download_url' | head -n1)" curl --retry 5 -Lso foundationdb.pkg "$(gh api -X GET /repos/apple/foundationdb/releases --jq '.[] | select(.prerelease == false) | .assets[] | select(.name | test("${{startsWith(matrix.target, 'x86') && 'x86_64' || 'arm64'}}" + ".pkg$")) | .browser_download_url' | head -n1)"
sudo installer -allowUntrusted -dumplog -pkg foundationdb.pkg -target / sudo installer -allowUntrusted -dumplog -pkg foundationdb.pkg -target /
cargo build --release --target ${{matrix.target}} -p stalwart --no-default-features --features "foundationdb elastic s3 redis nats enterprise" cargo build --release --target ${{matrix.target}} -p stalwart --no-default-features --features "foundationdb s3 redis nats enterprise"
mkdir -p artifacts mkdir -p artifacts
mv ./target/${{matrix.target}}/release/stalwart ./artifacts/stalwart-foundationdb mv ./target/${{matrix.target}}/release/stalwart ./artifacts/stalwart-foundationdb
- name: Build - name: Build
run: | run: |
rustup target add ${{matrix.target}} rustup target add ${{matrix.target}}
cargo build --release --target ${{matrix.target}} -p stalwart --no-default-features --features "sqlite postgres mysql rocks elastic s3 redis azure nats enterprise" cargo build --release --target ${{matrix.target}} -p stalwart --no-default-features --features "sqlite postgres mysql rocks s3 redis azure nats enterprise"
cargo build --release --target ${{matrix.target}} -p stalwart-cli cargo build --release --target ${{matrix.target}} -p stalwart-cli
mkdir -p artifacts mkdir -p artifacts
mv ./target/${{matrix.target}}/release/stalwart ./artifacts/stalwart mv ./target/${{matrix.target}}/release/stalwart ./artifacts/stalwart

View File

@@ -92,7 +92,7 @@ RUN \
--mount=type=cache,target=/usr/local/cargo/git \ --mount=type=cache,target=/usr/local/cargo/git \
source /env-cargo && \ source /env-cargo && \
if [ ! -z "${FDB_ARCH}" ]; then \ if [ ! -z "${FDB_ARCH}" ]; then \
RUSTFLAGS="-L /usr/lib" cargo chef cook --recipe-path recipe.json --zigbuild --release --target ${TARGET} -p stalwart --no-default-features --features "foundationdb elastic s3 redis nats enterprise"; \ RUSTFLAGS="-L /usr/lib" cargo chef cook --recipe-path recipe.json --zigbuild --release --target ${TARGET} -p stalwart --no-default-features --features "foundationdb s3 redis nats enterprise"; \
fi fi
RUN \ RUN \
--mount=type=secret,id=ACTIONS_RESULTS_URL,env=ACTIONS_RESULTS_URL \ --mount=type=secret,id=ACTIONS_RESULTS_URL,env=ACTIONS_RESULTS_URL \
@@ -100,7 +100,7 @@ RUN \
--mount=type=cache,target=/usr/local/cargo/registry \ --mount=type=cache,target=/usr/local/cargo/registry \
--mount=type=cache,target=/usr/local/cargo/git \ --mount=type=cache,target=/usr/local/cargo/git \
source /env-cargo && \ source /env-cargo && \
cargo chef cook --recipe-path recipe.json --zigbuild --release --target ${TARGET} -p stalwart --no-default-features --features "sqlite postgres mysql rocks elastic s3 redis azure nats enterprise" && \ cargo chef cook --recipe-path recipe.json --zigbuild --release --target ${TARGET} -p stalwart --no-default-features --features "sqlite postgres mysql rocks s3 redis azure nats enterprise" && \
cargo chef cook --recipe-path recipe.json --zigbuild --release --target ${TARGET} -p stalwart-cli cargo chef cook --recipe-path recipe.json --zigbuild --release --target ${TARGET} -p stalwart-cli
# Copy the source code # Copy the source code
COPY . . COPY . .
@@ -114,7 +114,7 @@ RUN \
--mount=type=cache,target=/usr/local/cargo/git \ --mount=type=cache,target=/usr/local/cargo/git \
source /env-cargo && \ source /env-cargo && \
if [ ! -z "${FDB_ARCH}" ]; then \ if [ ! -z "${FDB_ARCH}" ]; then \
RUSTFLAGS="-L /usr/lib" cargo zigbuild --release --target ${TARGET} -p stalwart --no-default-features --features "foundationdb elastic s3 redis nats enterprise" && \ RUSTFLAGS="-L /usr/lib" cargo zigbuild --release --target ${TARGET} -p stalwart --no-default-features --features "foundationdb s3 redis nats enterprise" && \
mv /app/target/${TARGET}/release/stalwart /app/artifact/stalwart-foundationdb; \ mv /app/target/${TARGET}/release/stalwart /app/artifact/stalwart-foundationdb; \
fi fi
# Build generic version # Build generic version
@@ -124,7 +124,7 @@ RUN \
--mount=type=cache,target=/usr/local/cargo/registry \ --mount=type=cache,target=/usr/local/cargo/registry \
--mount=type=cache,target=/usr/local/cargo/git \ --mount=type=cache,target=/usr/local/cargo/git \
source /env-cargo && \ source /env-cargo && \
cargo zigbuild --release --target ${TARGET} -p stalwart --no-default-features --features "sqlite postgres mysql rocks elastic s3 redis azure nats enterprise" && \ cargo zigbuild --release --target ${TARGET} -p stalwart --no-default-features --features "sqlite postgres mysql rocks s3 redis azure nats enterprise" && \
cargo zigbuild --release --target ${TARGET} -p stalwart-cli && \ cargo zigbuild --release --target ${TARGET} -p stalwart-cli && \
mv /app/target/${TARGET}/release/stalwart /app/artifact/stalwart && \ mv /app/target/${TARGET}/release/stalwart /app/artifact/stalwart && \
mv /app/target/${TARGET}/release/stalwart-cli /app/artifact/stalwart-cli mv /app/target/${TARGET}/release/stalwart-cli /app/artifact/stalwart-cli

View File

@@ -219,7 +219,7 @@ impl JmapConfig {
let mut jmap = JmapConfig { let mut jmap = JmapConfig {
default_language: Language::from_iso_639( default_language: Language::from_iso_639(
config config
.value("storage.full-text.default-language") .value("storage.search-index.default-language")
.unwrap_or("en"), .unwrap_or("en"),
) )
.unwrap_or(Language::English), .unwrap_or(Language::English),
@@ -356,7 +356,9 @@ impl JmapConfig {
calendar_parse_max_items: config calendar_parse_max_items: config
.property("jmap.calendar.parse.max-items") .property("jmap.calendar.parse.max-items")
.unwrap_or(10), .unwrap_or(10),
index_batch_size: config.property("jmap.index.batch-size").unwrap_or(100), index_batch_size: config
.property("storage.search-index.batch-size")
.unwrap_or(100),
index_fields: AHashMap::new(), index_fields: AHashMap::new(),
default_folders, default_folders,
shared_folder, shared_folder,
@@ -379,14 +381,17 @@ impl JmapConfig {
}; };
if !config if !config
.property_or_default::<bool>(&format!("jmap.index.{index_name}.enabled"), "true") .property_or_default::<bool>(
&format!("storage.search-index.{index_name}.enabled"),
"true",
)
.unwrap_or(true) .unwrap_or(true)
{ {
continue; continue;
} }
for (_, field) in for (_, field) in config
config.properties::<SearchField>(&format!("jmap.index.{index_name}.fields")) .properties::<SearchField>(&format!("storage.search-index.{index_name}.fields"))
{ {
fields.insert(field); fields.insert(field);
} }

View File

@@ -91,6 +91,8 @@ pub struct ClassifierConfig {
pub auto_learn_ham_score: f32, pub auto_learn_ham_score: f32,
pub hold_samples_for: u64, pub hold_samples_for: u64,
pub train_frequency: Option<u64>, pub train_frequency: Option<u64>,
pub log_scale: bool,
pub l2_normalize: bool,
} }
#[derive(Debug, Clone, Default)] #[derive(Debug, Clone, Default)]
@@ -454,7 +456,7 @@ impl ClassifierConfig {
let ccfh = match config.value("spam-filter.classifier.model") { let ccfh = match config.value("spam-filter.classifier.model") {
Some("ftrl-fh") | None => false, Some("ftrl-fh") | None => false,
Some("ftrl-ccfh") => true, Some("ftrl-ccfh") => true,
Some("disabled") => return None, Some("disabled" | "disable") => return None,
Some(other) => { Some(other) => {
config.new_build_error( config.new_build_error(
"spam-filter.classifier.model", "spam-filter.classifier.model",
@@ -498,11 +500,11 @@ impl ClassifierConfig {
.unwrap_or(Duration::from_secs(180 * 24 * 60 * 60)) .unwrap_or(Duration::from_secs(180 * 24 * 60 * 60))
.as_secs(), .as_secs(),
min_ham_samples: config min_ham_samples: config
.property_or_default("spam-filter.classifier.samples.min-ham", "10") .property_or_default("spam-filter.classifier.samples.min-ham", "100")
.unwrap_or(10), .unwrap_or(100),
min_spam_samples: config min_spam_samples: config
.property_or_default("spam-filter.classifier.samples.min-spam", "10") .property_or_default("spam-filter.classifier.samples.min-spam", "100")
.unwrap_or(10), .unwrap_or(100),
train_frequency: config train_frequency: config
.property_or_default::<Option<Duration>>( .property_or_default::<Option<Duration>>(
"spam-filter.classifier.training.frequency", "spam-filter.classifier.training.frequency",
@@ -510,6 +512,12 @@ impl ClassifierConfig {
) )
.unwrap_or(Some(Duration::from_secs(12 * 60 * 60))) .unwrap_or(Some(Duration::from_secs(12 * 60 * 60)))
.map(|d| d.as_secs()), .map(|d| d.as_secs()),
log_scale: config
.property_or_default("spam-filter.classifier.features.log-scale", "true")
.unwrap_or(true),
l2_normalize: config
.property_or_default("spam-filter.classifier.features.l2-normalize", "true")
.unwrap_or(true),
} }
.into() .into()
} }

View File

@@ -4,7 +4,13 @@
* SPDX-License-Identifier: AGPL-3.0-only OR LicenseRef-SEL * SPDX-License-Identifier: AGPL-3.0-only OR LicenseRef-SEL
*/ */
use common::{Server, auth::AccessToken, config::spamfilter::SpamFilterAction, psl}; use common::{
Server,
auth::AccessToken,
config::spamfilter::SpamFilterAction,
manager::{SPAM_CLASSIFIER_KEY, SPAM_TRAINER_KEY},
psl,
};
use directory::{ use directory::{
Permission, Permission,
backend::internal::manage::{self, ManageDirectory}, backend::internal::manage::{self, ManageDirectory},
@@ -87,7 +93,7 @@ impl ManageSpamHandler for Server {
access_token: &AccessToken, access_token: &AccessToken,
) -> trc::Result<HttpResponse> { ) -> trc::Result<HttpResponse> {
match (path.get(1).copied(), path.get(2).copied(), req.method()) { match (path.get(1).copied(), path.get(2).copied(), req.method()) {
(Some("sample"), Some(class @ ("ham" | "spam")), &Method::POST) => { (Some("upload"), Some(class @ ("ham" | "spam")), &Method::POST) => {
// Validate the access token // Validate the access token
access_token.assert_has_permission(Permission::SpamFilterTrain)?; access_token.assert_has_permission(Permission::SpamFilterTrain)?;
@@ -166,6 +172,12 @@ impl ManageSpamHandler for Server {
false false
} }
} }
Some("delete") => {
for key in [SPAM_CLASSIFIER_KEY, SPAM_TRAINER_KEY] {
self.blob_store().delete_blob(key).await?;
}
true
}
Some("status") => self.inner.ipc.train_task_controller.is_running(), Some("status") => self.inner.ipc.train_task_controller.is_running(),
_ => { _ => {
return Err(trc::ResourceEvent::NotFound.into_err()); return Err(trc::ResourceEvent::NotFound.into_err());

View File

@@ -216,6 +216,10 @@ pub(crate) async fn migrate_blobs_v014(server: &Server) -> trc::Result<()> {
); );
} }
OldType::Undelete { deleted_at, size } => { OldType::Undelete { deleted_at, size } => {
// SPDX-SnippetBegin
// SPDX-FileCopyrightText: 2020 Stalwart Labs LLC <hello@stalw.art>
// SPDX-License-Identifier: LicenseRef-SEL
#[cfg(feature = "enterprise")] #[cfg(feature = "enterprise")]
{ {
batch batch
@@ -244,6 +248,8 @@ pub(crate) async fn migrate_blobs_v014(server: &Server) -> trc::Result<()> {
.caused_by(trc::location!())?, .caused_by(trc::location!())?,
); );
} }
// SPDX-SnippetEnd
} }
OldType::Temp => { OldType::Temp => {
batch.set( batch.set(

View File

@@ -124,6 +124,7 @@ pub trait FeatureBuilder {
&self, &self,
features_in: &HashMap<I, f32>, features_in: &HashMap<I, f32>,
account_id: Option<u32>, account_id: Option<u32>,
l2_normalize: bool,
) -> Vec<Self::Feature> { ) -> Vec<Self::Feature> {
let mut features_out = Vec::with_capacity(features_in.len()); let mut features_out = Vec::with_capacity(features_in.len());
let mut buf = Vec::with_capacity(2 + 4 + 63); let mut buf = Vec::with_capacity(2 + 4 + 63);
@@ -141,14 +142,16 @@ pub trait FeatureBuilder {
} }
// L2 normalization // L2 normalization
let sum_of_squares = features_out if l2_normalize {
.iter() let sum_of_squares = features_out
.map(|f| f.weight() as f64 * f.weight() as f64) .iter()
.sum::<f64>(); .map(|f| f.weight() as f64 * f.weight() as f64)
if sum_of_squares > 0.0 { .sum::<f64>();
let norm = sum_of_squares.sqrt() as f32; if sum_of_squares > 0.0 {
for feature in &mut features_out { let norm = sum_of_squares.sqrt() as f32;
*feature.weight_mut() /= norm; for feature in &mut features_out {
*feature.weight_mut() /= norm;
}
} }
} }

View File

@@ -410,7 +410,7 @@ pub mod tests {
} }
builder.scale(&mut sample); builder.scale(&mut sample);
samples.push(Sample { samples.push(Sample {
features: builder.build(&sample, 12345.into()), features: builder.build(&sample, 12345.into(), true),
class: if *class { 1.0 } else { 0.0 }, class: if *class { 1.0 } else { 0.0 },
}); });
} }
@@ -431,7 +431,7 @@ pub mod tests {
} }
builder.scale(&mut sample); builder.scale(&mut sample);
samples.push(Sample { samples.push(Sample {
features: builder.build(&sample, 12345.into()), features: builder.build(&sample, 12345.into(), true),
class: if *class { 1.0 } else { 0.0 }, class: if *class { 1.0 } else { 0.0 },
}); });
} }

View File

@@ -367,16 +367,20 @@ impl SpamClassifier for Server {
match &task { match &task {
TrainTask::Fh { builder, .. } => { TrainTask::Fh { builder, .. } => {
builder.scale(&mut tokens); if config.log_scale {
builder.scale(&mut tokens);
}
fh_samples.push(Sample::new( fh_samples.push(Sample::new(
builder.build(&tokens, account_id), builder.build(&tokens, account_id, config.l2_normalize),
sample.is_spam, sample.is_spam,
)); ));
} }
TrainTask::Ccfh { builder, .. } => { TrainTask::Ccfh { builder, .. } => {
builder.scale(&mut tokens); if config.log_scale {
builder.scale(&mut tokens);
}
ccfh_samples.push(Sample::new( ccfh_samples.push(Sample::new(
builder.build(&tokens, account_id), builder.build(&tokens, account_id, config.l2_normalize),
sample.is_spam, sample.is_spam,
)); ));
} }
@@ -558,6 +562,9 @@ impl SpamClassifier for Server {
async fn spam_classify(&self, ctx: &mut SpamFilterContext<'_>) -> trc::Result<()> { async fn spam_classify(&self, ctx: &mut SpamFilterContext<'_>) -> trc::Result<()> {
let classifier = self.inner.data.spam_classifier.load_full(); let classifier = self.inner.data.spam_classifier.load_full();
let Some(config) = &self.core.spam.classifier else {
return Ok(());
};
let started = Instant::now(); let started = Instant::now();
match classifier.as_ref() { match classifier.as_ref() {
@@ -566,7 +573,9 @@ impl SpamClassifier for Server {
let mut has_prediction = false; let mut has_prediction = false;
let mut tokens = self.spam_build_tokens(ctx).await.0; let mut tokens = self.spam_build_tokens(ctx).await.0;
let feature_builder = classifier.feature_builder(); let feature_builder = classifier.feature_builder();
feature_builder.scale(&mut tokens); if config.log_scale {
feature_builder.scale(&mut tokens);
}
for rcpt in &ctx.input.env_rcpt_to { for rcpt in &ctx.input.env_rcpt_to {
let prediction = if let Some(account_id) = self let prediction = if let Some(account_id) = self
@@ -577,9 +586,11 @@ impl SpamClassifier for Server {
{ {
has_prediction = true; has_prediction = true;
classifier classifier
.predict_proba_sample( .predict_proba_sample(&feature_builder.build(
&feature_builder.build(&tokens, account_id.into()), &tokens,
) account_id.into(),
config.l2_normalize,
))
.into() .into()
} else { } else {
None None
@@ -591,8 +602,11 @@ impl SpamClassifier for Server {
ctx.result.classifier_confidence = classifier_confidence; ctx.result.classifier_confidence = classifier_confidence;
} else { } else {
// None of the recipients are local, default to global model prediction // None of the recipients are local, default to global model prediction
let prediction = let prediction = classifier.predict_proba_sample(&feature_builder.build(
classifier.predict_proba_sample(&feature_builder.build(&tokens, None)); &tokens,
None,
config.l2_normalize,
));
ctx.result.classifier_confidence = ctx.result.classifier_confidence =
vec![prediction.into(); ctx.input.env_rcpt_to.len()]; vec![prediction.into(); ctx.input.env_rcpt_to.len()];
} }
@@ -602,7 +616,9 @@ impl SpamClassifier for Server {
let mut has_prediction = false; let mut has_prediction = false;
let mut tokens = self.spam_build_tokens(ctx).await.0; let mut tokens = self.spam_build_tokens(ctx).await.0;
let feature_builder = classifier.feature_builder(); let feature_builder = classifier.feature_builder();
feature_builder.scale(&mut tokens); if config.log_scale {
feature_builder.scale(&mut tokens);
}
for rcpt in &ctx.input.env_rcpt_to { for rcpt in &ctx.input.env_rcpt_to {
let prediction = if let Some(account_id) = self let prediction = if let Some(account_id) = self
@@ -613,9 +629,11 @@ impl SpamClassifier for Server {
{ {
has_prediction = true; has_prediction = true;
classifier classifier
.predict_proba_sample( .predict_proba_sample(&feature_builder.build(
&feature_builder.build(&tokens, account_id.into()), &tokens,
) account_id.into(),
config.l2_normalize,
))
.into() .into()
} else { } else {
None None
@@ -627,8 +645,11 @@ impl SpamClassifier for Server {
ctx.result.classifier_confidence = classifier_confidence; ctx.result.classifier_confidence = classifier_confidence;
} else { } else {
// None of the recipients are local, default to global model prediction // None of the recipients are local, default to global model prediction
let prediction = let prediction = classifier.predict_proba_sample(&feature_builder.build(
classifier.predict_proba_sample(&feature_builder.build(&tokens, None)); &tokens,
None,
config.l2_normalize,
));
ctx.result.classifier_confidence = ctx.result.classifier_confidence =
vec![prediction.into(); ctx.input.env_rcpt_to.len()]; vec![prediction.into(); ctx.input.env_rcpt_to.len()];
} }