mirror of
https://github.com/stalwartlabs/stalwart.git
synced 2026-03-17 14:34:03 +00:00
Spam filter and index configuration updates
This commit is contained in:
6
.github/workflows/ci.yml
vendored
6
.github/workflows/ci.yml
vendored
@@ -311,7 +311,7 @@ jobs:
|
||||
- name: Build
|
||||
run: |
|
||||
rustup target add ${{matrix.target}}
|
||||
cargo build --release --target ${{matrix.target}} -p stalwart --no-default-features --features "sqlite postgres mysql rocks elastic s3 redis azure nats enterprise"
|
||||
cargo build --release --target ${{matrix.target}} -p stalwart --no-default-features --features "sqlite postgres mysql rocks s3 redis azure nats enterprise"
|
||||
cargo build --release --target ${{matrix.target}} -p stalwart-cli
|
||||
mkdir -p artifacts
|
||||
mv ./target/${{matrix.target}}/release/stalwart.exe ./artifacts/stalwart.exe
|
||||
@@ -349,14 +349,14 @@ jobs:
|
||||
# Get latest FoundationDB installer
|
||||
curl --retry 5 -Lso foundationdb.pkg "$(gh api -X GET /repos/apple/foundationdb/releases --jq '.[] | select(.prerelease == false) | .assets[] | select(.name | test("${{startsWith(matrix.target, 'x86') && 'x86_64' || 'arm64'}}" + ".pkg$")) | .browser_download_url' | head -n1)"
|
||||
sudo installer -allowUntrusted -dumplog -pkg foundationdb.pkg -target /
|
||||
cargo build --release --target ${{matrix.target}} -p stalwart --no-default-features --features "foundationdb elastic s3 redis nats enterprise"
|
||||
cargo build --release --target ${{matrix.target}} -p stalwart --no-default-features --features "foundationdb s3 redis nats enterprise"
|
||||
mkdir -p artifacts
|
||||
mv ./target/${{matrix.target}}/release/stalwart ./artifacts/stalwart-foundationdb
|
||||
|
||||
- name: Build
|
||||
run: |
|
||||
rustup target add ${{matrix.target}}
|
||||
cargo build --release --target ${{matrix.target}} -p stalwart --no-default-features --features "sqlite postgres mysql rocks elastic s3 redis azure nats enterprise"
|
||||
cargo build --release --target ${{matrix.target}} -p stalwart --no-default-features --features "sqlite postgres mysql rocks s3 redis azure nats enterprise"
|
||||
cargo build --release --target ${{matrix.target}} -p stalwart-cli
|
||||
mkdir -p artifacts
|
||||
mv ./target/${{matrix.target}}/release/stalwart ./artifacts/stalwart
|
||||
|
||||
@@ -92,7 +92,7 @@ RUN \
|
||||
--mount=type=cache,target=/usr/local/cargo/git \
|
||||
source /env-cargo && \
|
||||
if [ ! -z "${FDB_ARCH}" ]; then \
|
||||
RUSTFLAGS="-L /usr/lib" cargo chef cook --recipe-path recipe.json --zigbuild --release --target ${TARGET} -p stalwart --no-default-features --features "foundationdb elastic s3 redis nats enterprise"; \
|
||||
RUSTFLAGS="-L /usr/lib" cargo chef cook --recipe-path recipe.json --zigbuild --release --target ${TARGET} -p stalwart --no-default-features --features "foundationdb s3 redis nats enterprise"; \
|
||||
fi
|
||||
RUN \
|
||||
--mount=type=secret,id=ACTIONS_RESULTS_URL,env=ACTIONS_RESULTS_URL \
|
||||
@@ -100,7 +100,7 @@ RUN \
|
||||
--mount=type=cache,target=/usr/local/cargo/registry \
|
||||
--mount=type=cache,target=/usr/local/cargo/git \
|
||||
source /env-cargo && \
|
||||
cargo chef cook --recipe-path recipe.json --zigbuild --release --target ${TARGET} -p stalwart --no-default-features --features "sqlite postgres mysql rocks elastic s3 redis azure nats enterprise" && \
|
||||
cargo chef cook --recipe-path recipe.json --zigbuild --release --target ${TARGET} -p stalwart --no-default-features --features "sqlite postgres mysql rocks s3 redis azure nats enterprise" && \
|
||||
cargo chef cook --recipe-path recipe.json --zigbuild --release --target ${TARGET} -p stalwart-cli
|
||||
# Copy the source code
|
||||
COPY . .
|
||||
@@ -114,7 +114,7 @@ RUN \
|
||||
--mount=type=cache,target=/usr/local/cargo/git \
|
||||
source /env-cargo && \
|
||||
if [ ! -z "${FDB_ARCH}" ]; then \
|
||||
RUSTFLAGS="-L /usr/lib" cargo zigbuild --release --target ${TARGET} -p stalwart --no-default-features --features "foundationdb elastic s3 redis nats enterprise" && \
|
||||
RUSTFLAGS="-L /usr/lib" cargo zigbuild --release --target ${TARGET} -p stalwart --no-default-features --features "foundationdb s3 redis nats enterprise" && \
|
||||
mv /app/target/${TARGET}/release/stalwart /app/artifact/stalwart-foundationdb; \
|
||||
fi
|
||||
# Build generic version
|
||||
@@ -124,7 +124,7 @@ RUN \
|
||||
--mount=type=cache,target=/usr/local/cargo/registry \
|
||||
--mount=type=cache,target=/usr/local/cargo/git \
|
||||
source /env-cargo && \
|
||||
cargo zigbuild --release --target ${TARGET} -p stalwart --no-default-features --features "sqlite postgres mysql rocks elastic s3 redis azure nats enterprise" && \
|
||||
cargo zigbuild --release --target ${TARGET} -p stalwart --no-default-features --features "sqlite postgres mysql rocks s3 redis azure nats enterprise" && \
|
||||
cargo zigbuild --release --target ${TARGET} -p stalwart-cli && \
|
||||
mv /app/target/${TARGET}/release/stalwart /app/artifact/stalwart && \
|
||||
mv /app/target/${TARGET}/release/stalwart-cli /app/artifact/stalwart-cli
|
||||
|
||||
@@ -219,7 +219,7 @@ impl JmapConfig {
|
||||
let mut jmap = JmapConfig {
|
||||
default_language: Language::from_iso_639(
|
||||
config
|
||||
.value("storage.full-text.default-language")
|
||||
.value("storage.search-index.default-language")
|
||||
.unwrap_or("en"),
|
||||
)
|
||||
.unwrap_or(Language::English),
|
||||
@@ -356,7 +356,9 @@ impl JmapConfig {
|
||||
calendar_parse_max_items: config
|
||||
.property("jmap.calendar.parse.max-items")
|
||||
.unwrap_or(10),
|
||||
index_batch_size: config.property("jmap.index.batch-size").unwrap_or(100),
|
||||
index_batch_size: config
|
||||
.property("storage.search-index.batch-size")
|
||||
.unwrap_or(100),
|
||||
index_fields: AHashMap::new(),
|
||||
default_folders,
|
||||
shared_folder,
|
||||
@@ -379,14 +381,17 @@ impl JmapConfig {
|
||||
};
|
||||
|
||||
if !config
|
||||
.property_or_default::<bool>(&format!("jmap.index.{index_name}.enabled"), "true")
|
||||
.property_or_default::<bool>(
|
||||
&format!("storage.search-index.{index_name}.enabled"),
|
||||
"true",
|
||||
)
|
||||
.unwrap_or(true)
|
||||
{
|
||||
continue;
|
||||
}
|
||||
|
||||
for (_, field) in
|
||||
config.properties::<SearchField>(&format!("jmap.index.{index_name}.fields"))
|
||||
for (_, field) in config
|
||||
.properties::<SearchField>(&format!("storage.search-index.{index_name}.fields"))
|
||||
{
|
||||
fields.insert(field);
|
||||
}
|
||||
|
||||
@@ -91,6 +91,8 @@ pub struct ClassifierConfig {
|
||||
pub auto_learn_ham_score: f32,
|
||||
pub hold_samples_for: u64,
|
||||
pub train_frequency: Option<u64>,
|
||||
pub log_scale: bool,
|
||||
pub l2_normalize: bool,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Default)]
|
||||
@@ -454,7 +456,7 @@ impl ClassifierConfig {
|
||||
let ccfh = match config.value("spam-filter.classifier.model") {
|
||||
Some("ftrl-fh") | None => false,
|
||||
Some("ftrl-ccfh") => true,
|
||||
Some("disabled") => return None,
|
||||
Some("disabled" | "disable") => return None,
|
||||
Some(other) => {
|
||||
config.new_build_error(
|
||||
"spam-filter.classifier.model",
|
||||
@@ -498,11 +500,11 @@ impl ClassifierConfig {
|
||||
.unwrap_or(Duration::from_secs(180 * 24 * 60 * 60))
|
||||
.as_secs(),
|
||||
min_ham_samples: config
|
||||
.property_or_default("spam-filter.classifier.samples.min-ham", "10")
|
||||
.unwrap_or(10),
|
||||
.property_or_default("spam-filter.classifier.samples.min-ham", "100")
|
||||
.unwrap_or(100),
|
||||
min_spam_samples: config
|
||||
.property_or_default("spam-filter.classifier.samples.min-spam", "10")
|
||||
.unwrap_or(10),
|
||||
.property_or_default("spam-filter.classifier.samples.min-spam", "100")
|
||||
.unwrap_or(100),
|
||||
train_frequency: config
|
||||
.property_or_default::<Option<Duration>>(
|
||||
"spam-filter.classifier.training.frequency",
|
||||
@@ -510,6 +512,12 @@ impl ClassifierConfig {
|
||||
)
|
||||
.unwrap_or(Some(Duration::from_secs(12 * 60 * 60)))
|
||||
.map(|d| d.as_secs()),
|
||||
log_scale: config
|
||||
.property_or_default("spam-filter.classifier.features.log-scale", "true")
|
||||
.unwrap_or(true),
|
||||
l2_normalize: config
|
||||
.property_or_default("spam-filter.classifier.features.l2-normalize", "true")
|
||||
.unwrap_or(true),
|
||||
}
|
||||
.into()
|
||||
}
|
||||
|
||||
@@ -4,7 +4,13 @@
|
||||
* SPDX-License-Identifier: AGPL-3.0-only OR LicenseRef-SEL
|
||||
*/
|
||||
|
||||
use common::{Server, auth::AccessToken, config::spamfilter::SpamFilterAction, psl};
|
||||
use common::{
|
||||
Server,
|
||||
auth::AccessToken,
|
||||
config::spamfilter::SpamFilterAction,
|
||||
manager::{SPAM_CLASSIFIER_KEY, SPAM_TRAINER_KEY},
|
||||
psl,
|
||||
};
|
||||
use directory::{
|
||||
Permission,
|
||||
backend::internal::manage::{self, ManageDirectory},
|
||||
@@ -87,7 +93,7 @@ impl ManageSpamHandler for Server {
|
||||
access_token: &AccessToken,
|
||||
) -> trc::Result<HttpResponse> {
|
||||
match (path.get(1).copied(), path.get(2).copied(), req.method()) {
|
||||
(Some("sample"), Some(class @ ("ham" | "spam")), &Method::POST) => {
|
||||
(Some("upload"), Some(class @ ("ham" | "spam")), &Method::POST) => {
|
||||
// Validate the access token
|
||||
access_token.assert_has_permission(Permission::SpamFilterTrain)?;
|
||||
|
||||
@@ -166,6 +172,12 @@ impl ManageSpamHandler for Server {
|
||||
false
|
||||
}
|
||||
}
|
||||
Some("delete") => {
|
||||
for key in [SPAM_CLASSIFIER_KEY, SPAM_TRAINER_KEY] {
|
||||
self.blob_store().delete_blob(key).await?;
|
||||
}
|
||||
true
|
||||
}
|
||||
Some("status") => self.inner.ipc.train_task_controller.is_running(),
|
||||
_ => {
|
||||
return Err(trc::ResourceEvent::NotFound.into_err());
|
||||
|
||||
@@ -216,6 +216,10 @@ pub(crate) async fn migrate_blobs_v014(server: &Server) -> trc::Result<()> {
|
||||
);
|
||||
}
|
||||
OldType::Undelete { deleted_at, size } => {
|
||||
// SPDX-SnippetBegin
|
||||
// SPDX-FileCopyrightText: 2020 Stalwart Labs LLC <hello@stalw.art>
|
||||
// SPDX-License-Identifier: LicenseRef-SEL
|
||||
|
||||
#[cfg(feature = "enterprise")]
|
||||
{
|
||||
batch
|
||||
@@ -244,6 +248,8 @@ pub(crate) async fn migrate_blobs_v014(server: &Server) -> trc::Result<()> {
|
||||
.caused_by(trc::location!())?,
|
||||
);
|
||||
}
|
||||
|
||||
// SPDX-SnippetEnd
|
||||
}
|
||||
OldType::Temp => {
|
||||
batch.set(
|
||||
|
||||
@@ -124,6 +124,7 @@ pub trait FeatureBuilder {
|
||||
&self,
|
||||
features_in: &HashMap<I, f32>,
|
||||
account_id: Option<u32>,
|
||||
l2_normalize: bool,
|
||||
) -> Vec<Self::Feature> {
|
||||
let mut features_out = Vec::with_capacity(features_in.len());
|
||||
let mut buf = Vec::with_capacity(2 + 4 + 63);
|
||||
@@ -141,14 +142,16 @@ pub trait FeatureBuilder {
|
||||
}
|
||||
|
||||
// L2 normalization
|
||||
let sum_of_squares = features_out
|
||||
.iter()
|
||||
.map(|f| f.weight() as f64 * f.weight() as f64)
|
||||
.sum::<f64>();
|
||||
if sum_of_squares > 0.0 {
|
||||
let norm = sum_of_squares.sqrt() as f32;
|
||||
for feature in &mut features_out {
|
||||
*feature.weight_mut() /= norm;
|
||||
if l2_normalize {
|
||||
let sum_of_squares = features_out
|
||||
.iter()
|
||||
.map(|f| f.weight() as f64 * f.weight() as f64)
|
||||
.sum::<f64>();
|
||||
if sum_of_squares > 0.0 {
|
||||
let norm = sum_of_squares.sqrt() as f32;
|
||||
for feature in &mut features_out {
|
||||
*feature.weight_mut() /= norm;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -410,7 +410,7 @@ pub mod tests {
|
||||
}
|
||||
builder.scale(&mut sample);
|
||||
samples.push(Sample {
|
||||
features: builder.build(&sample, 12345.into()),
|
||||
features: builder.build(&sample, 12345.into(), true),
|
||||
class: if *class { 1.0 } else { 0.0 },
|
||||
});
|
||||
}
|
||||
@@ -431,7 +431,7 @@ pub mod tests {
|
||||
}
|
||||
builder.scale(&mut sample);
|
||||
samples.push(Sample {
|
||||
features: builder.build(&sample, 12345.into()),
|
||||
features: builder.build(&sample, 12345.into(), true),
|
||||
class: if *class { 1.0 } else { 0.0 },
|
||||
});
|
||||
}
|
||||
|
||||
@@ -367,16 +367,20 @@ impl SpamClassifier for Server {
|
||||
|
||||
match &task {
|
||||
TrainTask::Fh { builder, .. } => {
|
||||
builder.scale(&mut tokens);
|
||||
if config.log_scale {
|
||||
builder.scale(&mut tokens);
|
||||
}
|
||||
fh_samples.push(Sample::new(
|
||||
builder.build(&tokens, account_id),
|
||||
builder.build(&tokens, account_id, config.l2_normalize),
|
||||
sample.is_spam,
|
||||
));
|
||||
}
|
||||
TrainTask::Ccfh { builder, .. } => {
|
||||
builder.scale(&mut tokens);
|
||||
if config.log_scale {
|
||||
builder.scale(&mut tokens);
|
||||
}
|
||||
ccfh_samples.push(Sample::new(
|
||||
builder.build(&tokens, account_id),
|
||||
builder.build(&tokens, account_id, config.l2_normalize),
|
||||
sample.is_spam,
|
||||
));
|
||||
}
|
||||
@@ -558,6 +562,9 @@ impl SpamClassifier for Server {
|
||||
|
||||
async fn spam_classify(&self, ctx: &mut SpamFilterContext<'_>) -> trc::Result<()> {
|
||||
let classifier = self.inner.data.spam_classifier.load_full();
|
||||
let Some(config) = &self.core.spam.classifier else {
|
||||
return Ok(());
|
||||
};
|
||||
|
||||
let started = Instant::now();
|
||||
match classifier.as_ref() {
|
||||
@@ -566,7 +573,9 @@ impl SpamClassifier for Server {
|
||||
let mut has_prediction = false;
|
||||
let mut tokens = self.spam_build_tokens(ctx).await.0;
|
||||
let feature_builder = classifier.feature_builder();
|
||||
feature_builder.scale(&mut tokens);
|
||||
if config.log_scale {
|
||||
feature_builder.scale(&mut tokens);
|
||||
}
|
||||
|
||||
for rcpt in &ctx.input.env_rcpt_to {
|
||||
let prediction = if let Some(account_id) = self
|
||||
@@ -577,9 +586,11 @@ impl SpamClassifier for Server {
|
||||
{
|
||||
has_prediction = true;
|
||||
classifier
|
||||
.predict_proba_sample(
|
||||
&feature_builder.build(&tokens, account_id.into()),
|
||||
)
|
||||
.predict_proba_sample(&feature_builder.build(
|
||||
&tokens,
|
||||
account_id.into(),
|
||||
config.l2_normalize,
|
||||
))
|
||||
.into()
|
||||
} else {
|
||||
None
|
||||
@@ -591,8 +602,11 @@ impl SpamClassifier for Server {
|
||||
ctx.result.classifier_confidence = classifier_confidence;
|
||||
} else {
|
||||
// None of the recipients are local, default to global model prediction
|
||||
let prediction =
|
||||
classifier.predict_proba_sample(&feature_builder.build(&tokens, None));
|
||||
let prediction = classifier.predict_proba_sample(&feature_builder.build(
|
||||
&tokens,
|
||||
None,
|
||||
config.l2_normalize,
|
||||
));
|
||||
ctx.result.classifier_confidence =
|
||||
vec![prediction.into(); ctx.input.env_rcpt_to.len()];
|
||||
}
|
||||
@@ -602,7 +616,9 @@ impl SpamClassifier for Server {
|
||||
let mut has_prediction = false;
|
||||
let mut tokens = self.spam_build_tokens(ctx).await.0;
|
||||
let feature_builder = classifier.feature_builder();
|
||||
feature_builder.scale(&mut tokens);
|
||||
if config.log_scale {
|
||||
feature_builder.scale(&mut tokens);
|
||||
}
|
||||
|
||||
for rcpt in &ctx.input.env_rcpt_to {
|
||||
let prediction = if let Some(account_id) = self
|
||||
@@ -613,9 +629,11 @@ impl SpamClassifier for Server {
|
||||
{
|
||||
has_prediction = true;
|
||||
classifier
|
||||
.predict_proba_sample(
|
||||
&feature_builder.build(&tokens, account_id.into()),
|
||||
)
|
||||
.predict_proba_sample(&feature_builder.build(
|
||||
&tokens,
|
||||
account_id.into(),
|
||||
config.l2_normalize,
|
||||
))
|
||||
.into()
|
||||
} else {
|
||||
None
|
||||
@@ -627,8 +645,11 @@ impl SpamClassifier for Server {
|
||||
ctx.result.classifier_confidence = classifier_confidence;
|
||||
} else {
|
||||
// None of the recipients are local, default to global model prediction
|
||||
let prediction =
|
||||
classifier.predict_proba_sample(&feature_builder.build(&tokens, None));
|
||||
let prediction = classifier.predict_proba_sample(&feature_builder.build(
|
||||
&tokens,
|
||||
None,
|
||||
config.l2_normalize,
|
||||
));
|
||||
ctx.result.classifier_confidence =
|
||||
vec![prediction.into(); ctx.input.env_rcpt_to.len()];
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user