Skip to content

Implement ProcessCdnLog and ProcessCdnLogQueue background jobs #8036

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 10 commits into from
Feb 1, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions .env.sample
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,13 @@ export TEST_DATABASE_URL=
# Uses AWS credentials.
# export CLOUDFRONT_DISTRIBUTION=

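# Configuration for the CDN log queue. You can leave these commented out if
# you're not using the CDN log queue; a mocked queue is used in that case.
# If `CDN_LOG_QUEUE_URL` is set, the access key, secret key and region
# must be set as well.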
# export CDN_LOG_QUEUE_ACCESS_KEY=
# export CDN_LOG_QUEUE_SECRET_KEY=
# export CDN_LOG_QUEUE_URL=
# export CDN_LOG_QUEUE_REGION=

# Upstream location of the registry index. Background jobs will push to
# this URL. The default points to a local index for development.
# Run `./script/init-local-index.sh` to initialize this repo.
Expand Down
24 changes: 24 additions & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 2 additions & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -52,11 +52,13 @@ async-trait = "=0.1.77"
aws-credential-types = { version = "=1.1.4", features = ["hardcoded-credentials"] }
aws-ip-ranges = "=0.90.0"
aws-sdk-cloudfront = "=1.12.0"
aws-sdk-sqs = "=1.12.0"
axum = { version = "=0.7.4", features = ["macros", "matched-path"] }
axum-extra = { version = "=0.9.2", features = ["cookie-signed", "typed-header"] }
base64 = "=0.21.7"
bigdecimal = "=0.4.2"
cargo-manifest = "=0.13.0"
crates_io_cdn_logs = { path = "crates_io_cdn_logs" }
crates_io_env_vars = { path = "crates_io_env_vars" }
crates_io_github = { path = "crates_io_github" }
crates_io_index = { path = "crates_io_index" }
Expand Down
4 changes: 4 additions & 0 deletions src/admin/enqueue_job.rs
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ pub enum Command {
#[arg()]
name: String,
},
ProcessCdnLogQueue(jobs::ProcessCdnLogQueue),
SyncAdmins {
/// Force a sync even if one is already in progress
#[arg(long)]
Expand Down Expand Up @@ -89,6 +90,9 @@ pub fn run(command: Command) -> Result<()> {
Command::DailyDbMaintenance => {
jobs::DailyDbMaintenance.enqueue(conn)?;
}
Command::ProcessCdnLogQueue(job) => {
job.enqueue(conn)?;
}
Command::SquashIndex => {
jobs::SquashIndex.enqueue(conn)?;
}
Expand Down
4 changes: 2 additions & 2 deletions src/app.rs
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ pub struct App {
pub github_oauth: BasicClient,

/// The server configuration
pub config: config::Server,
pub config: Arc<config::Server>,

/// Cache the `version_id` of a `canonical_crate_name:semver` pair
///
Expand Down Expand Up @@ -158,7 +158,7 @@ impl App {
instance_metrics,
balance_capacity: Default::default(),
rate_limiter: RateLimiter::new(config.rate_limiter.clone()),
config,
config: Arc::new(config),
}
}

Expand Down
1 change: 1 addition & 0 deletions src/bin/background-worker.rs
Original file line number Diff line number Diff line change
Expand Up @@ -86,6 +86,7 @@ fn main() -> anyhow::Result<()> {
.build_unchecked(ConnectionManager::new(db_url));

let environment = Environment::builder()
.config(Arc::new(config))
.repository_config(repository_config)
.cloudfront(cloudfront)
.fastly(fastly)
Expand Down
4 changes: 4 additions & 0 deletions src/config.rs
Original file line number Diff line number Diff line change
@@ -1,11 +1,15 @@
mod balance_capacity;
mod base;
mod cdn_log_queue;
mod cdn_log_storage;
mod database_pools;
mod sentry;
mod server;

pub use self::balance_capacity::BalanceCapacityConfig;
pub use self::base::Base;
pub use self::cdn_log_queue::CdnLogQueueConfig;
pub use self::cdn_log_storage::CdnLogStorageConfig;
pub use self::database_pools::{DatabasePools, DbPoolConfig};
pub use self::sentry::SentryConfig;
pub use self::server::Server;
33 changes: 33 additions & 0 deletions src/config/cdn_log_queue.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
use crates_io_env_vars::{required_var, var};
use secrecy::SecretString;

/// Configuration for the CDN log queue.
///
/// Selects between a real SQS-backed queue and a mocked one. Instances are
/// normally created through `CdnLogQueueConfig::from_env()`.
#[derive(Debug, Clone)]
pub enum CdnLogQueueConfig {
/// Use an SQS queue with explicitly configured credentials.
SQS {
/// Access key, read from `CDN_LOG_QUEUE_ACCESS_KEY` by `from_env()`.
access_key: String,
/// Secret key, read from `CDN_LOG_QUEUE_SECRET_KEY` by `from_env()`.
/// Wrapped in a `SecretString`.
secret_key: SecretString,
/// Queue URL, read from `CDN_LOG_QUEUE_URL` by `from_env()`.
queue_url: String,
/// Region, read from `CDN_LOG_QUEUE_REGION` by `from_env()`.
region: String,
},
/// Use a mocked queue. `from_env()` falls back to this variant when
/// `CDN_LOG_QUEUE_URL` yields no value.
Mock,
}

impl CdnLogQueueConfig {
    /// Builds the CDN log queue configuration from environment variables.
    ///
    /// If `CDN_LOG_QUEUE_URL` yields a value, the remaining settings
    /// (`CDN_LOG_QUEUE_ACCESS_KEY`, `CDN_LOG_QUEUE_SECRET_KEY` and
    /// `CDN_LOG_QUEUE_REGION`) are looked up with `required_var` and the
    /// [`CdnLogQueueConfig::SQS`] variant is returned. Otherwise a warning is
    /// logged and [`CdnLogQueueConfig::Mock`] is returned.
    ///
    /// # Errors
    ///
    /// Propagates any error reported by the environment variable lookups.
    pub fn from_env() -> anyhow::Result<Self> {
        match var("CDN_LOG_QUEUE_URL")? {
            Some(queue_url) => {
                // The lookups stay in the original order so that the first
                // missing variable is the one that gets reported.
                let access_key = required_var("CDN_LOG_QUEUE_ACCESS_KEY")?;
                let secret_key = required_var("CDN_LOG_QUEUE_SECRET_KEY")?.into();
                let region = required_var("CDN_LOG_QUEUE_REGION")?;

                Ok(Self::SQS {
                    access_key,
                    secret_key,
                    queue_url,
                    region,
                })
            }
            None => {
                warn!("Falling back to mocked CDN log queue");
                Ok(Self::Mock)
            }
        }
    }
}
53 changes: 53 additions & 0 deletions src/config/cdn_log_storage.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
use anyhow::Context;
use crates_io_env_vars::{required_var, var};
use secrecy::SecretString;
use std::path::PathBuf;

/// Configuration for the storage backend that holds CDN log files.
///
/// Instances are normally created through `CdnLogStorageConfig::from_env()`
/// or one of the variant constructors (`s3()`, `local()`, `memory()`).
#[derive(Debug, Clone)]
pub enum CdnLogStorageConfig {
/// S3-backed storage using the given credentials. `from_env()` reads them
/// from `AWS_ACCESS_KEY` and `AWS_SECRET_KEY`.
S3 {
access_key: String,
secret_key: SecretString,
},
/// Storage rooted at a local filesystem path. `from_env()` picks
/// `<current dir>/local_uploads` when that path exists.
Local {
path: PathBuf,
},
/// In-memory storage; the last-resort fallback of `from_env()`.
Memory,
}

impl CdnLogStorageConfig {
pub fn s3(access_key: String, secret_key: SecretString) -> Self {
Self::S3 {
access_key,
secret_key,
}
}

pub fn local(path: PathBuf) -> Self {
Self::Local { path }
}

pub fn memory() -> Self {
Self::Memory
}

pub fn from_env() -> anyhow::Result<Self> {
if let Some(access_key) = var("AWS_ACCESS_KEY")? {
let secret_key = required_var("AWS_SECRET_KEY")?.into();
return Ok(Self::s3(access_key, secret_key));
}

let current_dir = std::env::current_dir();
let current_dir = current_dir.context("Failed to read the current directory")?;

let path = current_dir.join("local_uploads");
let path_display = path.display();
if path.exists() {
info!("Falling back to local CDN log storage at {path_display}");
return Ok(Self::local(path));
}

warn!("Falling back to in-memory CDN log storage because {path_display} does not exist");
Ok(Self::memory())
}
}
6 changes: 6 additions & 0 deletions src/config/server.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,8 @@ use crate::Env;
use super::base::Base;
use super::database_pools::DatabasePools;
use crate::config::balance_capacity::BalanceCapacityConfig;
use crate::config::cdn_log_storage::CdnLogStorageConfig;
use crate::config::CdnLogQueueConfig;
use crate::middleware::cargo_compat::StatusCodeConfig;
use crate::storage::StorageConfig;
use crates_io_env_vars::{list, list_parsed, required_var, var, var_parsed};
Expand All @@ -34,6 +36,8 @@ pub struct Server {
pub max_blocking_threads: Option<usize>,
pub db: DatabasePools,
pub storage: StorageConfig,
pub cdn_log_storage: CdnLogStorageConfig,
pub cdn_log_queue: CdnLogQueueConfig,
pub session_key: cookie::Key,
pub gh_client_id: ClientId,
pub gh_client_secret: ClientSecret,
Expand Down Expand Up @@ -172,6 +176,8 @@ impl Server {
Ok(Server {
db: DatabasePools::full_from_environment(&base)?,
storage,
cdn_log_storage: CdnLogStorageConfig::from_env()?,
cdn_log_queue: CdnLogQueueConfig::from_env()?,
base,
ip,
port,
Expand Down
1 change: 1 addition & 0 deletions src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,7 @@ mod router;
pub mod schema;
pub mod sentry;
pub mod sql;
pub mod sqs;
pub mod ssh;
pub mod storage;
pub mod tasks;
Expand Down
94 changes: 94 additions & 0 deletions src/sqs.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,94 @@
use anyhow::Context;
use async_trait::async_trait;
use aws_credential_types::Credentials;
use aws_sdk_sqs::config::{BehaviorVersion, Region};
use aws_sdk_sqs::operation::receive_message::ReceiveMessageOutput;
use mockall::automock;

/// The [SqsQueue] trait defines a basic interface for interacting with an
/// AWS SQS queue.
///
/// A [MockSqsQueue] struct is automatically generated by the [automock]
/// attribute. This struct can be used in unit tests to mock the behavior of
/// the [SqsQueue] trait.
///
/// The [SqsQueueImpl] struct is the actual implementation of the trait.
#[automock]
#[async_trait]
pub trait SqsQueue {
/// Receives messages from the queue.
///
/// `max_messages` is the requested upper bound on the number of messages;
/// [SqsQueueImpl] forwards it as the `max_number_of_messages` setting of
/// the SQS `ReceiveMessage` request.
async fn receive_messages(&self, max_messages: i32) -> anyhow::Result<ReceiveMessageOutput>;
/// Deletes the message identified by `receipt_handle` from the queue.
async fn delete_message(&self, receipt_handle: &str) -> anyhow::Result<()>;
}

/// The [SqsQueueImpl] struct is the actual implementation of the [SqsQueue]
/// trait, which interacts with the real AWS API servers.
#[derive(Debug, Clone)]
pub struct SqsQueueImpl {
/// SQS client used to send the requests.
client: aws_sdk_sqs::Client,
/// URL of the queue that every request is sent to.
queue_url: String,
}

impl SqsQueueImpl {
    /// Creates a new [SqsQueueImpl] for the queue at `queue_url`.
    ///
    /// The underlying SQS client is configured with the given `region` and
    /// `credentials`, and pinned to the `v2023_11_09` SDK behavior version.
    pub fn new(queue_url: impl Into<String>, region: Region, credentials: Credentials) -> Self {
        let sdk_config = aws_sdk_sqs::Config::builder()
            .behavior_version(BehaviorVersion::v2023_11_09())
            .region(region)
            .credentials_provider(credentials)
            .build();

        Self {
            client: aws_sdk_sqs::Client::from_conf(sdk_config),
            queue_url: queue_url.into(),
        }
    }
}

#[async_trait]
impl SqsQueue for SqsQueueImpl {
    /// Sends a `ReceiveMessage` request for up to `max_messages` messages to
    /// the configured queue and returns the raw SDK output.
    async fn receive_messages(&self, max_messages: i32) -> anyhow::Result<ReceiveMessageOutput> {
        let request = self
            .client
            .receive_message()
            .queue_url(&self.queue_url)
            .max_number_of_messages(max_messages);

        request
            .send()
            .await
            .context("Failed to receive SQS queue message")
    }

    /// Sends a `DeleteMessage` request for `receipt_handle` to the configured
    /// queue. The SDK output is discarded.
    async fn delete_message(&self, receipt_handle: &str) -> anyhow::Result<()> {
        let request = self
            .client
            .delete_message()
            .queue_url(&self.queue_url)
            .receipt_handle(receipt_handle);

        request
            .send()
            .await
            .map(|_output| ())
            .context("Failed to delete SQS queue message")
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    /// Smoke test: building a [SqsQueueImpl] from placeholder settings must
    /// not panic.
    #[test]
    fn test_constructor() {
        let region = Region::new("us-west-1");
        let queue_url = "https://sqs.us-west-1.amazonaws.com/359172468976/cdn-log-event-queue";

        // Placeholder credentials; nothing here talks to AWS.
        let credentials = Credentials::new(
            "ANOTREAL",
            "notrealrnrELgWzOk3IfjzDKtFBhDby",
            None,
            None,
            "test",
        );

        let _queue = SqsQueueImpl::new(queue_url, region, credentials);
    }
}
8 changes: 7 additions & 1 deletion src/tests/util/test_app.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,10 @@ use super::{MockAnonymousUser, MockCookieUser, MockTokenUser};
use crate::util::chaosproxy::ChaosProxy;
use crate::util::github::{MockGitHubClient, MOCK_GITHUB_DATA};
use anyhow::Context;
use crates_io::config::{self, BalanceCapacityConfig, Base, DatabasePools, DbPoolConfig};
use crates_io::config::{
self, BalanceCapacityConfig, Base, CdnLogQueueConfig, CdnLogStorageConfig, DatabasePools,
DbPoolConfig,
};
use crates_io::middleware::cargo_compat::StatusCodeConfig;
use crates_io::models::token::{CrateScope, EndpointScope};
use crates_io::rate_limiter::{LimitedAction, RateLimiterConfig};
Expand Down Expand Up @@ -270,6 +273,7 @@ impl TestAppBuilder {
};

let environment = Environment::builder()
.config(app.config.clone())
.repository_config(repository_config)
.storage(app.storage.clone())
.connection_pool(app.primary_database.clone())
Expand Down Expand Up @@ -421,6 +425,8 @@ fn simple_config() -> config::Server {
max_blocking_threads: None,
db,
storage,
cdn_log_queue: CdnLogQueueConfig::Mock,
cdn_log_storage: CdnLogStorageConfig::memory(),
session_key: cookie::Key::derive_from("test this has to be over 32 bytes long".as_bytes()),
gh_client_id: ClientId::new(dotenvy::var("GH_CLIENT_ID").unwrap_or_default()),
gh_client_secret: ClientSecret::new(dotenvy::var("GH_CLIENT_SECRET").unwrap_or_default()),
Expand Down
2 changes: 2 additions & 0 deletions src/worker/environment.rs
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,8 @@ use std::time::Instant;
#[derive(Builder)]
#[builder(pattern = "owned")]
pub struct Environment {
pub config: Arc<crate::config::Server>,

repository_config: RepositoryConfig,
#[builder(default, setter(skip))]
repository: Mutex<Option<Repository>>,
Expand Down
Loading