Skip to content

Commit b742928

Browse files
Dan GardnerTurbo87
Dan Gardner
authored and committed
Add API endpoint for GitHub secret alerts
1 parent 7bb3853 commit b742928

File tree

12 files changed

+525
-3
lines changed

12 files changed

+525
-3
lines changed

Cargo.lock

Lines changed: 1 addition & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Cargo.toml

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -63,11 +63,13 @@ lettre = { version = "=0.10.1", default-features = false, features = ["file-tran
6363
minijinja = "=0.27.0"
6464
moka = "=0.9.6"
6565
oauth2 = { version = "=4.3.0", default-features = false, features = ["reqwest"] }
66+
once_cell = "=1.16.0"
6667
parking_lot = "=0.12.1"
6768
prometheus = { version = "=0.13.3", default-features = false }
6869
rand = "=0.8.5"
6970
reqwest = { version = "=0.11.13", features = ["blocking", "gzip", "json"] }
7071
retry = "=2.0.0"
72+
ring = "=0.16.20"
7173
scheduled-thread-pool = "=0.2.6"
7274
semver = { version = "=1.0.14", features = ["serde"] }
7375
sentry = { version = "=0.29.1", features = ["tracing"] }
@@ -92,7 +94,6 @@ claims = "=0.7.1"
9294
conduit-test = "=0.10.0"
9395
hyper-tls = "=0.5.0"
9496
insta = { version = "=1.23.0", features = ["redactions", "yaml"] }
95-
once_cell = "=1.16.0"
9697
tokio = "=1.23.0"
9798
tower-service = "=0.3.2"
9899

src/controllers.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -75,6 +75,7 @@ pub mod util;
7575

7676
pub mod category;
7777
pub mod crate_owner_invitation;
78+
pub mod github;
7879
pub mod keyword;
7980
pub mod krate;
8081
pub mod metrics;

src/controllers/github.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
pub mod secret_scanning;
Lines changed: 255 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,255 @@
1+
use crate::controllers::frontend_prelude::*;
2+
use crate::models::{ApiToken, User};
3+
use crate::schema::api_tokens;
4+
use crate::util::read_fill;
5+
use base64;
6+
use once_cell::sync::Lazy;
7+
use ring::signature;
8+
use serde_json as json;
9+
use std::sync::Mutex;
10+
11+
// Marker (including its trailing newline) that precedes the base64 payload of a PEM public key
static PEM_HEADER: &str = "-----BEGIN PUBLIC KEY-----\n";
12+
// Marker (including its leading newline) that follows the base64 payload of a PEM public key
static PEM_FOOTER: &str = "\n-----END PUBLIC KEY-----";
13+
14+
// Minimum number of seconds to wait before refreshing cache of GitHub's public keys
15+
static PUBLIC_KEY_CACHE_LIFETIME_SECONDS: i64 = 60 * 60 * 24; // 24 hours
16+
17+
// Cache of public keys that have been fetched from GitHub API
18+
static PUBLIC_KEY_CACHE: Lazy<Mutex<GitHubPublicKeyCache>> = Lazy::new(|| {
19+
let keys: Vec<GitHubPublicKey> = Vec::new();
20+
let cache = GitHubPublicKeyCache {
21+
keys,
22+
timestamp: None,
23+
};
24+
Mutex::new(cache)
25+
});
26+
27+
/// A single public key used to verify signatures on secret alert requests.
///
/// * `key_identifier` - id that is compared against the `GITHUB-PUBLIC-KEY-IDENTIFIER`
///   request header to select the key.
/// * `key` - the key material as PEM text (parsed by `key_from_spki`).
/// * `is_current` - keys with this flag unset are rejected during signature verification.
#[derive(Debug, Deserialize, Clone, Eq, Hash, PartialEq)]
28+
pub struct GitHubPublicKey {
29+
pub key_identifier: String,
30+
pub key: String,
31+
pub is_current: bool,
32+
}
33+
34+
/// Deserializable wrapper around a list of [`GitHubPublicKey`]s stored under `public_keys`.
///
/// NOTE(review): presumably mirrors the JSON document returned by GitHub's public key
/// endpoint; confirm against the GitHub client implementation.
#[derive(Debug, Deserialize)]
35+
pub struct GitHubPublicKeyList {
36+
pub public_keys: Vec<GitHubPublicKey>,
37+
}
38+
39+
/// Contents of the public key cache: the cached keys plus the time they were stored.
///
/// A `timestamp` of `None` means the cache has never been populated.
#[derive(Debug, Clone)]
40+
struct GitHubPublicKeyCache {
41+
keys: Vec<GitHubPublicKey>,
42+
timestamp: Option<chrono::DateTime<chrono::Utc>>,
43+
}
44+
45+
/// Converts a PEM format ECDSA P-256 SHA-256 public key in SubjectPublicKeyInfo format into
46+
/// the Octet-String-to-Elliptic-Curve-Point format expected by ring::signature::verify
47+
fn key_from_spki(key: &GitHubPublicKey) -> Result<Vec<u8>, std::io::Error> {
48+
let start_idx = key
49+
.key
50+
.find(PEM_HEADER)
51+
.ok_or(std::io::ErrorKind::InvalidData)?;
52+
let gh_key = &key.key[(start_idx + PEM_HEADER.len())..];
53+
let end_idx = gh_key
54+
.find(PEM_FOOTER)
55+
.ok_or(std::io::ErrorKind::InvalidData)?;
56+
let gh_key = gh_key[..end_idx].replace('\n', "");
57+
let gh_key = base64::decode(gh_key)
58+
.map_err(|_| std::io::Error::from(std::io::ErrorKind::InvalidData))?;
59+
if gh_key.len() != 91 {
60+
return Err(std::io::Error::from(std::io::ErrorKind::InvalidData));
61+
}
62+
// extract the key bytes from the fixed position in the ASN.1 structure
63+
Ok(gh_key[26..91].to_vec())
64+
}
65+
66+
/// Check if cache of public keys is populated and not expired
67+
fn is_cache_valid(timestamp: Option<chrono::DateTime<chrono::Utc>>) -> bool {
68+
timestamp.is_some()
69+
&& chrono::Utc::now() - timestamp.unwrap()
70+
< chrono::Duration::seconds(PUBLIC_KEY_CACHE_LIFETIME_SECONDS)
71+
}
72+
73+
// Fetches list of public keys from GitHub API
74+
fn get_public_keys(req: &dyn RequestExt) -> Result<Vec<GitHubPublicKey>, Box<dyn AppError>> {
75+
// Return list from cache if populated and still valid
76+
if let Ok(cache) = PUBLIC_KEY_CACHE.lock() {
77+
if is_cache_valid(cache.timestamp) {
78+
return Ok(cache.keys.clone());
79+
}
80+
}
81+
// Fetch from GitHub API
82+
let app = req.app();
83+
let keys = app
84+
.github
85+
.public_keys(&app.config.gh_client_id, &app.config.gh_client_secret)
86+
.unwrap();
87+
88+
// Populate cache
89+
if let Ok(mut cache) = PUBLIC_KEY_CACHE.lock() {
90+
cache.keys = keys.clone();
91+
cache.timestamp = Some(chrono::Utc::now());
92+
}
93+
Ok(keys)
94+
}
95+
96+
/// Verifies that the GitHub signature in request headers is valid
97+
fn verify_github_signature(req: &dyn RequestExt, json: &[u8]) -> Result<(), Box<dyn AppError>> {
98+
// Read and decode request headers
99+
let headers = req.headers();
100+
let req_key_id = headers
101+
.get("GITHUB-PUBLIC-KEY-IDENTIFIER")
102+
.ok_or_else(|| bad_request("missing HTTP header: GITHUB-PUBLIC-KEY-IDENTIFIER"))?
103+
.to_str()
104+
.map_err(|e| bad_request(&format!("failed to decode HTTP header: {e:?}")))?;
105+
let sig = headers
106+
.get("GITHUB-PUBLIC-KEY-SIGNATURE")
107+
.ok_or_else(|| bad_request("missing HTTP header: GITHUB-PUBLIC-KEY-SIGNATURE"))?;
108+
let sig = base64::decode(sig)
109+
.map_err(|e| bad_request(&format!("failed to decode signature as base64: {e:?}")))?;
110+
let public_keys = get_public_keys(req)
111+
.map_err(|e| bad_request(&format!("failed to fetch GitHub public keys: {e:?}")))?;
112+
113+
for key in public_keys {
114+
if key.key_identifier == req_key_id {
115+
if !key.is_current {
116+
return Err(bad_request(&format!(
117+
"key id {req_key_id} is not a current key"
118+
)));
119+
}
120+
let key_bytes =
121+
key_from_spki(&key).map_err(|_| bad_request("cannot parse public key"))?;
122+
let gh_key =
123+
signature::UnparsedPublicKey::new(&signature::ECDSA_P256_SHA256_ASN1, &key_bytes);
124+
125+
return match gh_key.verify(json, &sig) {
126+
Ok(v) => {
127+
info!(
128+
"GitHub secret alert request validated with key id {}",
129+
key.key_identifier
130+
);
131+
Ok(v)
132+
}
133+
Err(e) => Err(bad_request(&format!("invalid signature: {e:?}"))),
134+
};
135+
}
136+
}
137+
138+
return Err(bad_request(&format!("unknown key id {req_key_id}")));
139+
}
140+
141+
/// One entry of the secret alert request body.
///
/// * `token` - the exposed token; matched against the `api_tokens` table for revocation.
/// * `type` - echoed back as `token_type` in the feedback response.
/// * `url` - location where the token was found; included in the notification email
///   (may be empty).
/// * `source` - reported as "Source type" in the notification email.
#[derive(Deserialize, Serialize)]
142+
struct GitHubSecretAlert {
143+
token: String,
144+
r#type: String,
145+
url: String,
146+
source: String,
147+
}
148+
149+
/// Revokes an API token and notifies the token owner
150+
fn alert_revoke_token(
151+
req: &dyn RequestExt,
152+
alert: &GitHubSecretAlert,
153+
) -> Result<(), Box<dyn AppError>> {
154+
let conn = req.db_write()?;
155+
156+
// not using ApiToken::find_by_api_token in order to preserve last_used_at
157+
// the token field has a uniqueness constraint so get_result() should be safe to use
158+
let token: ApiToken = diesel::update(api_tokens::table)
159+
.filter(api_tokens::token.eq(alert.token.as_bytes()))
160+
.set(api_tokens::revoked.eq(true))
161+
.get_result::<ApiToken>(&*conn)?;
162+
163+
// send email notification to the token owner
164+
let user = User::find(&conn, token.user_id)?;
165+
info!(
166+
"Revoked API token '{}' for user {} ({})",
167+
alert.token, user.gh_login, user.id
168+
);
169+
match user.email(&conn)? {
170+
None => {
171+
info!(
172+
"No email address for user {} ({}), cannot send email notification",
173+
user.gh_login, user.id
174+
);
175+
Ok(())
176+
}
177+
Some(email) => req.app().emails.send_token_exposed_notification(
178+
&email,
179+
&alert.url,
180+
"GitHub",
181+
&alert.source,
182+
&token.name,
183+
),
184+
}
185+
}
186+
187+
/// One entry of the response sent back for a secret alert request.
///
/// * `token_raw` - the token exactly as it appeared in the alert.
/// * `token_type` - the alert's `type` value.
/// * `label` - `"true_positive"` when the token was revoked, `"false_positive"` otherwise.
#[derive(Deserialize, Serialize)]
188+
pub struct GitHubSecretAlertFeedback {
189+
pub token_raw: String,
190+
pub token_type: String,
191+
pub label: String,
192+
}
193+
194+
/// Handles the `POST /api/github/secret-scanning/verify` route.
195+
pub fn verify(req: &mut dyn RequestExt) -> EndpointResult {
196+
let max_size = 8192;
197+
let length = req
198+
.content_length()
199+
.ok_or_else(|| bad_request("missing header: Content-Length"))?;
200+
201+
if length > max_size {
202+
return Err(bad_request(&format!("max content length is: {max_size}")));
203+
}
204+
205+
let mut json = vec![0; length as usize];
206+
read_fill(req.body(), &mut json)?;
207+
verify_github_signature(req, &json)
208+
.map_err(|e| bad_request(&format!("failed to verify request signature: {e:?}")))?;
209+
210+
let json = String::from_utf8(json)
211+
.map_err(|e| bad_request(&format!("failed to decode request body: {e:?}")))?;
212+
let alerts: Vec<GitHubSecretAlert> = json::from_str(&json)
213+
.map_err(|e| bad_request(&format!("invalid secret alert request: {e:?}")))?;
214+
215+
let feedback: Vec<GitHubSecretAlertFeedback> = alerts
216+
.into_iter()
217+
.map(|alert| GitHubSecretAlertFeedback {
218+
token_raw: alert.token.clone(),
219+
token_type: alert.r#type.clone(),
220+
label: match alert_revoke_token(req, &alert) {
221+
Ok(()) => "true_positive".to_string(),
222+
Err(e) => {
223+
warn!(
224+
"Error revoking API token in GitHub secret alert: {} ({e:?})",
225+
alert.token
226+
);
227+
"false_positive".to_string()
228+
}
229+
},
230+
})
231+
.collect();
232+
233+
Ok(req.json(&feedback))
234+
}
235+
236+
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds a cache timestamp lying `seconds` in the past; negative values lie in the future.
    fn aged(seconds: i64) -> Option<chrono::DateTime<chrono::Utc>> {
        Some(chrono::Utc::now() - chrono::Duration::seconds(seconds))
    }

    #[test]
    fn test_is_cache_valid() {
        let lifetime = PUBLIC_KEY_CACHE_LIFETIME_SECONDS;

        // an unpopulated cache is never valid
        assert!(!is_cache_valid(None));
        // exactly at the lifetime boundary the cache has expired
        assert!(!is_cache_valid(aged(lifetime)));
        // one second before the boundary it is still valid
        assert!(is_cache_valid(aged(lifetime - 1)));
        assert!(is_cache_valid(aged(0)));
        // shouldn't happen, but just in case of time travel
        assert!(is_cache_valid(aged(-lifetime)));
    }
}

src/email.rs

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -91,6 +91,33 @@ or go to https://{domain}/me/pending-invites to manage all of your crate ownersh
9191
self.send(email, subject, &body)
9292
}
9393

94+
/// Attempts to send an API token exposure notification email
95+
pub fn send_token_exposed_notification(
96+
&self,
97+
email: &str,
98+
url: &str,
99+
reporter: &str,
100+
source: &str,
101+
token_name: &str,
102+
) -> AppResult<()> {
103+
let subject = "Exposed API token found";
104+
let mut body = format!(
105+
"{reporter} has notified us that your crates.io API token {token_name}\n
106+
has been exposed publicly. We have revoked this token as a precaution.\n
107+
Please review your account at https://{domain} to confirm that no\n
108+
unexpected changes have been made to your settings or crates.\n
109+
\n
110+
Source type: {source}\n",
111+
domain = crate::config::domain_name()
112+
);
113+
if url.is_empty() {
114+
body.push_str("\nWe were not informed of the URL where the token was found.\n");
115+
} else {
116+
body.push_str(&format!("\nURL where the token was found: {url}\n"));
117+
}
118+
self.send(email, subject, &body)
119+
}
120+
94121
/// This is supposed to be used only during tests, to retrieve the messages stored in the
95122
/// "memory" backend. It's not cfg'd away because our integration tests need to access this.
96123
pub fn mails_in_memory(&self) -> Option<Vec<StoredEmail>> {

0 commit comments

Comments
 (0)