1use regex::Regex;
7use std::collections::HashSet;
8use std::sync::LazyLock;
9
10use super::{ExpandError, ExpandedContent};
11use crate::config::BabyriteConfig;
12use crate::utils::language_from_extension;
13
14static GITHUB_PERMALINK_REGEX: LazyLock<Regex> = LazyLock::new(|| {
23 Regex::new(
24 r"https://github\.com/([^/]+)/([^/]+)/blob/([0-9a-f]{4,40})/([^#\s]+)(?:#L(\d+)(?:-L(\d+))?)?"
25 )
26 .unwrap()
27});
28
/// The components of a GitHub blob permalink
/// (`https://github.com/<owner>/<repo>/blob/<commit>/<path>[#L<start>[-L<end>]]`).
#[derive(Debug)]
pub struct GitHubPermalink {
    /// Repository owner: the first path segment after `github.com/`.
    pub owner: String,
    /// Repository name: the second path segment.
    pub repo: String,
    /// Commit hash the link is pinned to, as it appeared in the URL.
    pub commit: String,
    /// Path of the file inside the repository (may contain `/`).
    pub path: String,
    /// Line selection taken from the `#L…` anchor, or `None` when the URL has no anchor.
    pub line_range: Option<LineRange>,
}
43
/// A range of source lines selected by a permalink anchor.
///
/// Both bounds are 1-based line numbers and the range is inclusive; a
/// single-line anchor (`#L42`) is represented with `start == end`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct LineRange {
    /// First selected line (1-based).
    pub start: usize,
    /// Last selected line (1-based, inclusive).
    pub end: usize,
}
52
/// Errors that can occur while expanding a GitHub permalink into its content.
#[derive(thiserror::Error, Debug)]
pub enum GitHubExpandError {
    /// The raw-content request completed but did not succeed; the payload is a
    /// human-readable description (HTTP status and requested URL).
    #[error("Failed to fetch raw content: {0}")]
    Fetch(String),
    /// The remote file is larger than the size limit enforced when fetching.
    #[error("Content exceeds size limit")]
    ContentTooLarge,
    /// A transport-level failure reported by `reqwest`; displayed transparently.
    #[error(transparent)]
    Http(#[from] reqwest::Error),
}
66
67impl GitHubPermalink {
68 pub fn parse_all(text: &str) -> Vec<GitHubPermalink> {
75 let mut seen_urls = HashSet::new();
76 GITHUB_PERMALINK_REGEX
77 .captures_iter(text)
78 .filter_map(|captures| {
79 let m = captures.get(0)?;
80 let full_url = m.as_str();
81 if m.start() > 0 && text.as_bytes()[m.start() - 1] == b'<' {
83 return None;
84 }
85 if !seen_urls.insert(full_url.to_string()) {
86 return None;
87 }
88
89 let owner = captures.get(1)?.as_str().to_string();
90 let repo = captures.get(2)?.as_str().to_string();
91 let commit = captures.get(3)?.as_str().to_string();
92 let path = captures.get(4)?.as_str().to_string();
93
94 let line_range = match (captures.get(5), captures.get(6)) {
95 (Some(start), Some(end)) => {
96 let s = start.as_str().parse().ok()?;
97 let e = end.as_str().parse().ok()?;
98 Some(LineRange { start: s, end: e })
99 }
100 (Some(start), None) => {
101 let s = start.as_str().parse().ok()?;
102 Some(LineRange { start: s, end: s })
103 }
104 _ => None,
105 };
106
107 Some(GitHubPermalink {
108 owner,
109 repo,
110 commit,
111 path,
112 line_range,
113 })
114 })
115 .take(3) .collect()
117 }
118
119 pub async fn fetch(
121 &self,
122 http_client: &reqwest::Client,
123 ) -> Result<ExpandedContent, ExpandError> {
124 let config = BabyriteConfig::get();
125 let max_lines = config.github.max_lines;
126
127 let raw_url = format!(
128 "https://raw.githubusercontent.com/{}/{}/{}/{}",
129 self.owner, self.repo, self.commit, self.path
130 );
131
132 let response = http_client
133 .get(&raw_url)
134 .send()
135 .await
136 .map_err(GitHubExpandError::Http)?;
137
138 if !response.status().is_success() {
139 return Err(GitHubExpandError::Fetch(format!(
140 "HTTP {} for {}",
141 response.status(),
142 raw_url
143 ))
144 .into());
145 }
146
147 let content_length = response.content_length().unwrap_or(0);
148 if content_length > 1_048_576 {
150 return Err(GitHubExpandError::ContentTooLarge.into());
151 }
152
153 let body = response.text().await.map_err(GitHubExpandError::Http)?;
154
155 Ok(self.build_code_block(&body, max_lines))
156 }
157
158 fn build_code_block(&self, body: &str, max_lines: usize) -> ExpandedContent {
160 let all_lines: Vec<&str> = body.lines().collect();
161 let (code, line_info) = match self.line_range {
162 Some(range) => {
163 let start = range.start.saturating_sub(1); let end = range.end.min(all_lines.len());
165 let selected: Vec<&str> = all_lines.get(start..end).unwrap_or_default().to_vec();
166
167 let (code, truncated) = truncate_lines(&selected, max_lines);
168 let info = if truncated {
169 format!(
170 "L{}-L{}, truncated to {} lines",
171 range.start, range.end, max_lines
172 )
173 } else {
174 format!("L{}-L{}", range.start, range.end)
175 };
176 (code, info)
177 }
178 None => {
179 let (code, truncated) = truncate_lines(&all_lines, max_lines);
180 let info = if truncated {
181 format!("truncated to {} lines", max_lines)
182 } else {
183 String::new()
184 };
185 (code, info)
186 }
187 };
188
189 let short_commit = &self.commit[..7.min(self.commit.len())];
190 let language = language_for_path(&self.path);
191
192 let metadata = if line_info.is_empty() {
193 format!(
194 "`{}` - {}/{}@{}",
195 self.path, self.owner, self.repo, short_commit
196 )
197 } else {
198 format!(
199 "`{}` ({}) - {}/{}@{}",
200 self.path, line_info, self.owner, self.repo, short_commit
201 )
202 };
203
204 ExpandedContent::CodeBlock {
205 language: language.to_string(),
206 code,
207 metadata,
208 }
209 }
210}
211
212fn language_for_path(path: &str) -> &str {
214 let filename = path.rsplit('/').next().unwrap_or(path);
215 match filename.rsplit_once('.') {
216 Some((_, ext)) => language_from_extension(ext),
217 None => language_from_extension(filename),
218 }
219}
220
/// Joins `lines` with `\n`, keeping at most the first `max` of them.
///
/// Returns the joined text together with a flag that is `true` only when
/// some lines had to be dropped.
fn truncate_lines(lines: &[&str], max: usize) -> (String, bool) {
    let was_truncated = lines.len() > max;
    let kept = if was_truncated { &lines[..max] } else { lines };
    (kept.join("\n"), was_truncated)
}
230
#[cfg(test)]
mod tests {
    use super::*;

    // ------------------------------------------------------------------
    // Helpers
    // ------------------------------------------------------------------

    /// Builds a permalink with fixed owner/repo/commit for rendering tests.
    fn make_permalink(path: &str, line_range: Option<LineRange>) -> GitHubPermalink {
        GitHubPermalink {
            owner: "owner".to_string(),
            repo: "repo".to_string(),
            commit: "abcdef1234567".to_string(),
            path: path.to_string(),
            line_range,
        }
    }

    /// Unwraps a code block into `(language, code, metadata)`, failing the
    /// test for any other kind of expanded content.
    fn expect_code_block(content: ExpandedContent) -> (String, String, String) {
        match content {
            ExpandedContent::CodeBlock {
                language,
                code,
                metadata,
            } => (language, code, metadata),
            _ => panic!("expected CodeBlock"),
        }
    }

    // ------------------------------------------------------------------
    // truncate_lines
    // ------------------------------------------------------------------

    #[test]
    fn truncate_lines_under_limit() {
        let (joined, was_truncated) = truncate_lines(&["a", "b", "c"], 5);
        assert_eq!(joined, "a\nb\nc");
        assert!(!was_truncated);
    }

    #[test]
    fn truncate_lines_at_limit() {
        let (joined, was_truncated) = truncate_lines(&["a", "b", "c"], 3);
        assert_eq!(joined, "a\nb\nc");
        assert!(!was_truncated);
    }

    #[test]
    fn truncate_lines_over_limit() {
        let (joined, was_truncated) = truncate_lines(&["a", "b", "c", "d", "e"], 2);
        assert_eq!(joined, "a\nb");
        assert!(was_truncated);
    }

    #[test]
    fn truncate_lines_empty() {
        let no_lines: [&str; 0] = [];
        let (joined, was_truncated) = truncate_lines(&no_lines, 5);
        assert_eq!(joined, "");
        assert!(!was_truncated);
    }

    // ------------------------------------------------------------------
    // GitHubPermalink::parse_all
    // ------------------------------------------------------------------

    #[test]
    fn parse_basic_permalink() {
        let links = GitHubPermalink::parse_all(
            "https://github.com/owner/repo/blob/abcdef1234567890abcdef1234567890abcdef12/src/main.rs",
        );
        assert_eq!(links.len(), 1);

        let link = &links[0];
        assert_eq!(link.owner, "owner");
        assert_eq!(link.repo, "repo");
        assert_eq!(link.commit, "abcdef1234567890abcdef1234567890abcdef12");
        assert_eq!(link.path, "src/main.rs");
        assert!(link.line_range.is_none());
    }

    #[test]
    fn parse_permalink_with_single_line() {
        let links =
            GitHubPermalink::parse_all("https://github.com/owner/repo/blob/abcd1234/src/lib.rs#L42");
        assert_eq!(links.len(), 1);

        let selected = links[0].line_range.unwrap();
        assert_eq!(selected.start, 42);
        assert_eq!(selected.end, 42);
    }

    #[test]
    fn parse_permalink_with_line_range() {
        let links = GitHubPermalink::parse_all(
            "https://github.com/owner/repo/blob/abcd1234/src/lib.rs#L10-L20",
        );
        assert_eq!(links.len(), 1);

        let selected = links[0].line_range.unwrap();
        assert_eq!(selected.start, 10);
        assert_eq!(selected.end, 20);
    }

    #[test]
    fn parse_rejects_branch_name() {
        // `main` is not a hexadecimal commit hash.
        let links = GitHubPermalink::parse_all("https://github.com/owner/repo/blob/main/src/lib.rs");
        assert!(links.is_empty());
    }

    #[test]
    fn parse_rejects_short_sha() {
        // Three hex digits are below the accepted minimum length.
        let links = GitHubPermalink::parse_all("https://github.com/owner/repo/blob/abc/src/lib.rs");
        assert!(links.is_empty());
    }

    #[test]
    fn parse_deduplicates_urls() {
        let message = "https://github.com/owner/repo/blob/abcd1234/src/lib.rs \
                       https://github.com/owner/repo/blob/abcd1234/src/lib.rs";
        let links = GitHubPermalink::parse_all(message);
        assert_eq!(links.len(), 1);
    }

    #[test]
    fn parse_limits_to_three() {
        let message = "\
            https://github.com/o/r/blob/aaaa1111/a.rs \
            https://github.com/o/r/blob/bbbb2222/b.rs \
            https://github.com/o/r/blob/cccc3333/c.rs \
            https://github.com/o/r/blob/dddd4444/d.rs";
        let links = GitHubPermalink::parse_all(message);
        assert_eq!(links.len(), 3);
    }

    #[test]
    fn parse_multiple_different_urls() {
        let message = "Check https://github.com/a/b/blob/1111aaaa/x.rs#L1 and \
                       https://github.com/c/d/blob/2222bbbb/y.py#L5-L10";
        let links = GitHubPermalink::parse_all(message);
        assert_eq!(links.len(), 2);
        assert_eq!(links[0].owner, "a");
        assert_eq!(links[1].owner, "c");
        assert_eq!(links[1].path, "y.py");
    }

    #[test]
    fn parse_no_match() {
        let links = GitHubPermalink::parse_all("Hello, no links here!");
        assert!(links.is_empty());
    }

    #[test]
    fn parse_ignores_angle_bracket_link() {
        let links = GitHubPermalink::parse_all(
            "<https://github.com/owner/repo/blob/abcd1234/src/lib.rs#L10-L20>",
        );
        assert!(links.is_empty());
    }

    #[test]
    fn parse_nested_path() {
        let links = GitHubPermalink::parse_all(
            "https://github.com/owner/repo/blob/abcd1234/src/deeply/nested/path/file.rs",
        );
        assert_eq!(links.len(), 1);
        assert_eq!(links[0].path, "src/deeply/nested/path/file.rs");
    }

    #[test]
    fn parse_short_commit_sha() {
        // Four hex digits is the shortest accepted abbreviation.
        let links = GitHubPermalink::parse_all("https://github.com/owner/repo/blob/abcd/file.rs");
        assert_eq!(links.len(), 1);
        assert_eq!(links[0].commit, "abcd");
    }

    // ------------------------------------------------------------------
    // language_for_path
    // ------------------------------------------------------------------

    #[test]
    fn language_for_path_basic_extension() {
        assert_eq!(language_for_path("src/main.rs"), "rust");
    }

    #[test]
    fn language_for_path_dockerfile_in_subdir() {
        assert_eq!(language_for_path("docker/Dockerfile"), "dockerfile");
    }

    #[test]
    fn language_for_path_dotted_directory() {
        assert_eq!(language_for_path("some.config/Dockerfile"), "dockerfile");
    }

    #[test]
    fn language_for_path_makefile_in_subdir() {
        assert_eq!(language_for_path("build/Makefile"), "makefile");
    }

    #[test]
    fn language_for_path_multiple_dots() {
        assert_eq!(language_for_path("file.test.ts"), "typescript");
    }

    #[test]
    fn language_for_path_dotfile() {
        assert_eq!(language_for_path(".gitignore"), "gitignore");
    }

    // ------------------------------------------------------------------
    // GitHubPermalink::build_code_block
    // ------------------------------------------------------------------

    #[test]
    fn build_code_block_full_file() {
        let body = "fn main() {\n println!(\"hello\");\n}";
        let rendered = make_permalink("src/main.rs", None).build_code_block(body, 50);

        let (language, code, metadata) = expect_code_block(rendered);
        assert_eq!(language, "rust");
        assert_eq!(code, body);
        assert_eq!(metadata, "`src/main.rs` - owner/repo@abcdef1");
    }

    #[test]
    fn build_code_block_with_line_range() {
        let selection = Some(LineRange { start: 2, end: 3 });
        let rendered =
            make_permalink("src/lib.rs", selection).build_code_block("line1\nline2\nline3\nline4", 50);

        let (language, code, metadata) = expect_code_block(rendered);
        assert_eq!(language, "rust");
        assert_eq!(code, "line2\nline3");
        assert!(metadata.contains("L2-L3"));
    }

    #[test]
    fn build_code_block_truncated() {
        let rendered = make_permalink("app.py", None).build_code_block("a\nb\nc\nd\ne", 2);

        let (_, code, metadata) = expect_code_block(rendered);
        assert_eq!(code, "a\nb");
        assert!(metadata.contains("truncated to 2 lines"));
    }

    #[test]
    fn build_code_block_line_range_truncated() {
        let selection = Some(LineRange { start: 1, end: 5 });
        let rendered = make_permalink("app.py", selection).build_code_block("a\nb\nc\nd\ne", 3);

        let (_, code, metadata) = expect_code_block(rendered);
        assert_eq!(code, "a\nb\nc");
        assert!(metadata.contains("L1-L5"));
        assert!(metadata.contains("truncated to 3 lines"));
    }

    #[test]
    fn build_code_block_dockerfile_language() {
        let rendered =
            make_permalink("docker/Dockerfile", None).build_code_block("FROM rust:latest", 50);

        let (language, _, _) = expect_code_block(rendered);
        assert_eq!(language, "dockerfile");
    }

    #[test]
    fn build_code_block_short_commit() {
        // A commit shorter than seven characters must be shown in full.
        let permalink = GitHubPermalink {
            owner: "o".to_string(),
            repo: "r".to_string(),
            commit: "abcd".to_string(),
            path: "f.rs".to_string(),
            line_range: None,
        };

        let (_, _, metadata) = expect_code_block(permalink.build_code_block("x", 50));
        assert!(metadata.contains("o/r@abcd"));
    }
}