1use regex::Regex;
7use std::collections::HashSet;
8use std::sync::LazyLock;
9
10use super::{ExpandError, ExpandedContent};
11use crate::config::BabyriteConfig;
12use crate::utils::language_from_extension;
13
14static GITHUB_PERMALINK_REGEX: LazyLock<Regex> = LazyLock::new(|| {
25 Regex::new(
26 r"https://github\.com/([^/]+)/([^/]+)/blob/([^/]+)/([^#\s]+)(?:#L(\d+)(?:-L(\d+))?)?",
27 )
28 .unwrap()
29});
30
31#[derive(Debug)]
33pub struct GitHubPermalink {
34 pub owner: String,
36 pub repo: String,
38 pub git_ref: String,
40 pub path: String,
42 pub line_range: Option<LineRange>,
44}
45
/// An inclusive range of 1-based line numbers taken from a `#L<start>-L<end>`
/// URL fragment. A single-line fragment (`#L<n>`) is stored with
/// `start == end`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub struct LineRange {
    /// First selected line (1-based).
    pub start: usize,
    /// Last selected line (1-based, inclusive).
    pub end: usize,
}
54
55#[derive(thiserror::Error, Debug)]
57pub enum GitHubExpandError {
58 #[error("Failed to fetch raw content: {0}")]
60 Fetch(String),
61 #[error("Content exceeds size limit")]
63 ContentTooLarge,
64 #[error(transparent)]
66 Http(#[from] reqwest::Error),
67}
68
69impl GitHubPermalink {
70 pub fn parse_all(text: &str) -> Vec<GitHubPermalink> {
76 let mut seen_urls = HashSet::new();
77 GITHUB_PERMALINK_REGEX
78 .captures_iter(text)
79 .filter_map(|captures| {
80 let m = captures.get(0)?;
81 let full_url = m.as_str();
82 if m.start() > 0 && text.as_bytes()[m.start() - 1] == b'<' {
84 return None;
85 }
86 if !seen_urls.insert(full_url.to_string()) {
87 return None;
88 }
89
90 let owner = captures.get(1)?.as_str().to_string();
91 let repo = captures.get(2)?.as_str().to_string();
92 let git_ref = captures.get(3)?.as_str().to_string();
93 let path = captures.get(4)?.as_str().to_string();
94
95 let line_range = match (captures.get(5), captures.get(6)) {
96 (Some(start), Some(end)) => {
97 let s = start.as_str().parse().ok()?;
98 let e = end.as_str().parse().ok()?;
99 Some(LineRange { start: s, end: e })
100 }
101 (Some(start), None) => {
102 let s = start.as_str().parse().ok()?;
103 Some(LineRange { start: s, end: s })
104 }
105 _ => None,
106 };
107
108 Some(GitHubPermalink {
109 owner,
110 repo,
111 git_ref,
112 path,
113 line_range,
114 })
115 })
116 .take(3) .collect()
118 }
119
120 #[cfg_attr(coverage_nightly, coverage(off))]
122 pub async fn fetch(
123 &self,
124 http_client: &reqwest::Client,
125 ) -> Result<ExpandedContent, ExpandError> {
126 let config = BabyriteConfig::get();
127 let max_lines = config.github.max_lines;
128
129 let raw_url = format!(
130 "https://raw.githubusercontent.com/{}/{}/{}/{}",
131 self.owner, self.repo, self.git_ref, self.path
132 );
133
134 let response = http_client
135 .get(&raw_url)
136 .send()
137 .await
138 .map_err(GitHubExpandError::Http)?;
139
140 if !response.status().is_success() {
141 return Err(GitHubExpandError::Fetch(format!(
142 "HTTP {} for {}",
143 response.status(),
144 raw_url
145 ))
146 .into());
147 }
148
149 let content_length = response.content_length().unwrap_or(0);
150 if content_length > 1_048_576 {
152 return Err(GitHubExpandError::ContentTooLarge.into());
153 }
154
155 let body = response.text().await.map_err(GitHubExpandError::Http)?;
156
157 Ok(self.build_code_block(&body, max_lines))
158 }
159
160 fn build_code_block(&self, body: &str, max_lines: usize) -> ExpandedContent {
162 let all_lines: Vec<&str> = body.lines().collect();
163 let (code, line_info) = match self.line_range {
164 Some(range) => {
165 let start = range.start.saturating_sub(1); let end = range.end.min(all_lines.len());
167 let selected: Vec<&str> = all_lines.get(start..end).unwrap_or_default().to_vec();
168
169 let (code, truncated) = truncate_lines(&selected, max_lines);
170 let info = if truncated {
171 format!(
172 "L{}-L{}, truncated to {} lines",
173 range.start, range.end, max_lines
174 )
175 } else {
176 format!("L{}-L{}", range.start, range.end)
177 };
178 (code, info)
179 }
180 None => {
181 let (code, truncated) = truncate_lines(&all_lines, max_lines);
182 let info = if truncated {
183 format!("truncated to {} lines", max_lines)
184 } else {
185 String::new()
186 };
187 (code, info)
188 }
189 };
190
191 let display_ref = shorten_ref(&self.git_ref);
192 let language = language_for_path(&self.path);
193
194 let metadata = if line_info.is_empty() {
195 format!(
196 "`{}` - {}/{}@{}",
197 self.path, self.owner, self.repo, display_ref
198 )
199 } else {
200 format!(
201 "`{}` ({}) - {}/{}@{}",
202 self.path, line_info, self.owner, self.repo, display_ref
203 )
204 };
205
206 ExpandedContent::CodeBlock {
207 language: language.to_string(),
208 code,
209 metadata,
210 }
211 }
212}
213
/// Reports whether `s` looks like a (possibly abbreviated) commit SHA:
/// between 4 and 40 bytes long and made up solely of ASCII hex digits,
/// in either case.
fn is_commit_sha(s: &str) -> bool {
    if !matches!(s.len(), 4..=40) {
        return false;
    }
    s.chars().all(|c| c.is_ascii_hexdigit())
}
218
219fn shorten_ref(git_ref: &str) -> &str {
222 if is_commit_sha(git_ref) {
223 &git_ref[..7.min(git_ref.len())]
224 } else {
225 git_ref
226 }
227}
228
229fn language_for_path(path: &str) -> &str {
231 let filename = path.rsplit('/').next().unwrap_or(path);
232 match filename.rsplit_once('.') {
233 Some((_, ext)) => language_from_extension(ext),
234 None => language_from_extension(filename),
235 }
236}
237
/// Joins `lines` with `\n`, keeping at most the first `max` of them.
///
/// Returns the joined text together with a flag that is `true` when some
/// lines had to be dropped.
fn truncate_lines(lines: &[&str], max: usize) -> (String, bool) {
    let was_truncated = lines.len() > max;
    let kept = if was_truncated { &lines[..max] } else { lines };
    (kept.join("\n"), was_truncated)
}
247
#[cfg(test)]
mod tests {
    use super::*;

    // ---------------------------------------------------------------------
    // Helpers
    // ---------------------------------------------------------------------

    /// Parses `text`, asserting that it contains exactly one permalink.
    fn parse_single(text: &str) -> GitHubPermalink {
        let mut found = GitHubPermalink::parse_all(text);
        assert_eq!(found.len(), 1, "expected exactly one permalink in: {text}");
        found.remove(0)
    }

    /// Returns `(start, end)` of a permalink that must carry a line range.
    fn bounds(link: &GitHubPermalink) -> (usize, usize) {
        let range = link
            .line_range
            .expect("permalink should have a line range");
        (range.start, range.end)
    }

    /// Builds a permalink in `owner/repo` pinned to the SHA `abcdef1234567`.
    fn make_permalink(path: &str, line_range: Option<LineRange>) -> GitHubPermalink {
        GitHubPermalink {
            owner: "owner".to_string(),
            repo: "repo".to_string(),
            git_ref: "abcdef1234567".to_string(),
            path: path.to_string(),
            line_range,
        }
    }

    /// Builds a permalink in `o/r` at an arbitrary ref.
    fn permalink_at(git_ref: &str, path: &str, line_range: Option<LineRange>) -> GitHubPermalink {
        GitHubPermalink {
            owner: "o".to_string(),
            repo: "r".to_string(),
            git_ref: git_ref.to_string(),
            path: path.to_string(),
            line_range,
        }
    }

    /// Renders a code block and returns `(language, code, metadata)`.
    fn render(link: &GitHubPermalink, body: &str, max_lines: usize) -> (String, String, String) {
        match link.build_code_block(body, max_lines) {
            ExpandedContent::CodeBlock {
                language,
                code,
                metadata,
            } => (language, code, metadata),
            _ => panic!("expected CodeBlock"),
        }
    }

    /// Asserts the outcome of `truncate_lines` for one input.
    fn check_truncate(lines: &[&str], max: usize, expected: &str, expect_truncated: bool) {
        let (joined, truncated) = truncate_lines(lines, max);
        assert_eq!(joined, expected);
        assert_eq!(truncated, expect_truncated);
    }

    // ---------------------------------------------------------------------
    // truncate_lines
    // ---------------------------------------------------------------------

    #[test]
    fn truncate_lines_under_limit() {
        check_truncate(&["a", "b", "c"], 5, "a\nb\nc", false);
    }

    #[test]
    fn truncate_lines_at_limit() {
        check_truncate(&["a", "b", "c"], 3, "a\nb\nc", false);
    }

    #[test]
    fn truncate_lines_over_limit() {
        check_truncate(&["a", "b", "c", "d", "e"], 2, "a\nb", true);
    }

    #[test]
    fn truncate_lines_empty() {
        check_truncate(&[], 5, "", false);
    }

    // ---------------------------------------------------------------------
    // GitHubPermalink::parse_all
    // ---------------------------------------------------------------------

    #[test]
    fn parse_basic_permalink() {
        let link = parse_single(
            "https://github.com/owner/repo/blob/abcdef1234567890abcdef1234567890abcdef12/src/main.rs",
        );
        assert_eq!(link.owner, "owner");
        assert_eq!(link.repo, "repo");
        assert_eq!(link.git_ref, "abcdef1234567890abcdef1234567890abcdef12");
        assert_eq!(link.path, "src/main.rs");
        assert!(link.line_range.is_none());
    }

    #[test]
    fn parse_permalink_with_single_line() {
        let link = parse_single("https://github.com/owner/repo/blob/abcd1234/src/lib.rs#L42");
        assert_eq!(bounds(&link), (42, 42));
    }

    #[test]
    fn parse_permalink_with_line_range() {
        let link = parse_single("https://github.com/owner/repo/blob/abcd1234/src/lib.rs#L10-L20");
        assert_eq!(bounds(&link), (10, 20));
    }

    #[test]
    fn parse_branch_name() {
        let link = parse_single("https://github.com/owner/repo/blob/main/src/lib.rs");
        assert_eq!(link.git_ref, "main");
        assert_eq!(link.path, "src/lib.rs");
    }

    #[test]
    fn parse_branch_name_with_line_range() {
        let link = parse_single("https://github.com/owner/repo/blob/develop/src/main.rs#L5-L10");
        assert_eq!(link.git_ref, "develop");
        assert_eq!(bounds(&link), (5, 10));
    }

    #[test]
    fn parse_branch_name_with_single_line() {
        let link = parse_single("https://github.com/owner/repo/blob/main/src/lib.rs#L5");
        assert_eq!(link.git_ref, "main");
        assert_eq!(bounds(&link), (5, 5));
    }

    #[test]
    fn parse_branch_with_special_characters() {
        let cases = [
            (
                "https://github.com/o/r/blob/release-v1.0/f.rs",
                "release-v1.0",
            ),
            (
                "https://github.com/o/r/blob/feat_something/f.rs",
                "feat_something",
            ),
            ("https://github.com/o/r/blob/v2.0.0/f.rs", "v2.0.0"),
        ];
        for (text, expected_ref) in cases {
            let results = GitHubPermalink::parse_all(text);
            assert_eq!(results.len(), 1, "failed for: {text}");
            assert_eq!(results[0].git_ref, expected_ref);
        }
    }

    #[test]
    fn parse_tag_name() {
        let link = parse_single("https://github.com/owner/repo/blob/v1.0.0/src/main.rs#L1-L10");
        assert_eq!(link.git_ref, "v1.0.0");
        assert_eq!(bounds(&link), (1, 10));
    }

    #[test]
    fn parse_mixed_sha_and_branch() {
        let text = "https://github.com/o/r/blob/abcd1234/a.rs \
                    https://github.com/o/r/blob/main/b.rs";
        let results = GitHubPermalink::parse_all(text);
        assert_eq!(results.len(), 2);
        assert_eq!(results[0].git_ref, "abcd1234");
        assert_eq!(results[1].git_ref, "main");
    }

    #[test]
    fn parse_accepts_short_ref() {
        let link = parse_single("https://github.com/owner/repo/blob/abc/src/lib.rs");
        assert_eq!(link.git_ref, "abc");
    }

    #[test]
    fn parse_deduplicates_urls() {
        let text = "https://github.com/owner/repo/blob/abcd1234/src/lib.rs \
                    https://github.com/owner/repo/blob/abcd1234/src/lib.rs";
        let results = GitHubPermalink::parse_all(text);
        assert_eq!(results.len(), 1);
    }

    #[test]
    fn parse_limits_to_three() {
        let text = "\
            https://github.com/o/r/blob/aaaa1111/a.rs \
            https://github.com/o/r/blob/bbbb2222/b.rs \
            https://github.com/o/r/blob/cccc3333/c.rs \
            https://github.com/o/r/blob/dddd4444/d.rs";
        let results = GitHubPermalink::parse_all(text);
        assert_eq!(results.len(), 3);
    }

    #[test]
    fn parse_multiple_different_urls() {
        let text = "Check https://github.com/a/b/blob/1111aaaa/x.rs#L1 and \
                    https://github.com/c/d/blob/2222bbbb/y.py#L5-L10";
        let results = GitHubPermalink::parse_all(text);
        assert_eq!(results.len(), 2);
        assert_eq!(results[0].owner, "a");
        assert_eq!(results[1].owner, "c");
        assert_eq!(results[1].path, "y.py");
    }

    #[test]
    fn parse_no_match() {
        let results = GitHubPermalink::parse_all("Hello, no links here!");
        assert!(results.is_empty());
    }

    #[test]
    fn parse_ignores_angle_bracket_link() {
        let text = "<https://github.com/owner/repo/blob/abcd1234/src/lib.rs#L10-L20>";
        let results = GitHubPermalink::parse_all(text);
        assert!(results.is_empty());
    }

    #[test]
    fn parse_nested_path() {
        let link =
            parse_single("https://github.com/owner/repo/blob/abcd1234/src/deeply/nested/path/file.rs");
        assert_eq!(link.path, "src/deeply/nested/path/file.rs");
    }

    #[test]
    fn parse_short_commit_sha() {
        let link = parse_single("https://github.com/owner/repo/blob/abcd/file.rs");
        assert_eq!(link.git_ref, "abcd");
    }

    // ---------------------------------------------------------------------
    // language_for_path
    // ---------------------------------------------------------------------

    #[test]
    fn language_for_path_basic_extension() {
        assert_eq!(language_for_path("src/main.rs"), "rust");
    }

    #[test]
    fn language_for_path_dockerfile_in_subdir() {
        assert_eq!(language_for_path("docker/Dockerfile"), "dockerfile");
    }

    #[test]
    fn language_for_path_dotted_directory() {
        assert_eq!(language_for_path("some.config/Dockerfile"), "dockerfile");
    }

    #[test]
    fn language_for_path_makefile_in_subdir() {
        assert_eq!(language_for_path("build/Makefile"), "makefile");
    }

    #[test]
    fn language_for_path_multiple_dots() {
        assert_eq!(language_for_path("file.test.ts"), "typescript");
    }

    #[test]
    fn language_for_path_dotfile() {
        assert_eq!(language_for_path(".gitignore"), "gitignore");
    }

    // ---------------------------------------------------------------------
    // GitHubPermalink::build_code_block
    // ---------------------------------------------------------------------

    #[test]
    fn build_code_block_full_file() {
        let link = make_permalink("src/main.rs", None);
        let body = "fn main() {\n    println!(\"hello\");\n}";
        let (language, code, metadata) = render(&link, body, 50);

        assert_eq!(language, "rust");
        assert_eq!(code, body);
        assert_eq!(metadata, "`src/main.rs` - owner/repo@abcdef1");
    }

    #[test]
    fn build_code_block_with_line_range() {
        let link = make_permalink("src/lib.rs", Some(LineRange { start: 2, end: 3 }));
        let (language, code, metadata) = render(&link, "line1\nline2\nline3\nline4", 50);

        assert_eq!(language, "rust");
        assert_eq!(code, "line2\nline3");
        assert!(metadata.contains("L2-L3"));
    }

    #[test]
    fn build_code_block_truncated() {
        let link = make_permalink("app.py", None);
        let (_, code, metadata) = render(&link, "a\nb\nc\nd\ne", 2);

        assert_eq!(code, "a\nb");
        assert!(metadata.contains("truncated to 2 lines"));
    }

    #[test]
    fn build_code_block_line_range_truncated() {
        let link = make_permalink("app.py", Some(LineRange { start: 1, end: 5 }));
        let (_, code, metadata) = render(&link, "a\nb\nc\nd\ne", 3);

        assert_eq!(code, "a\nb\nc");
        assert!(metadata.contains("L1-L5"));
        assert!(metadata.contains("truncated to 3 lines"));
    }

    #[test]
    fn build_code_block_range_clamped_to_file_end() {
        // The range runs past the last line: only the existing lines are shown,
        // while the metadata still reports the requested range.
        let link = make_permalink("src/lib.rs", Some(LineRange { start: 3, end: 10 }));
        let (_, code, metadata) = render(&link, "line1\nline2\nline3\nline4", 50);

        assert_eq!(code, "line3\nline4");
        assert!(metadata.contains("L3-L10"));
        assert!(!metadata.contains("truncated"));
    }

    #[test]
    fn build_code_block_range_outside_file() {
        // The range starts after the last line: nothing is selected, no panic.
        let link = make_permalink("src/lib.rs", Some(LineRange { start: 10, end: 12 }));
        let (_, code, metadata) = render(&link, "a\nb", 50);

        assert_eq!(code, "");
        assert!(metadata.contains("L10-L12"));
    }

    #[test]
    fn build_code_block_dockerfile_language() {
        let link = make_permalink("docker/Dockerfile", None);
        let (language, _, _) = render(&link, "FROM rust:latest", 50);

        assert_eq!(language, "dockerfile");
    }

    #[test]
    fn build_code_block_short_commit() {
        let link = permalink_at("abcd", "f.rs", None);
        let (_, _, metadata) = render(&link, "x", 50);

        assert!(metadata.contains("o/r@abcd"));
    }

    #[test]
    fn build_code_block_branch_ref() {
        let link = permalink_at("main", "f.rs", None);
        let (_, _, metadata) = render(&link, "x", 50);

        assert!(metadata.contains("o/r@main"));
    }

    #[test]
    fn build_code_block_branch_ref_with_line_range() {
        let link = permalink_at("develop", "src/lib.rs", Some(LineRange { start: 3, end: 5 }));
        let (_, code, metadata) = render(&link, "a\nb\nc\nd\ne\nf", 50);

        assert_eq!(code, "c\nd\ne");
        assert!(metadata.contains("L3-L5"));
        assert!(metadata.contains("o/r@develop"));
    }

    // ---------------------------------------------------------------------
    // is_commit_sha
    // ---------------------------------------------------------------------

    #[test]
    fn is_commit_sha_valid() {
        assert!(is_commit_sha("abcd1234"));
        // Upper-case hex digits are accepted as well.
        assert!(is_commit_sha("ABCD1234"));
    }

    #[test]
    fn is_commit_sha_boundary() {
        // Shortest and longest accepted lengths: 4 and 40 characters.
        assert!(is_commit_sha("abcd"));
        assert!(is_commit_sha("abcdef1234567890abcdef1234567890abcdef12"));
    }

    #[test]
    fn is_commit_sha_invalid() {
        assert!(!is_commit_sha("main"));
        assert!(!is_commit_sha("develop"));
        // One character below the minimum length.
        assert!(!is_commit_sha("abc"));
        // One character above the maximum length.
        assert!(!is_commit_sha("abcdef1234567890abcdef1234567890abcdef123"));
        // Right length, but not hexadecimal.
        assert!(!is_commit_sha("ghijkl"));
        assert!(!is_commit_sha(""));
    }

    // ---------------------------------------------------------------------
    // shorten_ref
    // ---------------------------------------------------------------------

    #[test]
    fn shorten_ref_commit() {
        assert_eq!(shorten_ref("abcdef1234567890"), "abcdef1");
    }

    #[test]
    fn shorten_ref_short_sha() {
        assert_eq!(shorten_ref("abcd"), "abcd");
    }

    #[test]
    fn shorten_ref_branch() {
        assert_eq!(shorten_ref("main"), "main");
        assert_eq!(shorten_ref("feature-branch"), "feature-branch");
        assert_eq!(shorten_ref("release-v1.0"), "release-v1.0");
        assert_eq!(shorten_ref("v2.0.0"), "v2.0.0");
    }
}