1use std::path::PathBuf;
67
68use regex::Regex;
69
70use crate::{AirError, Result, dev_log};
71use crate::Indexing::State::CreateState::{FileIndex, FileMetadata};
73
74pub const MAX_SEARCH_RESULTS_DEFAULT:u32 = 100;
76
77#[derive(Debug, Clone)]
79pub struct SearchQuery {
80 pub query:String,
82
83 pub mode:SearchMode,
85
86 pub case_sensitive:bool,
88
89 pub whole_word:bool,
91
92 pub regex:Option<Regex>,
94
95 pub max_results:u32,
97
98 pub page:u32,
100}
101
102#[derive(Debug, Clone, PartialEq)]
104pub enum SearchMode {
105 Literal,
107
108 Regex,
110
111 Fuzzy,
113
114 Exact,
116}
117
118#[derive(Debug, Clone)]
120pub struct SearchResult {
121 pub path:String,
123
124 pub file_name:String,
126
127 pub matches:Vec<SearchMatch>,
129
130 pub relevance:f64,
132
133 pub language:Option<String>,
135}
136
137#[derive(Debug, Clone)]
139pub struct SearchMatch {
140 pub line_number:u32,
142
143 pub line_content:String,
145
146 pub match_start:usize,
148
149 pub match_end:usize,
151
152 pub context_before:Vec<String>,
154
155 pub context_after:Vec<String>,
157}
158
159#[derive(Debug, Clone)]
161pub struct PaginatedSearchResults {
162 pub results:Vec<SearchResult>,
164
165 pub total_count:u32,
167
168 pub page:u32,
170
171 pub total_pages:u32,
173
174 pub page_size:u32,
176}
177
178impl IntoIterator for PaginatedSearchResults {
179 type Item = SearchResult;
180
181 type IntoIter = std::vec::IntoIter<SearchResult>;
182
183 fn into_iter(self) -> Self::IntoIter { self.results.into_iter() }
184}
185
186impl<'a> IntoIterator for &'a PaginatedSearchResults {
187 type Item = &'a SearchResult;
188
189 type IntoIter = std::slice::Iter<'a, SearchResult>;
190
191 fn into_iter(self) -> Self::IntoIter { self.results.iter() }
192}
193
194pub async fn QueryIndexSearch(
206 index:&FileIndex,
207
208 query:SearchQuery,
209
210 path_filter:Option<String>,
211
212 language_filter:Option<String>,
213) -> Result<PaginatedSearchResults> {
214 dev_log!(
215 "indexing",
216 "[QueryIndex] Searching for: '{}' (mode: {:?})",
217 query.query,
218 query.mode
219 );
220
221 let sanitized_query = SanitizeSearchQuery(&query.query)?;
223
224 let case_sensitive = query.case_sensitive;
226
227 let whole_word = query.whole_word;
228
229 let max_results = if query.max_results == 0 {
230 MAX_SEARCH_RESULTS_DEFAULT
231 } else {
232 query.max_results.min(1000) };
234
235 let mut all_results = Vec::new();
236
237 match query.mode {
239 SearchMode::Literal => {
240 QueryIndexLiteral(
241 &sanitized_query,
242 case_sensitive,
243 whole_word,
244 path_filter.as_deref(),
245 language_filter.as_deref(),
246 index,
247 &mut all_results,
248 )
249 .await;
250 },
251
252 SearchMode::Regex => {
253 if let Some(regex) = &query.regex {
254 QueryIndexRegex(
255 regex,
256 path_filter.as_deref(),
257 language_filter.as_deref(),
258 index,
259 &mut all_results,
260 )
261 .await;
262 } else {
263 if let Ok(regex) = Regex::new(&sanitized_query) {
265 QueryIndexRegex(
266 ®ex,
267 path_filter.as_deref(),
268 language_filter.as_deref(),
269 index,
270 &mut all_results,
271 )
272 .await;
273 }
274 }
275 },
276
277 SearchMode::Fuzzy => {
278 QueryIndexFuzzy(
279 &sanitized_query,
280 case_sensitive,
281 path_filter.as_deref(),
282 language_filter.as_deref(),
283 index,
284 &mut all_results,
285 )
286 .await;
287 },
288
289 SearchMode::Exact => {
290 QueryIndexExact(
291 &sanitized_query,
292 case_sensitive,
293 path_filter.as_deref(),
294 language_filter.as_deref(),
295 index,
296 &mut all_results,
297 )
298 .await;
299 },
300 }
301
302 all_results.sort_by(|a, b| b.relevance.partial_cmp(&a.relevance).unwrap());
304
305 let total_count = all_results.len() as u32;
307
308 let total_pages = if max_results == 0 { 0 } else { total_count.div_ceil(max_results) };
309
310 let page = query.page.min(total_pages.saturating_sub(1));
311
312 let start = (page * max_results) as usize;
314
315 let end = ((page + 1) * max_results).min(total_count) as usize;
316
317 let page_results = all_results[start..end].to_vec();
318
319 dev_log!(
320 "indexing",
321 "[QueryIndex] Search completed: {} total results, page {} of {}",
322 total_count,
323 page + 1,
324 total_pages
325 );
326
327 Ok(PaginatedSearchResults { results:page_results, total_count, page, total_pages, page_size:max_results })
328}
329
330pub fn SanitizeSearchQuery(query:&str) -> Result<String> {
332 let sanitized:String = query.chars().filter(|c| *c != '\0' && !c.is_control()).collect();
334
335 if sanitized.len() > 1000 {
337 return Err(AirError::Validation(
338 "Search query exceeds maximum length of 1000 characters".to_string(),
339 ));
340 }
341
342 Ok(sanitized)
343}
344
345async fn QueryIndexLiteral(
347 query:&str,
348
349 case_sensitive:bool,
350
351 whole_word:bool,
352
353 path_filter:Option<&str>,
354
355 language_filter:Option<&str>,
356
357 index:&FileIndex,
358
359 results:&mut Vec<SearchResult>,
360) {
361 let search_query = if case_sensitive { query.to_string() } else { query.to_lowercase() };
362
363 if let Some(file_paths) = index.content_index.get(&search_query.to_lowercase()) {
365 for file_path in file_paths {
366 if let Some(metadata) = index.files.get(file_path) {
367 if MatchesFilters(file_path, metadata, path_filter, language_filter) {
368 if let Ok(search_result) =
369 FindMatchesInFile(file_path, &search_query, case_sensitive, whole_word, index).await
370 {
371 if !search_result.matches.is_empty() {
372 results.push(search_result);
373 }
374 }
375 }
376 }
377 }
378 }
379
380 for (file_path, metadata) in &index.files {
382 if results.len() >= 1000 {
383 break;
384 }
385
386 let file_name = file_path.file_name().unwrap_or_default().to_string_lossy().to_string();
387
388 let name_to_search = if case_sensitive { file_name.clone() } else { file_name.to_lowercase() };
389
390 if name_to_search.contains(&search_query) {
391 if MatchesFilters(file_path, metadata, path_filter, language_filter) {
392 results.push(SearchResult {
394 path:file_path.to_string_lossy().to_string(),
395 file_name,
396 matches:Vec::new(),
397 relevance:0.3,
398 language:metadata.language.clone(),
399 });
400 }
401 }
402 }
403}
404
405async fn QueryIndexRegex(
407 regex:&Regex,
408
409 path_filter:Option<&str>,
410
411 language_filter:Option<&str>,
412
413 index:&FileIndex,
414
415 results:&mut Vec<SearchResult>,
416) {
417 for (file_path, metadata) in &index.files {
418 if results.len() >= 1000 {
419 break;
420 }
421
422 if !MatchesFilters(file_path, metadata, path_filter, language_filter) {
423 continue;
424 }
425
426 if let Ok(content) = tokio::fs::read_to_string(file_path).await {
427 let matches = FindRegexMatches(&content, regex);
428
429 if !matches.is_empty() {
430 let relevance = CalculateRelevance(&matches, metadata);
431
432 results.push(SearchResult {
433 path:file_path.to_string_lossy().to_string(),
434 file_name:file_path.file_name().unwrap_or_default().to_string_lossy().to_string(),
435 matches,
436 relevance,
437 language:metadata.language.clone(),
438 });
439 }
440 }
441 }
442}
443
444async fn QueryIndexFuzzy(
446 query:&str,
447
448 case_sensitive:bool,
449
450 path_filter:Option<&str>,
451
452 language_filter:Option<&str>,
453
454 index:&FileIndex,
455
456 results:&mut Vec<SearchResult>,
457) {
458 let query_lower = query.to_lowercase();
459
460 for (file_path, metadata) in &index.files {
461 if results.len() >= 1000 {
462 break;
463 }
464
465 if !MatchesFilters(file_path, metadata, path_filter, language_filter) {
466 continue;
467 }
468
469 if let Ok(content) = tokio::fs::read_to_string(file_path).await {
470 const MAX_FUZZY_DISTANCE:usize = 2;
471
472 let matches = FindFuzzyMatches(&content, &query_lower, case_sensitive, MAX_FUZZY_DISTANCE);
473
474 if !matches.is_empty() {
475 let relevance = CalculateRelevance(&matches, metadata) * 0.8; results.push(SearchResult {
478 path:file_path.to_string_lossy().to_string(),
479 file_name:file_path.file_name().unwrap_or_default().to_string_lossy().to_string(),
480 matches,
481 relevance,
482 language:metadata.language.clone(),
483 });
484 }
485 }
486 }
487}
488
489async fn QueryIndexExact(
491 query:&str,
492
493 _case_sensitive:bool,
494
495 path_filter:Option<&str>,
496
497 language_filter:Option<&str>,
498
499 index:&FileIndex,
500
501 results:&mut Vec<SearchResult>,
502) {
503 for (file_path, metadata) in &index.files {
504 if results.len() >= 1000 {
505 break;
506 }
507
508 if !MatchesFilters(file_path, metadata, path_filter, language_filter) {
509 continue;
510 }
511
512 if let Ok(content) = tokio::fs::read_to_string(file_path).await {
513 let matches = FindExactMatches(&content, query);
514
515 if !matches.is_empty() {
516 let relevance = CalculateRelevance(&matches, metadata) * 1.1; results.push(SearchResult {
519 path:file_path.to_string_lossy().to_string(),
520 file_name:file_path.file_name().unwrap_or_default().to_string_lossy().to_string(),
521 matches,
522 relevance,
523 language:metadata.language.clone(),
524 });
525 }
526 }
527 }
528}
529
530async fn FindMatchesInFile(
532 file_path:&PathBuf,
533
534 query:&str,
535
536 case_sensitive:bool,
537
538 whole_word:bool,
539
540 index:&FileIndex,
541) -> Result<SearchResult> {
542 let content = tokio::fs::read_to_string(file_path)
543 .await
544 .map_err(|e| AirError::FileSystem(format!("Failed to read file: {}", e)))?;
545
546 let metadata = index
547 .files
548 .get(file_path)
549 .ok_or_else(|| AirError::Internal("File metadata not found in index".to_string()))?;
550
551 let matches = FindMatchesWithContext(&content, query, case_sensitive, whole_word);
552
553 let relevance = CalculateRelevance(&matches, metadata);
554
555 Ok(SearchResult {
556 path:file_path.to_string_lossy().to_string(),
557 file_name:file_path.file_name().unwrap_or_default().to_string_lossy().to_string(),
558 matches,
559 relevance,
560 language:metadata.language.clone(),
561 })
562}
563
564fn FindMatchesWithContext(content:&str, query:&str, case_sensitive:bool, whole_word:bool) -> Vec<SearchMatch> {
566 let mut matches = Vec::new();
567
568 let lines:Vec<&str> = content.lines().collect();
569
570 let search_in = |line:&str| -> Option<(usize, usize)> {
571 let line_to_search = if case_sensitive { line.to_string() } else { line.to_lowercase() };
572
573 let query_to_find = if case_sensitive { query.to_string() } else { query.to_lowercase() };
574
575 let start = if whole_word {
576 FindWholeWordMatch(&line_to_search, &query_to_find)
577 } else {
578 line_to_search.find(&query_to_find)
579 };
580
581 start.map(|s| (s, s + query.len()))
582 };
583
584 for (line_idx, line) in lines.iter().enumerate() {
585 let line_number = line_idx as u32 + 1;
586
587 if let Some((match_start, match_end)) = search_in(line) {
588 let context_start = line_idx.saturating_sub(2);
590
591 let context_end = (line_idx + 3).min(lines.len());
592
593 let context_before = lines[context_start..line_idx].iter().map(|s| s.to_string()).collect();
594
595 let context_after = lines[line_idx + 1..context_end].iter().map(|s| s.to_string()).collect();
596
597 matches.push(SearchMatch {
598 line_number,
599 line_content:line.to_string(),
600 match_start,
601 match_end,
602 context_before,
603 context_after,
604 });
605 }
606 }
607
608 matches
609}
610
611fn FindWholeWordMatch(line:&str, word:&str) -> Option<usize> {
613 let mut start = 0;
614
615 while let Some(pos) = line[start..].find(word) {
616 let actual_pos = start + pos;
617
618 let valid_before = actual_pos == 0
620 || line
621 .chars()
622 .nth(actual_pos - 1)
623 .map_or(true, |c| !c.is_alphanumeric() && c != '_');
624
625 let match_end = actual_pos + word.len();
627
628 let valid_after =
629 match_end == line.len() || line.chars().nth(match_end).map_or(true, |c| !c.is_alphanumeric() && c != '_');
630
631 if valid_before && valid_after {
632 return Some(actual_pos);
633 }
634
635 start = actual_pos + 1;
636 }
637
638 None
639}
640
641fn FindRegexMatches(content:&str, regex:&Regex) -> Vec<SearchMatch> {
643 let mut matches = Vec::new();
644
645 let lines:Vec<&str> = content.lines().collect();
646
647 for (line_idx, line) in lines.iter().enumerate() {
648 let line_number = line_idx as u32 + 1;
649
650 for mat in regex.find_iter(line) {
651 matches.push(SearchMatch {
652 line_number,
653 line_content:line.to_string(),
654 match_start:mat.start(),
655 match_end:mat.end(),
656 context_before:Vec::new(),
657 context_after:Vec::new(),
658 });
659 }
660 }
661
662 matches
663}
664
665fn FindFuzzyMatches(content:&str, query:&str, case_sensitive:bool, max_distance:usize) -> Vec<SearchMatch> {
667 let mut matches = Vec::new();
668
669 let lines:Vec<&str> = content.lines().collect();
670
671 for (line_idx, line) in lines.iter().enumerate() {
672 let line_number = line_idx as u32 + 1;
673
674 let line_to_search = if case_sensitive { line.to_string() } else { line.to_lowercase() };
675
676 if let Some(pos) = line_to_search.find(query) {
678 let distance = CalculateLevenshteinDistance(&line_to_search[pos..pos.saturating_add(query.len())], query);
680
681 if distance <= max_distance {
682 matches.push(SearchMatch {
683 line_number,
684 line_content:line.to_string(),
685 match_start:pos,
686 match_end:pos + query.len(),
687 context_before:Vec::new(),
688 context_after:Vec::new(),
689 });
690 }
691 }
692 }
693
694 matches
695}
696
697fn FindExactMatches(content:&str, query:&str) -> Vec<SearchMatch> { FindMatchesWithContext(content, query, true, true) }
699
700fn CalculateLevenshteinDistance(s1:&str, s2:&str) -> usize {
702 let s1_chars:Vec<char> = s1.chars().collect();
703
704 let s2_chars:Vec<char> = s2.chars().collect();
705
706 let len1 = s1_chars.len();
707
708 let len2 = s2_chars.len();
709
710 let mut dp = vec![vec![0usize; len2 + 1]; len1 + 1];
712
713 for i in 0..=len1 {
715 dp[i][0] = i;
716 }
717
718 for j in 0..=len2 {
719 dp[0][j] = j;
720 }
721
722 for i in 1..=len1 {
724 for j in 1..=len2 {
725 if s1_chars[i - 1] == s2_chars[j - 1] {
726 dp[i][j] = dp[i - 1][j - 1];
727 } else {
728 dp[i][j] = 1 + [
729 dp[i - 1][j], dp[i][j - 1], dp[i - 1][j - 1], ]
733 .into_iter()
734 .min()
735 .unwrap();
736 }
737 }
738 }
739
740 dp[len1][len2]
741}
742
743fn CalculateRelevance(matches:&[SearchMatch], metadata:&FileMetadata) -> f64 {
745 let match_count = matches.len();
746
747 let line_count = metadata.line_count.unwrap_or(1) as f64;
748
749 let mut relevance = (match_count as f64 / line_count) * 10.0;
751
752 if match_count > 0 {
754 relevance += (match_count as f64).log10() * 0.5;
755 }
756
757 let days_old = (chrono::Utc::now() - metadata.modified).num_days() as f64;
759
760 relevance += 1.0 / (days_old + 1.0).max(1.0);
761
762 relevance.min(10.0).max(0.0)
763}
764
765pub fn MatchesFilters(
767 file_path:&PathBuf,
768
769 metadata:&FileMetadata,
770
771 path_filter:Option<&str>,
772
773 language_filter:Option<&str>,
774) -> bool {
775 if let Some(search_path) = path_filter {
777 if !file_path.to_string_lossy().contains(search_path) {
778 return false;
779 }
780 }
781
782 if let Some(lang) = language_filter {
784 if metadata.language.as_deref() != Some(lang) {
785 return false;
786 }
787 }
788
789 true
790}