chore(users/edef): move to contrib
Change-Id: I1a6972fab8ada26917f29607fc401e376d634070
This commit is contained in:
		
							parent
							
								
									a7916624dc
								
							
						
					
					
						commit
						403d8fc897
					
				
					 55 changed files with 15 additions and 17 deletions
				
			
		
							
								
								
									
										154
									
								
								contrib/refscan/src/lib.rs
									
										
									
									
									
										Normal file
									
								
							
							
						
						
									
										154
									
								
								contrib/refscan/src/lib.rs
									
										
									
									
									
										Normal file
									
								
							|  | @ -0,0 +1,154 @@ | |||
| // SPDX-FileCopyrightText: edef <edef@edef.eu>
 | ||||
| // SPDX-License-Identifier: MPL-2.0
 | ||||
| 
 | ||||
| use self::simd::u8x32; | ||||
| 
 | ||||
| fn prefilter(haystack: u8x32) -> u32 { | ||||
|     let alp = haystack.gt(u8x32::splat(b'a' - 1)) & haystack.lt(u8x32::splat(b'z' + 1)); | ||||
|     let num = haystack.gt(u8x32::splat(b'0' - 1)) & haystack.lt(u8x32::splat(b'9' + 1)); | ||||
|     alp | num | ||||
| } | ||||
| 
 | ||||
| /// scan_clean returns `Err(&buffer[..n])` of known pointer-free data,
 | ||||
| /// or `Ok(buffer)` if the entire buffer is pointer-free.
 | ||||
| pub fn scan_clean(buffer: &[u8]) -> Result<&[u8], &[u8]> { | ||||
|     let buffer = { | ||||
|         let n = buffer.len() & !31; | ||||
|         &buffer[..n] | ||||
|     }; | ||||
| 
 | ||||
|     let mut masks = buffer | ||||
|         .chunks_exact(32) | ||||
|         .map(|chunk| prefilter(u8x32::from_slice_unaligned(chunk))) | ||||
|         .enumerate() | ||||
|         .map(|e| (e.0 * 32, e.1)) | ||||
|         .peekable(); | ||||
| 
 | ||||
|     while let Some((offset, mask)) = masks.next() { | ||||
|         let peek = masks.peek().map(|x| x.1).unwrap_or(!0 >> 1); | ||||
|         let n = (!mask).leading_zeros() + (!peek).trailing_zeros(); | ||||
|         if n >= 32 { | ||||
|             let offset = offset + mask.trailing_zeros() as usize; | ||||
|             return Err(&buffer[..offset]); | ||||
|         } | ||||
|     } | ||||
| 
 | ||||
|     Ok(buffer) | ||||
| } | ||||
| 
 | ||||
#[cfg(test)]
mod test {
    /// A candidate run that reaches the end of the examined chunk must be
    /// reported, since it could continue in data not yet seen.
    #[test]
    fn scan_tail() {
        let input = b"_xfbmj7sl2ikicym9x3yq7cms5qx1w39k";
        let outcome = crate::scan_clean(input);
        assert_eq!(outcome, Err(&input[..1]));
    }

    /// A 32-byte run split evenly across two chunks is detected, and the
    /// clean prefix stops right before it.
    #[test]
    fn scan_straddle() {
        let input = b"________________xfbmj7sl2ikicym9x3yq7cms5qx1w39k________________";
        let outcome = crate::scan_clean(input);
        assert_eq!(outcome, Err(&input[..16]));
    }

    /// Runs interrupted by non-candidate bytes never reach 32 bytes, so the
    /// whole input is reported clean.
    #[test]
    fn scan_clean() {
        let input = b"x_______________xfbmj7sl2ikicym9x3yq-cms5qx1w3-k________________";
        let outcome = crate::scan_clean(input);
        assert_eq!(outcome, Ok(&input[..]));
    }
}
| 
 | ||||
/// 32-lane byte vector for x86 / x86_64, backed by a 256-bit AVX register.
///
/// NOTE(review): the intrinsics used here need AVX/AVX2 at run time, and
/// nothing visible in this module checks for it; confirm the build targets a
/// CPU baseline that provides them.
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
mod simd {
    #[cfg(target_arch = "x86")]
    use std::arch::x86 as arch;
    #[cfg(target_arch = "x86_64")]
    use std::arch::x86_64 as arch;
    use self::arch::{__m256i, _mm256_cmpgt_epi8, _mm256_movemask_epi8, _mm256_set1_epi8};
    use std::ptr;

    /// Thirty-two `u8` lanes.
    #[allow(non_camel_case_types)]
    #[derive(Copy, Clone)]
    pub struct u8x32(__m256i);

    impl u8x32 {
        /// Loads exactly 32 bytes from `slice`; no alignment is required.
        ///
        /// # Panics
        ///
        /// Panics if `slice.len() != 32`.
        #[inline(always)]
        pub fn from_slice_unaligned(slice: &[u8]) -> Self {
            assert_eq!(slice.len(), 32);
            let src = slice.as_ptr().cast::<__m256i>();
            // SAFETY: the assertion above guarantees 32 readable bytes behind
            // `src`, and `read_unaligned` imposes no alignment requirement.
            Self(unsafe { ptr::read_unaligned(src) })
        }

        /// Returns a vector with every lane set to `x`.
        #[inline(always)]
        pub fn splat(x: u8) -> Self {
            // SAFETY: see the module-level note on CPU feature availability.
            let lanes = unsafe { _mm256_set1_epi8(x as i8) };
            Self(lanes)
        }

        /// Lane-wise `self > b`, returned as a bitmask (bit `i` = lane `i`).
        ///
        /// The comparison is on *signed* bytes, so lanes holding values of
        /// `0x80` and above compare as negative.
        #[inline(always)]
        pub fn gt(self, b: Self) -> u32 {
            // SAFETY: see the module-level note on CPU feature availability.
            unsafe {
                let cmp = _mm256_cmpgt_epi8(self.0, b.0);
                _mm256_movemask_epi8(cmp) as u32
            }
        }

        /// Lane-wise `self < b`, returned as a bitmask (bit `i` = lane `i`).
        #[inline(always)]
        pub fn lt(self, b: Self) -> u32 {
            // `a < b` is `b > a`; reuse the single comparison primitive.
            b.gt(self)
        }
    }
}
| 
 | ||||
/// 32-lane byte vector for aarch64, built from two 128-bit NEON registers.
#[cfg(target_arch = "aarch64")]
mod simd {
    use std::{
        arch::aarch64::{
            uint8x16_t as u8x16, vaddv_u8, vandq_u8, vcgtq_u8, vdupq_n_u8, vget_high_u8,
            vget_low_u8, vshlq_u8,
        },
        mem, ptr,
    };

    /// Thirty-two `u8` lanes, stored as `[low 16 bytes, high 16 bytes]`.
    #[allow(non_camel_case_types)]
    #[derive(Copy, Clone)]
    #[repr(transparent)]
    pub struct u8x32([u8x16; 2]);

    impl u8x32 {
        /// Loads exactly 32 bytes from `slice`; no alignment is required.
        ///
        /// Only provided on little-endian targets.
        ///
        /// # Panics
        ///
        /// Panics if `slice.len() != 32`.
        #[cfg(target_endian = "little")]
        #[inline(always)]
        pub fn from_slice_unaligned(slice: &[u8]) -> Self {
            assert_eq!(slice.len(), 32);
            let src = slice.as_ptr().cast::<[u8x16; 2]>();
            // SAFETY: the assertion above guarantees 32 readable bytes behind
            // `src`, and `read_unaligned` imposes no alignment requirement.
            let halves = unsafe { ptr::read_unaligned(src) };
            u8x32(halves)
        }

        /// Returns a vector with every lane set to `x`.
        #[inline(always)]
        pub fn splat(x: u8) -> Self {
            let half = unsafe { vdupq_n_u8(x) };
            u8x32([half, half])
        }

        /// Lane-wise unsigned `self > b`, returned as a bitmask
        /// (bit `i` = lane `i`).
        #[inline(always)]
        pub fn gt(&self, b: Self) -> u32 {
            /// Compares one 16-lane half and packs the outcome into the low
            /// 16 bits of the result.
            fn half_mask(a: u8x16, b: u8x16) -> u32 {
                // Negative shift counts shift right: lane `j` of each 8-lane
                // group moves its top bit down to bit position `j`.
                const SHIFTS: [i8; 16] = [
                    -7, -6, -5, -4, -3, -2, -1, 0, -7, -6, -5, -4, -3, -2, -1, 0,
                ];

                unsafe {
                    // Keep only the top bit of each all-ones / all-zeros lane.
                    let top_bits = vandq_u8(vdupq_n_u8(0x80), vcgtq_u8(a, b));
                    let placed = vshlq_u8(top_bits, mem::transmute(SHIFTS));

                    // Each lane now holds a distinct bit, so summing eight
                    // lanes yields an 8-bit mask.
                    let low = vaddv_u8(vget_low_u8(placed)) as u32;
                    let high = vaddv_u8(vget_high_u8(placed)) as u32;
                    low | (high << 8)
                }
            }

            let u8x32([a_low, a_high]) = *self;
            let u8x32([b_low, b_high]) = b;

            half_mask(a_low, b_low) | (half_mask(a_high, b_high) << 16)
        }

        /// Lane-wise unsigned `self < b`, returned as a bitmask
        /// (bit `i` = lane `i`).
        #[inline(always)]
        pub fn lt(self, b: Self) -> u32 {
            // `a < b` is `b > a`; reuse the single comparison primitive.
            b.gt(self)
        }
    }
}
							
								
								
									
										58
									
								
								contrib/refscan/src/main.rs
									
										
									
									
									
										Normal file
									
								
							
							
						
						
									
										58
									
								
								contrib/refscan/src/main.rs
									
										
									
									
									
										Normal file
									
								
							|  | @ -0,0 +1,58 @@ | |||
| // SPDX-FileCopyrightText: edef <edef@edef.eu>
 | ||||
| // SPDX-License-Identifier: MPL-2.0
 | ||||
| 
 | ||||
| use std::{ | ||||
|     collections::BTreeSet as Set, | ||||
|     convert::TryInto, | ||||
|     io::{self, Read}, | ||||
|     str, | ||||
| }; | ||||
| 
 | ||||
| fn main() { | ||||
|     let max_refs: Set<[u8; 32]> = include_str!("../testdata/maxrefs") | ||||
|         .lines() | ||||
|         .map(|l| l.as_bytes().try_into().unwrap()) | ||||
|         .collect(); | ||||
| 
 | ||||
|     let input = { | ||||
|         let stdin = io::stdin(); | ||||
|         let mut buffer = Vec::new(); | ||||
|         stdin.lock().read_to_end(&mut buffer).unwrap(); | ||||
|         buffer | ||||
|     }; | ||||
| 
 | ||||
|     let base = input.as_ptr() as usize; | ||||
|     let mut input: &[u8] = &input; | ||||
|     while input.len() >= 32 { | ||||
|         match refscan::scan_clean(&input) { | ||||
|             Ok(buffer) | Err(buffer) => { | ||||
|                 let n = buffer.len(); | ||||
|                 input = &input[n..]; | ||||
|             } | ||||
|         } | ||||
| 
 | ||||
|         let buffer = { | ||||
|             let idx = input.iter().position(|x| match x { | ||||
|                 b'a'..=b'z' | b'0'..=b'9' => false, | ||||
|                 _ => true, | ||||
|             }); | ||||
|             idx.map(|idx| &input[..idx]).unwrap_or(input) | ||||
|         }; | ||||
| 
 | ||||
|         for chunk in buffer.windows(32) { | ||||
|             let offset = (chunk.as_ptr() as usize) - base; | ||||
|             let chunk = { | ||||
|                 let mut fixed = [0u8; 32]; | ||||
|                 fixed.copy_from_slice(chunk); | ||||
|                 fixed | ||||
|             }; | ||||
|             if max_refs.contains(&chunk) { | ||||
|                 let seen = unsafe { str::from_utf8_unchecked(&chunk) }; | ||||
|                 println!("{} {}", seen, offset); | ||||
|             } | ||||
|         } | ||||
| 
 | ||||
|         let n = buffer.len(); | ||||
|         input = &input[n..]; | ||||
|     } | ||||
| } | ||||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue