refactor(users/edef/refscan): switch to intrinsics
packed_simd is deprecated, but we don't need very much SIMD:

* _mm256_set1_epi8 / vpbroadcastb (splat)
* _mm256_cmpgt_epi8 / vpcmpgtb (comparison)
* _mm256_movemask_epi8 / vpmovmskb (compress to bitmask)

This also simplifies the code by only vectorising the bare minimum, since we just get a bitmask and operate in scalar mode as soon as possible.

We don't need nightly Rust anymore: we're using only stable intrinsics.

Change-Id: Id410b5fef2549f3c97f48049f722f1e643e68553
Reviewed-on: https://cl.tvl.fyi/c/depot/+/7687
Reviewed-by: edef <edef@edef.eu>
Tested-by: BuildkiteCI
This commit is contained in:
		
							parent
							
								
									0b3c0725a2
								
							
						
					
					
						commit
						cb764e87de
					
				
					 3 changed files with 41 additions and 25 deletions
				
			
		
							
								
								
									
										20
									
								
								users/edef/refscan/Cargo.lock
									
										
									
										generated
									
									
									
								
							
							
						
						
									
										20
									
								
								users/edef/refscan/Cargo.lock
									
										
									
										generated
									
									
									
								
							| 
						 | 
					@ -1,25 +1,7 @@
 | 
				
			||||||
# This file is automatically @generated by Cargo.
 | 
					# This file is automatically @generated by Cargo.
 | 
				
			||||||
# It is not intended for manual editing.
 | 
					# It is not intended for manual editing.
 | 
				
			||||||
[[package]]
 | 
					version = 3
 | 
				
			||||||
name = "cfg-if"
 | 
					 | 
				
			||||||
version = "0.1.10"
 | 
					 | 
				
			||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
[[package]]
 | 
					 | 
				
			||||||
name = "packed_simd"
 | 
					 | 
				
			||||||
version = "0.3.3"
 | 
					 | 
				
			||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
 | 
					 | 
				
			||||||
dependencies = [
 | 
					 | 
				
			||||||
 "cfg-if 0.1.10 (registry+https://github.com/rust-lang/crates.io-index)",
 | 
					 | 
				
			||||||
]
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
[[package]]
 | 
					[[package]]
 | 
				
			||||||
name = "refscan"
 | 
					name = "refscan"
 | 
				
			||||||
version = "0.1.0"
 | 
					version = "0.1.0"
 | 
				
			||||||
dependencies = [
 | 
					 | 
				
			||||||
 "packed_simd 0.3.3 (registry+https://github.com/rust-lang/crates.io-index)",
 | 
					 | 
				
			||||||
]
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
[metadata]
 | 
					 | 
				
			||||||
"checksum cfg-if 0.1.10 (registry+https://github.com/rust-lang/crates.io-index)" = "4785bdd1c96b2a846b2bd7cc02e86b6b3dbf14e7e53446c4f54c92a361040822"
 | 
					 | 
				
			||||||
"checksum packed_simd 0.3.3 (registry+https://github.com/rust-lang/crates.io-index)" = "a85ea9fc0d4ac0deb6fe7911d38786b32fc11119afd9e9d38b84ff691ce64220"
 | 
					 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -5,6 +5,3 @@ authors = ["edef <edef@edef.eu>"]
 | 
				
			||||||
edition = "2018"
 | 
					edition = "2018"
 | 
				
			||||||
 | 
					
 | 
				
			||||||
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
 | 
					# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
 | 
				
			||||||
 | 
					 | 
				
			||||||
[dependencies]
 | 
					 | 
				
			||||||
packed_simd = "0.3.3"
 | 
					 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -1,6 +1,6 @@
 | 
				
			||||||
use packed_simd::{m8x32, u8x32};
 | 
					use self::simd::u8x32;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
fn prefilter(haystack: u8x32) -> m8x32 {
 | 
					fn prefilter(haystack: u8x32) -> u32 {
 | 
				
			||||||
    let alp = haystack.gt(u8x32::splat(b'a' - 1)) & haystack.lt(u8x32::splat(b'z' + 1));
 | 
					    let alp = haystack.gt(u8x32::splat(b'a' - 1)) & haystack.lt(u8x32::splat(b'z' + 1));
 | 
				
			||||||
    let num = haystack.gt(u8x32::splat(b'0' - 1)) & haystack.lt(u8x32::splat(b'9' + 1));
 | 
					    let num = haystack.gt(u8x32::splat(b'0' - 1)) & haystack.lt(u8x32::splat(b'9' + 1));
 | 
				
			||||||
    alp | num
 | 
					    alp | num
 | 
				
			||||||
| 
						 | 
					@ -16,7 +16,7 @@ pub fn scan_clean(buffer: &[u8]) -> Result<&[u8], &[u8]> {
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    let mut masks = buffer
 | 
					    let mut masks = buffer
 | 
				
			||||||
        .chunks_exact(32)
 | 
					        .chunks_exact(32)
 | 
				
			||||||
        .map(|chunk| prefilter(u8x32::from_slice_unaligned(chunk)).bitmask())
 | 
					        .map(|chunk| prefilter(u8x32::from_slice_unaligned(chunk)))
 | 
				
			||||||
        .enumerate()
 | 
					        .enumerate()
 | 
				
			||||||
        .map(|e| (e.0 * 32, e.1))
 | 
					        .map(|e| (e.0 * 32, e.1))
 | 
				
			||||||
        .peekable();
 | 
					        .peekable();
 | 
				
			||||||
| 
						 | 
					@ -51,3 +51,40 @@ mod test {
 | 
				
			||||||
        assert_eq!(crate::scan_clean(buffer), Ok(&buffer[..]));
 | 
					        assert_eq!(crate::scan_clean(buffer), Ok(&buffer[..]));
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					/// Minimal 32-lane byte vector built directly on stable AVX2 intrinsics.
					///
					/// Only the operations the scanner needs are provided: an unaligned load,
					/// a splat, and lane-wise comparisons that are immediately compressed to a
					/// 32-bit mask (bit `i` corresponds to byte `i` of the vector).
					///
					/// NOTE(review): every operation here executes AVX/AVX2 instructions. The
					/// crate is assumed to be built for / run on CPUs with AVX2 — nothing in
					/// this module verifies that at runtime; confirm against the build flags.
					mod simd {
					    #[cfg(not(any(target_arch = "x86", target_arch = "x86_64")))]
					    compile_error!("the simd module requires an x86 or x86_64 target with AVX2");

					    #[cfg(target_arch = "x86")]
					    use std::arch::x86 as arch;
					    #[cfg(target_arch = "x86_64")]
					    use std::arch::x86_64 as arch;

					    use self::arch::{
					        __m256i, _mm256_cmpgt_epi8, _mm256_movemask_epi8, _mm256_set1_epi8, _mm256_xor_si256,
					    };
					    use std::ptr;

					    /// 32 unsigned bytes held in a single 256-bit register.
					    // The lowercase name deliberately mirrors the packed_simd type it replaces.
					    #[allow(non_camel_case_types)]
					    #[derive(Copy, Clone)]
					    pub struct u8x32(__m256i);

					    impl u8x32 {
					        /// Loads exactly 32 bytes from `slice`, with no alignment requirement.
					        ///
					        /// # Panics
					        ///
					        /// Panics if `slice.len() != 32`.
					        #[inline(always)]
					        pub fn from_slice_unaligned(slice: &[u8]) -> Self {
					            assert_eq!(slice.len(), 32);
					            // SAFETY: the assertion above guarantees 32 readable bytes behind the
					            // pointer, `read_unaligned` imposes no alignment requirement, and every
					            // bit pattern is a valid `__m256i`.
					            u8x32(unsafe { ptr::read_unaligned(slice.as_ptr().cast::<__m256i>()) })
					        }

					        /// Returns a vector with all 32 lanes set to `x`.
					        #[inline(always)]
					        pub fn splat(x: u8) -> Self {
					            // The intrinsic takes `i8`; the cast only reinterprets the bit pattern.
					            // SAFETY: requires AVX support on the executing CPU (see module docs).
					            u8x32(unsafe { _mm256_set1_epi8(x as i8) })
					        }

					        /// Lane-wise *unsigned* `self > b`, returned as a bitmask.
					        ///
					        /// Bit `i` of the result is set iff byte `i` of `self` is strictly
					        /// greater than byte `i` of `b`.
					        #[inline(always)]
					        pub fn gt(self, b: Self) -> u32 {
					            // `vpcmpgtb` compares lanes as *signed* bytes, which would treat every
					            // byte >= 0x80 as negative. Flipping the top bit of both operands maps
					            // the unsigned order onto the signed order, so the signed comparison
					            // yields the unsigned result.
					            // SAFETY: requires AVX2 support on the executing CPU (see module docs);
					            // the intrinsics have no other preconditions.
					            unsafe {
					                let bias = _mm256_set1_epi8(i8::MIN);
					                let lhs = _mm256_xor_si256(self.0, bias);
					                let rhs = _mm256_xor_si256(b.0, bias);
					                _mm256_movemask_epi8(_mm256_cmpgt_epi8(lhs, rhs)) as u32
					            }
					        }

					        /// Lane-wise *unsigned* `self < b`, returned as a bitmask.
					        ///
					        /// Implemented as `b > self`, since AVX2 only offers a greater-than
					        /// byte comparison.
					        #[inline(always)]
					        pub fn lt(self, b: Self) -> u32 {
					            b.gt(self)
					        }
					    }
					}
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue