refactor(users/edef/refscan): switch to intrinsics
packed_simd is deprecated, but we don't need very much SIMD: * _mm256_set1_epi8 / vpbroadcastb (splat) * _mm256_cmpgt_epi8 / vpcmpgtb (comparison) * _mm256_movemask_epi8 / vpmovmskb (compress to bitmask) This also simplifies the code by only vectorising the bare minimum, since we just get a bitmask and operate in scalar mode as soon as possible. We don't need nightly Rust anymore: we're using only stable intrinsics. Change-Id: Id410b5fef2549f3c97f48049f722f1e643e68553 Reviewed-on: https://cl.tvl.fyi/c/depot/+/7687 Reviewed-by: edef <edef@edef.eu> Tested-by: BuildkiteCI
This commit is contained in:
parent
0b3c0725a2
commit
cb764e87de
3 changed files with 41 additions and 25 deletions
|
|
@ -1,6 +1,6 @@
|
|||
use packed_simd::{m8x32, u8x32};
|
||||
use self::simd::u8x32;
|
||||
|
||||
fn prefilter(haystack: u8x32) -> m8x32 {
|
||||
fn prefilter(haystack: u8x32) -> u32 {
|
||||
let alp = haystack.gt(u8x32::splat(b'a' - 1)) & haystack.lt(u8x32::splat(b'z' + 1));
|
||||
let num = haystack.gt(u8x32::splat(b'0' - 1)) & haystack.lt(u8x32::splat(b'9' + 1));
|
||||
alp | num
|
||||
|
|
@ -16,7 +16,7 @@ pub fn scan_clean(buffer: &[u8]) -> Result<&[u8], &[u8]> {
|
|||
|
||||
let mut masks = buffer
|
||||
.chunks_exact(32)
|
||||
.map(|chunk| prefilter(u8x32::from_slice_unaligned(chunk)).bitmask())
|
||||
.map(|chunk| prefilter(u8x32::from_slice_unaligned(chunk)))
|
||||
.enumerate()
|
||||
.map(|e| (e.0 * 32, e.1))
|
||||
.peekable();
|
||||
|
|
@ -51,3 +51,40 @@ mod test {
|
|||
assert_eq!(crate::scan_clean(buffer), Ok(&buffer[..]));
|
||||
}
|
||||
}
|
||||
|
||||
mod simd {
|
||||
#[cfg(target_arch = "x86")]
|
||||
use std::arch::x86 as arch;
|
||||
#[cfg(target_arch = "x86_64")]
|
||||
use std::arch::x86_64 as arch;
|
||||
use {
|
||||
arch::{__m256i, _mm256_cmpgt_epi8, _mm256_movemask_epi8, _mm256_set1_epi8},
|
||||
std::ptr,
|
||||
};
|
||||
|
||||
#[derive(Copy, Clone)]
|
||||
pub struct u8x32(__m256i);
|
||||
|
||||
impl u8x32 {
|
||||
#[inline(always)]
|
||||
pub fn from_slice_unaligned(slice: &[u8]) -> Self {
|
||||
assert_eq!(slice.len(), 32);
|
||||
u8x32(unsafe { ptr::read_unaligned(slice.as_ptr().cast()) })
|
||||
}
|
||||
|
||||
#[inline(always)]
|
||||
pub fn splat(x: u8) -> Self {
|
||||
u8x32(unsafe { _mm256_set1_epi8(x as i8) })
|
||||
}
|
||||
|
||||
#[inline(always)]
|
||||
pub fn gt(self, b: Self) -> u32 {
|
||||
unsafe { _mm256_movemask_epi8(_mm256_cmpgt_epi8(self.0, b.0)) as u32 }
|
||||
}
|
||||
|
||||
#[inline(always)]
|
||||
pub fn lt(self, b: Self) -> u32 {
|
||||
b.gt(self)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue