chore(users/edef): move to contrib
Change-Id: I1a6972fab8ada26917f29607fc401e376d634070
This commit is contained in:
parent
a7916624dc
commit
403d8fc897
55 changed files with 15 additions and 17 deletions
154
contrib/refscan/src/lib.rs
Normal file
154
contrib/refscan/src/lib.rs
Normal file
|
|
@ -0,0 +1,154 @@
|
|||
// SPDX-FileCopyrightText: edef <edef@edef.eu>
|
||||
// SPDX-License-Identifier: MPL-2.0
|
||||
|
||||
use self::simd::u8x32;
|
||||
|
||||
fn prefilter(haystack: u8x32) -> u32 {
|
||||
let alp = haystack.gt(u8x32::splat(b'a' - 1)) & haystack.lt(u8x32::splat(b'z' + 1));
|
||||
let num = haystack.gt(u8x32::splat(b'0' - 1)) & haystack.lt(u8x32::splat(b'9' + 1));
|
||||
alp | num
|
||||
}
|
||||
|
||||
/// scan_clean returns `Err(&buffer[..n])` of known pointer-free data,
|
||||
/// or `Ok(buffer)` if the entire buffer is pointer-free.
|
||||
pub fn scan_clean(buffer: &[u8]) -> Result<&[u8], &[u8]> {
|
||||
let buffer = {
|
||||
let n = buffer.len() & !31;
|
||||
&buffer[..n]
|
||||
};
|
||||
|
||||
let mut masks = buffer
|
||||
.chunks_exact(32)
|
||||
.map(|chunk| prefilter(u8x32::from_slice_unaligned(chunk)))
|
||||
.enumerate()
|
||||
.map(|e| (e.0 * 32, e.1))
|
||||
.peekable();
|
||||
|
||||
while let Some((offset, mask)) = masks.next() {
|
||||
let peek = masks.peek().map(|x| x.1).unwrap_or(!0 >> 1);
|
||||
let n = (!mask).leading_zeros() + (!peek).trailing_zeros();
|
||||
if n >= 32 {
|
||||
let offset = offset + mask.trailing_zeros() as usize;
|
||||
return Err(&buffer[..offset]);
|
||||
}
|
||||
}
|
||||
|
||||
Ok(buffer)
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod test {
    /// A candidate run that reaches the end of the last whole chunk is
    /// reported from its first byte, since it may continue past that chunk.
    #[test]
    fn scan_tail() {
        let input: &[u8] = b"_xfbmj7sl2ikicym9x3yq7cms5qx1w39k";
        let outcome = crate::scan_clean(input);
        assert_eq!(outcome, Err(&input[..1]));
    }

    /// A 32-byte candidate run that straddles a chunk boundary is detected,
    /// and only the bytes before it are reported as clean.
    #[test]
    fn scan_straddle() {
        let input: &[u8] = b"________________xfbmj7sl2ikicym9x3yq7cms5qx1w39k________________";
        let outcome = crate::scan_clean(input);
        assert_eq!(outcome, Err(&input[..16]));
    }

    /// Runs interrupted by non-candidate bytes never reach 32, so the whole
    /// buffer comes back as clean.
    #[test]
    fn scan_clean() {
        let input: &[u8] = b"x_______________xfbmj7sl2ikicym9x3yq-cms5qx1w3-k________________";
        let outcome = crate::scan_clean(input);
        assert_eq!(outcome, Ok(input));
    }
}
|
||||
|
||||
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
mod simd {
    #[cfg(target_arch = "x86")]
    use std::arch::x86 as arch;
    #[cfg(target_arch = "x86_64")]
    use std::arch::x86_64 as arch;

    use arch::{__m256i, _mm256_cmpgt_epi8, _mm256_movemask_epi8, _mm256_set1_epi8};
    use std::ptr;

    /// 32 bytes held in a single 256-bit vector.
    ///
    /// NOTE(review): the AVX/AVX2 intrinsics used below are called without a
    /// runtime feature check; presumably the build enables AVX2 for every
    /// target this runs on -- confirm.
    #[allow(non_camel_case_types)]
    #[derive(Copy, Clone)]
    pub struct u8x32(__m256i);

    impl u8x32 {
        /// Loads exactly 32 bytes from `slice`, with no alignment requirement.
        ///
        /// # Panics
        ///
        /// Panics if `slice.len() != 32`.
        #[inline(always)]
        pub fn from_slice_unaligned(slice: &[u8]) -> Self {
            assert_eq!(slice.len(), 32);
            let src = slice.as_ptr().cast::<__m256i>();
            // SAFETY: the assertion above guarantees 32 readable bytes, which
            // is the size of `__m256i`; `read_unaligned` imposes no alignment
            // requirement and every bit pattern is a valid `__m256i`.
            Self(unsafe { ptr::read_unaligned(src) })
        }

        /// Builds a vector with all 32 lanes set to `x`.
        #[inline(always)]
        pub fn splat(x: u8) -> Self {
            // SAFETY: no memory is accessed; sound provided the CPU supports
            // the instruction set (see the note on the type).
            Self(unsafe { _mm256_set1_epi8(x as i8) })
        }

        /// Returns a mask whose bit `i` is set when lane `i` of `self` is
        /// greater than lane `i` of `b`.
        ///
        /// The comparison is *signed* (`_mm256_cmpgt_epi8`), so bytes of
        /// `0x80` and above compare as negative.
        #[inline(always)]
        pub fn gt(self, b: Self) -> u32 {
            // SAFETY: register-only operations; sound provided the CPU
            // supports AVX2 (see the note on the type).
            unsafe {
                let lanes = _mm256_cmpgt_epi8(self.0, b.0);
                _mm256_movemask_epi8(lanes) as u32
            }
        }

        /// Returns a mask whose bit `i` is set when lane `i` of `self` is
        /// less than lane `i` of `b`; this is `gt` with the operands swapped.
        #[inline(always)]
        pub fn lt(self, b: Self) -> u32 {
            Self::gt(b, self)
        }
    }
}
|
||||
|
||||
#[cfg(target_arch = "aarch64")]
mod simd {
    use std::{
        arch::aarch64::{
            uint8x16_t as u8x16, vaddv_u8, vandq_u8, vcgtq_u8, vdupq_n_u8, vget_high_u8,
            vget_low_u8, vshlq_u8,
        },
        mem, ptr,
    };

    /// 32 bytes held as two 128-bit NEON vectors (low half first).
    #[allow(non_camel_case_types)]
    #[derive(Copy, Clone)]
    #[repr(transparent)]
    pub struct u8x32([u8x16; 2]);

    /// Per-lane shift counts for `vshlq_u8`. A negative count shifts right,
    /// so a lane holding `0x80` ends up holding `1 << (lane % 8)`.
    const LANE_SHIFTS: [i8; 16] = [-7, -6, -5, -4, -3, -2, -1, 0, -7, -6, -5, -4, -3, -2, -1, 0];

    /// Compares two 16-lane vectors (unsigned) and packs the result into the
    /// low 16 bits of the return value: bit `i` is set when `a[i] > b[i]`.
    fn gt_mask16(a: u8x16, b: u8x16) -> u32 {
        // SAFETY: register-only NEON operations; the transmute converts a
        // `[i8; 16]` into the equally sized vector of sixteen `i8` lanes.
        unsafe {
            // 0x80 in every lane where the comparison holds, 0 elsewhere.
            let top_bits = vandq_u8(vdupq_n_u8(0x80), vcgtq_u8(a, b));
            // Move each lane's flag to a distinct bit within its 8-lane half.
            let spread = vshlq_u8(top_bits, mem::transmute(LANE_SHIFTS));
            // Summing eight lanes with distinct bits ORs them into one byte.
            let low = u32::from(vaddv_u8(vget_low_u8(spread)));
            let high = u32::from(vaddv_u8(vget_high_u8(spread)));
            low | high << 8
        }
    }

    impl u8x32 {
        /// Loads exactly 32 bytes from `slice`, with no alignment requirement.
        ///
        /// Only provided on little-endian targets.
        ///
        /// # Panics
        ///
        /// Panics if `slice.len() != 32`.
        #[cfg(target_endian = "little")]
        #[inline(always)]
        pub fn from_slice_unaligned(slice: &[u8]) -> Self {
            assert_eq!(slice.len(), 32);
            let src = slice.as_ptr().cast::<[u8x16; 2]>();
            // SAFETY: the assertion above guarantees 32 readable bytes, which
            // is the size of `[u8x16; 2]`; `read_unaligned` imposes no
            // alignment requirement and every bit pattern is valid.
            Self(unsafe { ptr::read_unaligned(src) })
        }

        /// Builds a vector with all 32 lanes set to `x`.
        #[inline(always)]
        pub fn splat(x: u8) -> Self {
            // SAFETY: register-only NEON operation.
            let half = unsafe { vdupq_n_u8(x) };
            Self([half, half])
        }

        /// Returns a mask whose bit `i` is set when lane `i` of `self` is
        /// greater than lane `i` of `b`.
        ///
        /// The comparison is *unsigned* (`vcgtq_u8`).
        #[inline(always)]
        pub fn gt(&self, b: Self) -> u32 {
            let [a_lo, a_hi] = self.0;
            let [b_lo, b_hi] = b.0;
            gt_mask16(a_lo, b_lo) | gt_mask16(a_hi, b_hi) << 16
        }

        /// Returns a mask whose bit `i` is set when lane `i` of `self` is
        /// less than lane `i` of `b`; this is `gt` with the operands swapped.
        #[inline(always)]
        pub fn lt(self, b: Self) -> u32 {
            Self::gt(&b, self)
        }
    }
}
|
||||
58
contrib/refscan/src/main.rs
Normal file
58
contrib/refscan/src/main.rs
Normal file
|
|
@ -0,0 +1,58 @@
|
|||
// SPDX-FileCopyrightText: edef <edef@edef.eu>
|
||||
// SPDX-License-Identifier: MPL-2.0
|
||||
|
||||
use std::{
|
||||
collections::BTreeSet as Set,
|
||||
convert::TryInto,
|
||||
io::{self, Read},
|
||||
str,
|
||||
};
|
||||
|
||||
fn main() {
|
||||
let max_refs: Set<[u8; 32]> = include_str!("../testdata/maxrefs")
|
||||
.lines()
|
||||
.map(|l| l.as_bytes().try_into().unwrap())
|
||||
.collect();
|
||||
|
||||
let input = {
|
||||
let stdin = io::stdin();
|
||||
let mut buffer = Vec::new();
|
||||
stdin.lock().read_to_end(&mut buffer).unwrap();
|
||||
buffer
|
||||
};
|
||||
|
||||
let base = input.as_ptr() as usize;
|
||||
let mut input: &[u8] = &input;
|
||||
while input.len() >= 32 {
|
||||
match refscan::scan_clean(&input) {
|
||||
Ok(buffer) | Err(buffer) => {
|
||||
let n = buffer.len();
|
||||
input = &input[n..];
|
||||
}
|
||||
}
|
||||
|
||||
let buffer = {
|
||||
let idx = input.iter().position(|x| match x {
|
||||
b'a'..=b'z' | b'0'..=b'9' => false,
|
||||
_ => true,
|
||||
});
|
||||
idx.map(|idx| &input[..idx]).unwrap_or(input)
|
||||
};
|
||||
|
||||
for chunk in buffer.windows(32) {
|
||||
let offset = (chunk.as_ptr() as usize) - base;
|
||||
let chunk = {
|
||||
let mut fixed = [0u8; 32];
|
||||
fixed.copy_from_slice(chunk);
|
||||
fixed
|
||||
};
|
||||
if max_refs.contains(&chunk) {
|
||||
let seen = unsafe { str::from_utf8_unchecked(&chunk) };
|
||||
println!("{} {}", seen, offset);
|
||||
}
|
||||
}
|
||||
|
||||
let n = buffer.len();
|
||||
input = &input[n..];
|
||||
}
|
||||
}
|
||||
Loading…
Add table
Add a link
Reference in a new issue