import re
from dataclasses import replace
from trajectory_sdk.transforms.pii_transform import BasePiiTransform
class EmailTransform(BasePiiTransform):
    """Redact e-mail addresses found in trajectory message content.

    ``transform`` returns a copy of a trajectory with every match of
    ``pattern`` replaced by ``"[REDACTED]"``; ``preview`` counts the matches
    across several trajectories without modifying them.
    """

    # Local part, "@", domain, a dot, then a TLD of two or more letters.
    # The TLD class is deliberately [A-Za-z]: inside a character class "|" is
    # a literal pipe (not alternation), so "[A-Z|a-z]" would let a pipe be
    # swallowed as part of an address, e.g. "a@b.co|uk".
    pattern = re.compile(r'\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}\b')

    def transform(self, trajectory):
        """Return a copy of *trajectory* with e-mail addresses redacted.

        The input is not mutated: each message, step and the trajectory
        itself are rebuilt with ``dataclasses.replace``.

        Note that a falsy ``content`` (such as ``None``) is written back as
        an empty string, because the substitution runs on ``content or ""``.
        """
        new_steps = []
        for step in trajectory.steps:
            redacted_msgs = [
                replace(
                    msg,
                    content=self.pattern.sub("[REDACTED]", msg.content or ""),
                )
                for msg in step.messages
            ]
            new_steps.append(replace(step, messages=redacted_msgs))
        return replace(trajectory, steps=new_steps)

    def preview(self, trajectories):
        """Count e-mail matches across *trajectories* without changing them.

        Returns a ``RedactionPreview`` whose ``total_rule_counts`` maps
        ``"EMAIL"`` to the total number of matches; ``samples`` is always
        empty.
        """
        count = sum(
            len(self.pattern.findall(msg.content or ""))
            for trajectory in trajectories
            for step in trajectory.steps
            for msg in step.messages
        )
        # NOTE(review): RedactionPreview is not imported in the visible part
        # of this file -- confirm it is in scope, otherwise this raises
        # NameError at call time.
        return RedactionPreview(
            total_rule_counts={"EMAIL": count},
            samples=[],
        )