Module tiresias.client.synthetic
Expand source code
import os
import barnum
import pandas as pd
from time import time
from random import randint, random, choice
from tiresias.client.storage import register_app, insert_payload
def sample_profile():
payload = {}
zipcode, city, state = barnum.create_city_state_zip()
payload["demographics"] = [{
"age": randint(18, 100),
"gender": choice(["Male", "Female", "Other"]),
"income": randint(10, 1000) * 1000,
"city": city,
"state": state,
"zipcode": zipcode
}]
return payload
def sample_browsing(profile):
age = profile["demographics"][0]["age"] / 100.0
payload = {}
payload["history"] = []
df = pd.read_csv(os.path.join(os.path.dirname(__file__), "domains.csv"))
for _ in range(100 - randint(0, int(99 * age))):
payload["history"].append({
"timestamp": time() + randint(0, 10000),
"domain": choice(df["Root Domain"].values)
})
return payload
def sample_screen_time(profile):
payload = {}
payload["types"] = [
{"application_name": "Chrome", "application_type": "browser"},
{"application_name": "Safari", "application_type": "browser"},
{"application_name": "Firefox", "application_type": "browser"},
{"application_name": "Microsoft Edge", "application_type": "browser"},
{"application_name": "Internet Explorer", "application_type": "browser"},
{"application_name": "VSCode", "application_type": "development"},
{"application_name": "Terminal", "application_type": "development"},
{"application_name": "iTerm", "application_type": "development"},
{"application_name": "Slack", "application_type": "communication"},
{"application_name": "Skype", "application_type": "communication"},
{"application_name": "Zoom", "application_type": "communication"},
]
payload["events"] = []
df = pd.read_csv(os.path.join(os.path.dirname(__file__), "domains.csv"))
for _ in range(randint(1, 100)):
timestamp = time() + randint(0, 10000)
application_name = choice(payload["types"])["application_name"]
payload["events"].append({
"timestamp": timestamp,
"event_type": "open",
"application_name": application_name
})
payload["events"].append({
"timestamp": timestamp + randint(0, 10000),
"event_type": "close",
"application_name": application_name
})
return payload
def create_synthetic_dataset(storage_dir):
register_app(storage_dir, "profile", {
"demographics": {
"description": "",
"columns": {
"age": {"type": "float", "description": ""},
"gender": {"type": "float", "description": ""},
"income": {"type": "float", "description": ""},
"city": {"type": "float", "description": ""},
"state": {"type": "float", "description": ""},
"zipcode": {"type": "float", "description": ""},
}
}
})
profile = sample_profile()
insert_payload(storage_dir, "profile", profile)
register_app(storage_dir, "browsing", {
"history": {
"description": "",
"columns": {
"timestamp": {"type": "float", "description": "When the website was opened"},
"domain": {"type": "float", "description": "The domain (i.e. everything up to `.com`, `.net`, etc.)"},
}
},
})
insert_payload(storage_dir, "browsing", sample_browsing(profile))
register_app(storage_dir, "screen_time", {
"events": {
"description": "",
"columns": {
"timestamp": {"type": "float", "description": "When the event occurred."},
"event_type": {"type": "string", "description": "Whether the application was opened or closed."},
"application_name": {"type": "string", "description": "Standardized name for the application."},
}
},
"types": {
"description": "",
"columns": {
"application_name": {"type": "string", "description": "Standardized name for the application."},
"application_type": {"type": "string", "description": "Whether the application is for web browsing, software development, or communicaation."},
}
},
})
insert_payload(storage_dir, "screen_time", sample_screen_time(profile))
Functions
def create_synthetic_dataset(storage_dir)
-
Expand source code
def create_synthetic_dataset(storage_dir): register_app(storage_dir, "profile", { "demographics": { "description": "", "columns": { "age": {"type": "float", "description": ""}, "gender": {"type": "float", "description": ""}, "income": {"type": "float", "description": ""}, "city": {"type": "float", "description": ""}, "state": {"type": "float", "description": ""}, "zipcode": {"type": "float", "description": ""}, } } }) profile = sample_profile() insert_payload(storage_dir, "profile", profile) register_app(storage_dir, "browsing", { "history": { "description": "", "columns": { "timestamp": {"type": "float", "description": "When the website was opened"}, "domain": {"type": "float", "description": "The domain (i.e. everything up to `.com`, `.net`, etc.)"}, } }, }) insert_payload(storage_dir, "browsing", sample_browsing(profile)) register_app(storage_dir, "screen_time", { "events": { "description": "", "columns": { "timestamp": {"type": "float", "description": "When the event occurred."}, "event_type": {"type": "string", "description": "Whether the application was opened or closed."}, "application_name": {"type": "string", "description": "Standardized name for the application."}, } }, "types": { "description": "", "columns": { "application_name": {"type": "string", "description": "Standardized name for the application."}, "application_type": {"type": "string", "description": "Whether the application is for web browsing, software development, or communicaation."}, } }, }) insert_payload(storage_dir, "screen_time", sample_screen_time(profile))
def random(...)
-
random() -> x in the interval [0, 1).
def sample_browsing(profile)
-
Expand source code
def sample_browsing(profile): age = profile["demographics"][0]["age"] / 100.0 payload = {} payload["history"] = [] df = pd.read_csv(os.path.join(os.path.dirname(__file__), "domains.csv")) for _ in range(100 - randint(0, int(99 * age))): payload["history"].append({ "timestamp": time() + randint(0, 10000), "domain": choice(df["Root Domain"].values) }) return payload
def sample_profile()
-
Expand source code
def sample_profile(): payload = {} zipcode, city, state = barnum.create_city_state_zip() payload["demographics"] = [{ "age": randint(18, 100), "gender": choice(["Male", "Female", "Other"]), "income": randint(10, 1000) * 1000, "city": city, "state": state, "zipcode": zipcode }] return payload
def sample_screen_time(profile)
-
Expand source code
def sample_screen_time(profile): payload = {} payload["types"] = [ {"application_name": "Chrome", "application_type": "browser"}, {"application_name": "Safari", "application_type": "browser"}, {"application_name": "Firefox", "application_type": "browser"}, {"application_name": "Microsoft Edge", "application_type": "browser"}, {"application_name": "Internet Explorer", "application_type": "browser"}, {"application_name": "VSCode", "application_type": "development"}, {"application_name": "Terminal", "application_type": "development"}, {"application_name": "iTerm", "application_type": "development"}, {"application_name": "Slack", "application_type": "communication"}, {"application_name": "Skype", "application_type": "communication"}, {"application_name": "Zoom", "application_type": "communication"}, ] payload["events"] = [] df = pd.read_csv(os.path.join(os.path.dirname(__file__), "domains.csv")) for _ in range(randint(1, 100)): timestamp = time() + randint(0, 10000) application_name = choice(payload["types"])["application_name"] payload["events"].append({ "timestamp": timestamp, "event_type": "open", "application_name": application_name }) payload["events"].append({ "timestamp": timestamp + randint(0, 10000), "event_type": "close", "application_name": application_name }) return payload