Synthetic Population Generation
Generate synthetic schedules and attributes.
In [1]:
Copied!
from pathlib import Path
import pandas as pd
from caveat.data.synth import ActivityGen
from caveat.data.utils import generate_population_conditional, trace_to_pam
from caveat.evaluate.describe.times import (
joint_time_distributions_plot,
times_distributions_plot,
)
from caveat.evaluate.describe.transitions import sequence_prob_plot
from pathlib import Path
import pandas as pd
from caveat.data.synth import ActivityGen
from caveat.data.utils import generate_population_conditional, trace_to_pam
from caveat.evaluate.describe.times import (
joint_time_distributions_plot,
times_distributions_plot,
)
from caveat.evaluate.describe.transitions import sequence_prob_plot
In [2]:
Copied!
# Notebook configuration: population size and the directory outputs go to.
n = 1_000
write_path = Path("tmp")
In [3]:
Copied!
# FT worker generator
ftw = ActivityGen()
# `dict.copy()` is shallow: the nested lists would still be the ones held by
# `ftw.transition_config`, so editing them in place would rewrite the
# generator's own defaults (and compound on every re-run of this cell).
# Rebuild the inner lists before modifying them.
# NOTE(review): if `transition_config` is shared between ActivityGen
# instances, the in-place edits would also have leaked into the other
# generators - confirm against ActivityGen.
config = {
    outer: {inner: list(pairs) for inner, pairs in inner_map.items()}
    for outer, inner_map in ftw.transition_config.items()
}
# Scale the second element of every pair: x10 under "work", /10 under
# "education", "shop" and "leisure", whatever the outer key.
for inner_map in config.values():
    for inner, pairs in inner_map.items():
        if inner == "work":
            pairs[:] = [(a, b * 10) for a, b in pairs]
        elif inner in ("education", "shop", "leisure"):
            pairs[:] = [(a, b / 10) for a, b in pairs]
ftw.build(config)
trace = ftw.run()
plan = trace_to_pam(trace, ftw.map)
plan.plot()
In [4]:
Copied!
# PT worker generator
ptw = ActivityGen()
# `dict.copy()` is shallow: the nested lists would still be the ones held by
# `ptw.transition_config`, so editing them in place would rewrite the
# generator's own defaults (and compound on every re-run of this cell).
# Rebuild the inner lists before modifying them.
# NOTE(review): if `transition_config` is shared between ActivityGen
# instances, the in-place edits would also have leaked into the other
# generators - confirm against ActivityGen.
config = {
    outer: {inner: list(pairs) for inner, pairs in inner_map.items()}
    for outer, inner_map in ptw.transition_config.items()
}
# Only entries under the outer "work" key are changed: the second element of
# every pair is divided by 100 for "work" and multiplied by 100 for "shop"
# and "leisure".
for inner, pairs in config.get("work", {}).items():
    if inner == "work":
        pairs[:] = [(a, b / 100) for a, b in pairs]
    elif inner in ("shop", "leisure"):
        pairs[:] = [(a, b * 100) for a, b in pairs]
ptw.build(config)
trace = ptw.run()
plan = trace_to_pam(trace, ptw.map)
plan.plot()
In [5]:
Copied!
# Leisure generator
neet = ActivityGen()
# `dict.copy()` is shallow: the nested lists would still be the ones held by
# `neet.transition_config`, so editing them in place would rewrite the
# generator's own defaults (and compound on every re-run of this cell).
# Rebuild the inner lists before modifying them.
# NOTE(review): if `transition_config` is shared between ActivityGen
# instances, the in-place edits would also have leaked into the other
# generators - confirm against ActivityGen.
config = {
    outer: {inner: list(pairs) for inner, pairs in inner_map.items()}
    for outer, inner_map in neet.transition_config.items()
}
# Zero the second element of every pair under "work" and "education";
# multiply it by 10 under "shop" and "leisure".
for inner_map in config.values():
    for inner, pairs in inner_map.items():
        if inner in ("work", "education"):
            pairs[:] = [(a, 0) for a, _ in pairs]
        elif inner in ("shop", "leisure"):
            pairs[:] = [(a, b * 10) for a, b in pairs]
neet.build(config)
trace = neet.run()
plan = trace_to_pam(trace, neet.map)
plan.plot()
In [6]:
Copied!
# FTE generator
fte = ActivityGen()
# `dict.copy()` is shallow: the nested lists would still be the ones held by
# `fte.transition_config`, so editing them in place would rewrite the
# generator's own defaults (and compound on every re-run of this cell).
# Rebuild the inner lists before modifying them.
# NOTE(review): if `transition_config` is shared between ActivityGen
# instances, the in-place edits would also have leaked into the other
# generators - confirm against ActivityGen.
config = {
    outer: {inner: list(pairs) for inner, pairs in inner_map.items()}
    for outer, inner_map in fte.transition_config.items()
}
# Zero the second element of every pair under "work"; multiply it by 100
# under "education".
for inner_map in config.values():
    for inner, pairs in inner_map.items():
        if inner == "work":
            pairs[:] = [(a, 0) for a, _ in pairs]
        elif inner == "education":
            pairs[:] = [(a, b * 100) for a, b in pairs]
fte.build(config)
trace = fte.run()
plan = trace_to_pam(trace, fte.map)
plan.plot()
In [7]:
Copied!
# Build the population from the four generators (size=n), translate the `act`
# values through `ftw.map`, and keep the columns below in this order.
keep_cols = ["pid", "act", "start", "end", "duration", "gender", "age", "employment"]
population = generate_population_conditional(gens=(ftw, ptw, neet, fte), size=n)
population = population.assign(act=population["act"].map(ftw.map))[keep_cols]
population
Out[7]:
pid | act | start | end | duration | gender | age | employment | |
---|---|---|---|---|---|---|---|---|
0 | 0 | home | 0 | 375 | 375 | F | 24 | FTW |
1 | 0 | work | 375 | 1170 | 795 | F | 24 | FTW |
2 | 0 | home | 1170 | 1440 | 270 | F | 24 | FTW |
3 | 1 | home | 0 | 390 | 390 | M | 85 | NEET |
4 | 1 | shop | 390 | 450 | 60 | M | 85 | NEET |
... | ... | ... | ... | ... | ... | ... | ... | ... |
4305 | 999 | home | 0 | 390 | 390 | F | 83 | NEET |
4306 | 999 | shop | 390 | 450 | 60 | F | 83 | NEET |
4307 | 999 | home | 450 | 630 | 180 | F | 83 | NEET |
4308 | 999 | leisure | 630 | 990 | 360 | F | 83 | NEET |
4309 | 999 | home | 990 | 1440 | 450 | F | 83 | NEET |
4310 rows × 8 columns
In [8]:
Copied!
# `DataFrame.to_csv` does not create missing parent directories, so make sure
# the output directory exists before writing (needed on a fresh checkout).
write_path.mkdir(parents=True, exist_ok=True)

# population.to_csv(write_path / "combined.csv", index=False)

# Schedule columns, one CSV row per row of `population`.
population[["pid", "act", "start", "end", "duration"]].to_csv(
    write_path / "synthetic_schedules.csv", index=False
)
# Attribute columns, de-duplicated so repeated (pid, gender, age, employment)
# rows are written once.
population[["pid", "gender", "age", "employment"]].drop_duplicates().to_csv(
    write_path / "synthetic_attributes.csv", index=False
)
In [9]:
Copied!
# Scatter of duration against age, one point per row, coloured by gender.
colour_by_gender = {"M": "blue", "F": "red"}
cs = population["gender"].map(colour_by_gender)
population.plot(x="age", y="duration", kind="scatter", c=cs, alpha=0.1)
Out[9]:
<Axes: xlabel='age', ylabel='duration'>
In [10]:
Copied!
def describe_col(population: pd.DataFrame, col: str) -> pd.DataFrame:
    """Summarise one column of ``population`` for each value of ``act``.

    Args:
        population: Frame with an ``act`` column and the column named by ``col``.
        col: Name of the column to describe.

    Returns:
        A frame indexed by ``act`` with the ``count``, ``mean``, ``std``,
        ``min`` and ``max`` of ``col``, plus an ``attribute`` column whose
        value is ``col`` on every row.
    """
    stats = ["count", "mean", "std", "min", "max"]
    summary = population.groupby("act")[col].describe()[stats]
    # `assign` returns a new frame; setting a column directly on the sliced
    # frame can raise pandas' SettingWithCopyWarning.
    return summary.assign(attribute=col)
def describe_cols(population, cols: list[str]) -> pd.DataFrame:
    """Stack the per-``act`` summaries of several columns into one frame.

    Args:
        population: Frame passed through to ``describe_col``.
        cols: Names of the columns to summarise, in output order.

    Returns:
        The concatenated ``describe_col`` results, indexed by
        ``(attribute, act)``.
    """
    per_column = [describe_col(population, name) for name in cols]
    stacked = pd.concat(per_column, ignore_index=False)
    # Move `act` out of the index so it can be combined with `attribute`.
    return stacked.reset_index().set_index(["attribute", "act"])
# Summary of start, end and duration per activity, rounded to whole numbers.
describe_cols(population, ["start", "end", "duration"]).round()
Out[10]:
count | mean | std | min | max | ||
---|---|---|---|---|---|---|
attribute | act | |||||
start | education | 34.0 | 960.0 | 129.0 | 765.0 | 1125.0 |
home | 2085.0 | 494.0 | 490.0 | 0.0 | 1290.0 | |
leisure | 798.0 | 565.0 | 240.0 | 375.0 | 1260.0 | |
shop | 896.0 | 529.0 | 188.0 | 375.0 | 1170.0 | |
work | 497.0 | 570.0 | 299.0 | 375.0 | 1275.0 | |
end | education | 34.0 | 1038.0 | 74.0 | 885.0 | 1140.0 |
home | 2085.0 | 907.0 | 518.0 | 375.0 | 1440.0 | |
leisure | 798.0 | 898.0 | 207.0 | 420.0 | 1275.0 | |
shop | 896.0 | 623.0 | 193.0 | 390.0 | 1185.0 | |
work | 497.0 | 1023.0 | 157.0 | 735.0 | 1290.0 | |
duration | education | 34.0 | 78.0 | 60.0 | 15.0 | 195.0 |
home | 2085.0 | 413.0 | 112.0 | 15.0 | 885.0 | |
leisure | 798.0 | 333.0 | 205.0 | 15.0 | 630.0 | |
shop | 896.0 | 94.0 | 70.0 | 15.0 | 420.0 | |
work | 497.0 | 453.0 | 260.0 | 15.0 | 825.0 |
In [11]:
Copied!
# Named slices of the population by gender and by age band
# (under 25, 25 to under 65, 65 and over).
genders = population["gender"]
ages = population["age"]
subpops = {
    "males": population[genders == "M"],
    "females": population[genders == "F"],
    "young": population[ages < 25],
    "middle": population[(ages >= 25) & (ages < 65)],
    "old": population[ages >= 65],
}
In [12]:
Copied!
# Time distributions for the full population and each sub-population.
# The result is bound to `_` so it is not echoed as the cell's output.
_ = times_distributions_plot(population, ys=subpops)
In [13]:
Copied!
# Joint time distributions for the full population and each sub-population.
# The result is bound to `_` so it is not echoed as the cell's output.
_ = joint_time_distributions_plot(population, ys=subpops, figsize=(12, 12))
In [14]:
Copied!
# Sequence probability plot for the full population and each sub-population.
# The result is bound to `_` so it is not echoed as the cell's output.
_ = sequence_prob_plot(population, ys=subpops, figsize=(12, 4))
In [ ]:
Copied!