pip install Faker

Collecting Faker
  Downloading Faker-18.13.0-py3-none-any.whl (1.7 MB)
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 1.7/1.7 MB 9.5 MB/s eta 0:00:00:00:0100:01
Requirement already satisfied: python-dateutil>=2.4 in /srv/conda/envs/notebook/lib/python3.7/site-packages (from Faker) (2.8.2)
Requirement already satisfied: typing-extensions>=3.10.0.1 in /srv/conda/envs/notebook/lib/python3.7/site-packages (from Faker) (4.0.1)
Requirement already satisfied: six>=1.5 in /srv/conda/envs/notebook/lib/python3.7/site-packages (from python-dateutil>=2.4->Faker) (1.16.0)
Installing collected packages: Faker
Successfully installed Faker-18.13.0
Note: you may need to restart the kernel to use updated packages.


import faker
from faker import Faker
fake = Faker()

fake.name()
set_of_names= []
for x in range(101):
    name = fake.name()
    set_of_names.append(name)
len(set_of_names)

101


first_name = []
last_name = []
for name in set_of_names : 
    splited_name = name.split(" ")
    first_name.append(splited_name[0])
    last_name.append(splited_name[1])


fake = Faker('fr_FR')
set_of_adress = set()
for _ in range(101):
    adress = fake.address()
    set_of_adress.add(adress)


import random
random.seed(123)
cities = ["paris", "lyon", "marseille", "nice", "toulouse"]
list_of_cities = random.choices(cities, k= 603)
len(list_of_cities)

603


customer_id = []
for x in range (101):
    customer_id.append(str(x)+""+"ABC")


list_of_quantity =[]
random.seed(123)
for x in range(603): 
    qty = random.randint(1,11)
    list_of_quantity.append(qty)
len(list_of_quantity)

603


list_of_price =[]
random.seed(123)
for x in range(201): 
    price= random.randint(5,300)
    list_of_price.append(price)


categories = [
    "Electronics & Gadgets",
    "Home & Kitchen",
    "Health & Beauty",
    "Clothing & Accessories",
    "Toys & Games"
]

categories_code = [
    "EG",
    "HK",
    "HB",
    "CA",
    "TG"
]


numbers = []

for x in range(603): 
    numbers.append(x)
    

for num in numbers :
# Convert to a list of strings
    string_list = [str(num) for num in numbers]


code_product= []
for code in categories_code:
    for num in string_list:
        code_product.append(code+" "+num)

import random 
random.shuffle(code_product)

new = random.choices (code_product, k= 201)
len(new)

201


numbers2 = []

for y in range(201): 
    numbers2.append(y)
    

for num in numbers2 :
# Convert to a list of strings
    string_list2 = [str(num) for num in numbers2]

product_id2= []
for code in categories_code:
    for num2 in string_list2:
        product_id2.append(code+" "+num2)   
len(product_id2)

1005


code_pdt_cat = []

for x in product_id2 : 
    splited_id_pdt = x.split(" ") 
    code_pdt_cat.append( splited_id_pdt[0])
len(code_pdt_cat)

1005


from faker import Faker
import datetime

fake = Faker()
random_date = []

#using string formats for dates that Faker cannot interpret correctly. 
#The Faker library expects date inputs to be either datetime.date objects or formatted correctly.

start_date = datetime.date(2020, 1, 1)
end_date = datetime.date(2024, 12, 31)

for _ in range(1001):
    random_date_between = fake.date_between(start_date=start_date, end_date=end_date)
    # Convert the date to a string in 'YYYY-MM-DD' format
    random_date.append(random_date_between.strftime('%Y-%m-%d'))


id_invoice= []
for x in range(201) :
    id_invoice.append("F"+"0"+str(x))


#customer 
#categories
#invoice 
#invoice_item 
#product


#create data frame for customer 
import pandas as pd 
index_names = []
for i in range(101):
    index_names.append(i)
customer = pd.DataFrame(columns=['First Name', 'Last Name', 'address','c_id'], index = index_names)





customer['First Name'] = first_name 
customer['Last Name'] = last_name
customer['address'] = list(set_of_adress)
customer['c_id']= customer_id


customer


len(categories_code)

5


#create data frame for categories
index_names = []
for i in range(5):
    index_names.append(i)

categories = pd.DataFrame(columns=['id_category' , 'the category'],index = index_names)


categories['id_category'] = categories_code

categories['the category']= [
    "Electronics & Gadgets",
    "Home & Kitchen",
    "Health & Beauty",
    "Clothing & Accessories",
    "Toys & Games"
]


categories


rep_customer_id = random.choices(customer_id , k = 201)
rep_dates = random.choices(random_date, k = 201)


#create data frame for invoice

index_names = []
for i in range(201):
    index_names.append(i)

invoice = pd.DataFrame(columns=['invoice_id','customer_id', 'invoice_date',  'amount' ],index = index_names)


invoice['invoice_id'] = id_invoice
invoice['invoice_date']= rep_dates
invoice['customer_id']= rep_customer_id
invoice['amount']= list_of_price


invoice


index_names = []
for i in range(1005):
    index_names.append(i)

product = pd.DataFrame(columns=[ 'category_product', 'pdt_id'], index= index_names) 
product ['pdt_id'] = product_id2
product ['category_product'] =code_pdt_cat

product


rep_invoice_id = random.choices(id_invoice , k = 603)
rep_price_id = random.choices(new , k = 603)


#invoice_item_id sql 

index_names = []
for i in range(603):
    index_names.append(i)

invoice_item = pd.DataFrame(columns=[ 'invoice_id' , 'qty', 'product_id', 'city' ], index= index_names) 

invoice_item ['invoice_id']= rep_invoice_id
invoice_item ['qty']= list_of_quantity
invoice_item ['product_id']= rep_price_id
invoice_item ['city']= list_of_cities


invoice_item


#transform files to csv  

customer.to_csv('customers.csv', index=False)
invoice.to_csv('invoice.csv', index=False)
invoice_item.to_csv("invoice_item.csv",index=False)
categories.to_csv("category.csv",index=False)
product.to_csv("product.csv",index=False)

	First Name	Last Name	address	c_id
0	Carolyn	Washington	58, rue de Olivier\n30416 Legrand	0ABC
1	Cassandra	Herman	2, boulevard Maurice Ruiz\n58945 Bergernec	1ABC
2	Michael	Conway	5, avenue Monique Vidal\n61494 Sainte Dianedan	2ABC
3	Angela	Sloan	934, rue de Neveu\n97237 Le Gallnec	3ABC
4	Michael	Strickland	197, chemin Gomez\n40186 Baron-sur-Mer	4ABC
...	...	...	...	...
96	Seth	Perkins	60, rue de Leleu\n75174 Sainte AgatheVille	96ABC
97	Margaret	Boone	77, rue de Philippe\n63291 Martineau	97ABC
98	Dawn	Durham	97, rue de Petitjean\n68378 Sainte JulienVille	98ABC
99	Haley	Rodriguez	87, rue Albert\n01783 Gosselin-sur-Grondin	99ABC
100	Jessica	Jackson	948, boulevard de Blanchet\n75260 Reynaud-sur-...	100ABC

	invoice_id	customer_id	invoice_date	amount
0	F00	2ABC	2024-05-08	31
1	F01	43ABC	2020-07-03	142
2	F02	94ABC	2023-12-01	49
3	F03	23ABC	2024-05-01	213
4	F04	70ABC	2023-12-20	141
...	...	...	...	...
196	F0196	52ABC	2023-06-29	142
197	F0197	85ABC	2023-08-04	294
198	F0198	82ABC	2020-11-11	123
199	F0199	57ABC	2023-05-27	48
200	F0200	64ABC	2023-10-02	167

	category_product	pdt_id
0	EG	EG 0
1	EG	EG 1
2	EG	EG 2
3	EG	EG 3
4	EG	EG 4
...	...	...
1000	TG	TG 196
1001	TG	TG 197
1002	TG	TG 198
1003	TG	TG 199
1004	TG	TG 200

	invoice_id	qty	product_id	city
0	F087	1	HB 596	paris
1	F0104	5	EG 489	paris
2	F052	2	HK 441	marseille
3	F041	7	HK 230	paris
4	F0130	5	CA 235	toulouse
...	...	...	...	...
598	F015	2	CA 269	marseille
599	F073	11	HK 528	paris
600	F0167	9	EG 157	paris
601	F00	2	TG 358	lyon
602	F0194	6	TG 435	nice

	id_category	the category
0	EG	Electronics & Gadgets
1	HK	Home & Kitchen
2	HB	Health & Beauty
3	CA	Clothing & Accessories
4	TG	Toys & Games