def generate_advanced_dataset(n_transactions=5000, seed=42):
    """Build a synthetic e-commerce transactions table.

    Each row is one randomly generated transaction dated within the 730
    days starting 2022-01-01. The category, product, unit price, quantity,
    customer attributes, discount and marketing channel are drawn with
    ``np.random``; revenue is the unit price times quantity times a
    seasonal multiplier (1.5 in November/December, 1.2 in June/July,
    otherwise 1.0).

    Args:
        n_transactions: Number of rows to generate. Must be at least 1.
        seed: Value passed to ``np.random.seed``. Note that this reseeds
            NumPy's *global* legacy random state.

    Returns:
        A ``pandas.DataFrame`` with one row per transaction and the columns
        ``Date, Class, Product, Value, Amount, Revenue, Customer_Segment,
        Age_Group, Area, Discount_%, Marketing_Channel,
        Customer_Satisfaction, Month, Year, Quarter, Profit_Margin,
        Days_Since_Start``.

    Raises:
        ValueError: If ``n_transactions`` is smaller than 1.
    """
    if n_transactions < 1:
        raise ValueError("n_transactions must be a positive integer")

    np.random.seed(seed)

    start_date = datetime(2022, 1, 1)
    dates = [start_date + timedelta(days=offset) for offset in range(730)]

    categories = ['Electronics', 'Clothing', 'Home & Garden', 'Sports', 'Books']
    # Keys of both lookup tables must match `categories` exactly, because
    # the sampled category is used to index them.
    products = {
        'Electronics': ['Laptop', 'Smartphone', 'Headphones', 'Tablet', 'Smartwatch'],
        'Clothing': ['T-Shirt', 'Jeans', 'Dress', 'Jacket', 'Sneakers'],
        'Home & Garden': ['Furniture', 'Lamp', 'Rug', 'Plant', 'Cookware'],
        'Sports': ['Yoga Mat', 'Dumbbell', 'Running Shoes', 'Bicycle', 'Tennis Racket'],
        'Books': ['Fiction', 'Non-Fiction', 'Biography', 'Science', 'History'],
    }
    # (low, high) bounds of the uniform unit-price draw per category.
    # Loop-invariant, so built once instead of on every iteration.
    base_prices = {
        'Electronics': (200, 1500),
        'Clothing': (20, 150),
        'Home & Garden': (30, 500),
        'Sports': (25, 300),
        'Books': (10, 50),
    }

    records = []
    for _ in range(n_transactions):
        date = np.random.choice(dates)
        category = np.random.choice(categories)
        product = np.random.choice(products[category])
        price = np.random.uniform(*base_prices[category])
        # Repeated values are intentional: the effective distribution is
        # 1 -> 0.85, 2 -> 0.13, 3 -> 0.02.
        quantity = np.random.choice(
            [1, 1, 1, 2, 2, 3], p=[0.5, 0.2, 0.15, 0.1, 0.03, 0.02]
        )
        customer_segment = np.random.choice(
            ['Premium', 'Standard', 'Budget'], p=[0.2, 0.5, 0.3]
        )
        age_group = np.random.choice(['18-25', '26-35', '36-45', '46-55', '56+'])
        region = np.random.choice(['North', 'South', 'East', 'West', 'Central'])

        month = date.month
        if month in (11, 12):
            seasonal_factor = 1.5
        elif month in (6, 7):
            seasonal_factor = 1.2
        else:
            seasonal_factor = 1.0
        revenue = price * quantity * seasonal_factor

        discount = np.random.choice(
            [0, 5, 10, 15, 20, 25], p=[0.4, 0.2, 0.15, 0.15, 0.07, 0.03]
        )
        marketing_channel = np.random.choice(
            ['Organic', 'Social Media', 'Email', 'Paid Ads']
        )

        # Satisfaction starts at 4.0, is nudged up for premium customers and
        # for discounts above 15%, then gets Gaussian noise and is clamped
        # to the 1-5 scale.
        base_satisfaction = 4.0
        if customer_segment == 'Premium':
            base_satisfaction += 0.5
        if discount > 15:
            base_satisfaction += 0.3
        satisfaction = np.clip(
            base_satisfaction + np.random.normal(0, 0.5), 1, 5
        )

        records.append({
            'Date': date,
            'Class': category,
            'Product': product,
            'Value': round(price, 2),
            'Amount': quantity,
            # Stored as 'Revenue' because the Profit_Margin computation
            # below reads df['Revenue'].
            'Revenue': round(revenue, 2),
            'Customer_Segment': customer_segment,
            'Age_Group': age_group,
            'Area': region,
            'Discount_%': discount,
            'Marketing_Channel': marketing_channel,
            'Customer_Satisfaction': round(satisfaction, 2),
            'Month': date.strftime('%B'),
            'Year': date.year,
            'Quarter': f'Q{(date.month - 1) // 3 + 1}',
        })

    df = pd.DataFrame(records)
    # 30% of the post-discount revenue.
    df['Profit_Margin'] = (
        df['Revenue'] * (1 - df['Discount_%'] / 100) * 0.3
    ).round(2)
    # Measured from the earliest date actually sampled, not from start_date.
    df['Days_Since_Start'] = (df['Date'] - df['Date'].min()).dt.days
    return df
Elevate your perspective with NextTech News, where innovation meets insight.
Discover the latest breakthroughs, get exclusive updates, and connect with a global network of future-focused thinkers.
Unlock tomorrow’s trends today: read more, subscribe to our newsletter, and become part of the NextTech community at NextTech-news.com

