Skip to main content

Pandas Complete Guide for NetSuite Reports

What is Pandas?

Pandas is a Python library for data manipulation. Think of it as "Excel in Python".

  • DataFrame = Like an Excel sheet (rows and columns)
  • Series = Like a single column

1. Creating DataFrames

From a List of Dictionaries (Most Common for NetSuite)

import pandas as pd

# This is how NetSuite data usually comes (JSON format)
data = [
{"name": "Customer A", "currency": "IDR", "amount": 1000000},
{"name": "Customer B", "currency": "USD", "amount": 5000},
{"name": "Customer C", "currency": "IDR", "amount": 2000000}
]

# Convert to DataFrame
df = pd.DataFrame(data)

# View the data
print(df)

Output:

         name currency   amount
0 Customer A IDR 1000000
1 Customer B USD 5000
2 Customer C IDR 2000000

From JSON File

import json
import pandas as pd

# Load JSON file
with open('data.json', 'r') as f:
data = json.load(f)

# Convert to DataFrame
df = pd.DataFrame(data)

2. Basic DataFrame Operations

# View first 5 rows
df.head()

# View last 5 rows
df.tail()

# View specific number of rows
df.head(10)

# Get shape (rows, columns)
print(df.shape) # Output: (3, 3)

# Get column names
print(df.columns) # Output: Index(['name', 'currency', 'amount'], dtype='object')

# Get data types
print(df.dtypes)

# Get summary statistics
print(df.describe())

3. Accessing Columns

# Single column (returns Series)
df['amount']

# Multiple columns (returns DataFrame)
df[['name', 'amount']]

# Using dot notation (only works for simple column names)
df.amount

4. Adding New Columns

# Simple calculation
df['amount_with_tax'] = df['amount'] * 1.11

# From multiple columns
df['total'] = df['amount'] + df['tax']

# Fixed value
df['status'] = 'Active'

5. Removing Columns

# Remove single column
df = df.drop(columns=['unwanted_column'])

# Remove multiple columns
df = df.drop(columns=['col1', 'col2', 'col3'])

# Check if column exists before removing
if 'rownumber' in df.columns:
df = df.drop(columns=['rownumber'])

6. Handling Missing Values (Null/None)

# Check for nulls
print(df.isnull().sum())

# Fill nulls with 0
df = df.fillna(0)

# Fill nulls with specific values per column
df = df.fillna({
'amount': 0,
'name': 'Unknown',
'currency': 'IDR'
})

# Drop rows with any null
df = df.dropna()

# Drop rows where specific column is null
df = df.dropna(subset=['amount'])

7. Filtering Data

Basic Filtering

# Filter by single condition
idr_only = df[df['currency'] == 'IDR']

# Filter by number comparison
large_amounts = df[df['amount'] > 1000000]

# Not equal
non_idr = df[df['currency'] != 'IDR']

Multiple Conditions

# AND condition (use &)
result = df[(df['currency'] == 'IDR') & (df['amount'] > 1000000)]

# OR condition (use |)
result = df[(df['currency'] == 'IDR') | (df['currency'] == 'USD')]

# Complex conditions
result = df[
(df['currency'] == 'IDR') &
(df['amount'] > 1000000) &
(df['status'] == 'Active')
]
Important

Always use parentheses () around each condition!

Filter by List of Values

# Using isin()
currencies = ['IDR', 'USD', 'SGD']
result = df[df['currency'].isin(currencies)]

# Not in list
result = df[~df['currency'].isin(['JPY', 'EUR'])]

Filter by String Contains

# Contains text (case sensitive)
result = df[df['name'].str.contains('Customer')]

# Contains text (case insensitive)
result = df[df['name'].str.contains('customer', case=False)]

# Starts with
result = df[df['name'].str.startswith('PT.')]

# Ends with
result = df[df['name'].str.endswith('Ltd')]

8. Aggregation & Grouping

Basic Aggregations

# Sum of a column
total = df['amount'].sum()

# Average
average = df['amount'].mean()

# Count
count = df['amount'].count()

# Min and Max
minimum = df['amount'].min()
maximum = df['amount'].max()

# Multiple aggregations at once
print(f"Sum: {df['amount'].sum()}")
print(f"Avg: {df['amount'].mean()}")
print(f"Min: {df['amount'].min()}")
print(f"Max: {df['amount'].max()}")

Group By (Like Excel Pivot Table)

# Group by currency and sum amounts
grouped = df.groupby('currency')['amount'].sum()
print(grouped)

Output:

currency
IDR 3000000
USD 5000
Name: amount, dtype: int64

Group By with Multiple Aggregations

# Multiple aggregations
grouped = df.groupby('currency')['amount'].agg(['sum', 'mean', 'count'])
print(grouped)

Output:

              sum       mean  count
currency
IDR 3000000 1500000.0 2
USD 5000 5000.0 1

Group By Multiple Columns

# Group by multiple columns
grouped = df.groupby(['currency', 'status'])['amount'].sum()

# Reset index to make it a regular DataFrame
grouped = df.groupby(['currency', 'status'])['amount'].sum().reset_index()

Custom Aggregations

# Different aggregation for different columns
result = df.groupby('currency').agg({
'amount': 'sum', # Sum the amount
'name': 'count', # Count names
'date': 'max' # Get latest date
}).reset_index()

# Rename columns after aggregation
result.columns = ['currency', 'total_amount', 'customer_count', 'latest_date']

9. Creating Subtotals (Common Pattern)

# Original data grouped by currency
currencies = df['currency'].unique()

final_df = pd.DataFrame()
for curr in currencies:
# Get data for this currency
curr_data = df[df['currency'] == curr].copy()

# Create subtotal row
subtotal = pd.DataFrame({
'name': [f'Subtotal {curr}'],
'currency': [curr],
'amount': [curr_data['amount'].sum()]
})

# Combine data + subtotal
final_df = pd.concat([final_df, curr_data, subtotal])

print(final_df)

10. Conditional Logic (If-Else)

Using np.where() - Simple If-Else

import numpy as np

# Syntax: np.where(condition, value_if_true, value_if_false)

# Example: Classify amounts
df['size'] = np.where(df['amount'] > 1000000, 'Large', 'Small')

Result:

         name   amount   size
0 Customer A 1000000 Small
1 Customer B 5000000 Large
2 Customer C 500000 Small

Multiple Conditions with np.select()

import numpy as np

# Define conditions
conditions = [
df['amount'] > 5000000, # Condition 1
df['amount'] > 1000000, # Condition 2
df['amount'] > 500000 # Condition 3
]

# Define corresponding values
choices = ['Very Large', 'Large', 'Medium']

# Default value if no condition is met
df['size'] = np.select(conditions, choices, default='Small')

Using apply() for Complex Logic

# Define a function
def classify_customer(row):
if row['amount'] > 5000000:
return 'VIP'
elif row['amount'] > 1000000:
return 'Premium'
elif row['currency'] == 'USD':
return 'International'
else:
return 'Regular'

# Apply function to each row
df['category'] = df.apply(classify_customer, axis=1)

Conditional Calculations

# Calculate different rates based on currency
df['converted_amount'] = np.where(
df['currency'] == 'USD',
df['amount'] * 15500, # USD to IDR
df['amount'] # Keep as is
)

# Multiple currency conversion
conditions = [
df['currency'] == 'USD',
df['currency'] == 'SGD',
df['currency'] == 'EUR'
]
rates = [15500, 11500, 17000]

df['idr_amount'] = np.select(
conditions,
[df['amount'] * rate for rate in rates],
default=df['amount'] # Already IDR
)

11. Sorting Data

# Sort by single column (ascending)
df = df.sort_values('amount')

# Sort descending
df = df.sort_values('amount', ascending=False)

# Sort by multiple columns
df = df.sort_values(['currency', 'amount'], ascending=[True, False])

12. Pandas Cheat Sheet

OperationCode
Create DataFramedf = pd.DataFrame(data)
View datadf.head(), df.tail()
Filterdf[df['col'] > 100]
Multiple filterdf[(cond1) & (cond2)]
Group bydf.groupby('col')['val'].sum()
Sum columndf['col'].sum()
Add columndf['new'] = df['a'] + df['b']
If-elsenp.where(cond, true_val, false_val)
Drop columndf.drop(columns=['col'])
Fill nullsdf.fillna(0)
Sortdf.sort_values('col')