Debug School

rakesh kumar
rakesh kumar

Posted on • Updated on

How to Extract and Transform Information from a Table in Python

How a DataFrame is created from a collection of lists
In a DataFrame, each column name acts as a dictionary key, and that column's values act as the dictionary value, stored as a list.

Image description

import pandas as pd

# Collection of lists: each inner list holds the values of ONE column
data = [
    ['Alice', 'Bob', 'Charlie'],
    [25, 30, 22],
    ['New York', 'San Francisco', 'Los Angeles'],
]

# pd.DataFrame(data) treats every inner list as a row, so transpose to turn
# the lists into columns. Transposing mixed-type rows leaves every column
# with dtype ``object``; infer_objects() restores proper dtypes (Age -> int),
# so numeric operations such as df['Age'].mean() behave as expected.
df = pd.DataFrame(data).transpose().infer_objects()

# Adding column names
df.columns = ['Name', 'Age', 'City']

# Displaying the DataFrame
print(df)
Enter fullscreen mode Exit fullscreen mode

output

Image description

How data frame is created through List of dictionaries

import pandas as pd

# Row-oriented input: one dictionary per row, keyed by column label
data = [
    dict(Name='Alice', Age=25, City='New York'),
    dict(Name='Bob', Age=30, City='San Francisco'),
    dict(Name='Charlie', Age=22, City='Los Angeles'),
]

# The DataFrame constructor turns each dictionary into one row
df = pd.DataFrame(data)

# Show the resulting table
print(df)
Enter fullscreen mode Exit fullscreen mode

Image description

How data frame is created through Grouped dictionary of lists

import pandas as pd

# Column-oriented input: every key is a column label and every list holds
# that column's values
data = dict(
    Name=['Alice', 'Bob', 'Charlie'],
    Age=[25, 30, 22],
    City=['New York', 'San Francisco', 'Los Angeles'],
)

# Build the table; the default orientation maps keys to columns
df = pd.DataFrame.from_dict(data)

# Show the resulting table
print(df)



Enter fullscreen mode Exit fullscreen mode

How to convert a pandas table into a list of dictionaries

# orient='records' yields one {column: value} dictionary per row of df
list_of_dicts = df.to_dict(orient='records')
Enter fullscreen mode Exit fullscreen mode
[{'Name': 'Alice', 'Age': 25, 'City': 'New York'}, {'Name': 'Bob', 'Age': 30, 'City': 'San Francisco'}, {'Name': 'Charlie', 'Age': 22, 'City': 'Los Angeles'}]
Enter fullscreen mode Exit fullscreen mode

How to convert a pandas table into a dictionary of lists

# orient='list' yields {column: [values...]} with one list per column of df
dictionary_of_lists = df.to_dict(orient='list')
Enter fullscreen mode Exit fullscreen mode
{'Name': ['Alice', 'Bob', 'Charlie'],
 'Age': [25, 30, 22],
 'City': ['New York', 'San Francisco', 'Los Angeles']}
Enter fullscreen mode Exit fullscreen mode

Finding location of particular value in table

import pandas as pd

grouped_dict = {
    "group1": [1, 2, 7, 3],
    "group2": [4, 5, 5, 6],
    "group3": [7, 8, 9, 8]
}

# Convert dictionary to Pandas DataFrame
df = pd.DataFrame(grouped_dict)
column_name = 'group2'
# The value whose occurrences are reported; naming it once keeps the lookup
# and the printed messages in agreement.
target_value = 5

# value_counts() maps every distinct value in the column to its frequency
value_counts = df[column_name].value_counts()
print(value_counts)

# .get() falls back to 0 when target_value never occurs in the column
target_count = value_counts.get(target_value, 0)
if target_count:
    print(f"Count of value {target_value} in {column_name}: {target_count}")
else:
    print(f"Value {target_value} not found in {column_name}")
Enter fullscreen mode Exit fullscreen mode

Image description

grouped_dict = {
    "group1": [1, 2, 7, 3],
    "group2": [4, 5, 5, 6],
    "group3": [7, 8, 9, 8]
}

target_value = 5

# Walk every group and remember each (group, index) where the target sits
locations = []
for group_name, values in grouped_dict.items():
    locations.extend(
        (group_name, position)
        for position, item in enumerate(values)
        if item == target_value
    )

if not locations:
    print(f"The value {target_value} is not found in the dictionary.")
else:
    print(f"The value {target_value} is located in the following places:")
    for location in locations:
        print(f"Group '{location[0]}' at index {location[1]}")
Enter fullscreen mode Exit fullscreen mode

Image description

def find_first_location(groups, target):
    """Return (group, index) of the first element equal to target.

    Groups are scanned in dictionary order and each list from left to right.
    Returning from inside the nested loops stops the whole search; a plain
    ``break`` would only leave the inner loop, letting a later group
    overwrite the first match. Returns None when target is absent.
    """
    for group, elements in groups.items():
        for index, value in enumerate(elements):
            if value == target:
                return group, index
    return None


grouped_dict = {
    "group1": [1, 2, 7, 3],
    "group2": [4, 5, 5, 6],
    "group3": [7, 8, 9, 8]
}

target_value = 5

location = find_first_location(grouped_dict, target_value)

if location is not None:
    print(f"The value {target_value} is located in group '{location[0]}' at index {location[1]}.")
else:
    print(f"The value {target_value} is not found in the dictionary.")
Enter fullscreen mode Exit fullscreen mode

Image description

grouped_dict = {
    "group1": [1, 2, 7, 3],
    "group2": [4, 5, 5, 6],
    "group3": [7, 8, 9, 8]
}

target_value = 5

# Every (group, index) pair whose element equals target_value, in scan order
listed = [
    (group, index)
    for group, elements in grouped_dict.items()
    for index, value in enumerate(elements)
    if value == target_value
]
# The most recent match, or None when nothing matched
location = listed[-1] if listed else None

print(listed)
Enter fullscreen mode Exit fullscreen mode

output

Find the sum of the list values in a dictionary of lists

grouped_dict = {
    "group1": [1, 2, 7, 3],
    "group2": [4, 5, 5, 6],
    "group3": [7, 8, 9, 8]
}

listed = []  # one sum per group, in dictionary order
total = 0    # grand total over all groups (named so the builtin sum() stays usable)
for group, elements in grouped_dict.items():
    # Restart the accumulator for every group; otherwise the list would hold
    # running totals instead of per-group sums.
    group_total = 0
    for value in elements:
        group_total += value
    listed.append(group_total)
    total += group_total
lists = [total]
print(listed)
print(lists)
Enter fullscreen mode Exit fullscreen mode

Image description

Find the sum of the list values in a dictionary of lists using a list comprehension

grouped_dict = {
    "group1": [1, 2, 7, 3],
    "group2": [4, 5, 5, 6],
    "group3": [7, 8, 9, 8]
}

target_value = 5

# Per-group totals, in dictionary order
sums = list(map(sum, grouped_dict.values()))
# Grand total over every list
allsums = sum(map(sum, grouped_dict.values()))
print(sums)
print(allsums)
Enter fullscreen mode Exit fullscreen mode

output
Image description

Finding the group and index of the first occurrence of a value using next() with a generator expression

grouped_dict = {
    "group1": [1, 2, 7, 3],
    "group2": [4, 5, 5, 6],
    "group3": [7, 8, 9, 8]
}

target_value = 5

# Lazy stream of every (group, index) match; nothing is scanned yet
matches = (
    (group, index)
    for group, elements in grouped_dict.items()
    for index, value in enumerate(elements)
    if value == target_value
)
# Pull only the first match; None is the fallback when the stream is empty
location = next(matches, None)

if location is None:
    print(f"The value {target_value} is not found in the dictionary.")
else:
    print(f"The value {target_value} is located in group '{location[0]}' at index {location[1]}.")
Enter fullscreen mode Exit fullscreen mode

output

Image description

Use of next() with a generator expression
It returns the first matching element, or the supplied default (here None) when nothing matches.

numbers = [1, 3, 5, 2, 8, 7]

# filter() is lazy, so next() stops at the first even number it meets
even_numbers = filter(lambda num: num % 2 == 0, numbers)
first_even = next(even_numbers, None)

if first_even is None:
    print("No even numbers found in the list.")
else:
    print(f"The first even number is: {first_even}")
Enter fullscreen mode Exit fullscreen mode

output

import pandas as pd

grouped_dict = {
    "group1": [1, 2, 7, 3],
    "group2": [4, 5, 5, 6],
    "group3": [7, 8, 9, 8]
}

# Build the table from the dictionary of lists
df = pd.DataFrame(grouped_dict)
column_name = 'group2'

# Tally how often each value appears in the chosen column
value_counts = {}
for value in df[column_name]:
    value_counts[value] = value_counts.get(value, 0) + 1

print(f"Occurrences of values in {column_name} using for loop:")
print(value_counts)
target_value = 5

# Scan the tally for the target; the count stays 0 when it is absent
occurrence_of_target_value = 0
for value in value_counts:
    if value == target_value:
        occurrence_of_target_value = value_counts[value]
        break  # the target can appear only once as a key

print(f"Occurrence of {target_value}: {occurrence_of_target_value}")
Enter fullscreen mode Exit fullscreen mode

Image description
=================or====================

import pandas as pd

grouped_dict = {
    "group1": [1, 2, 7, 3],
    "group2": [4, 5, 5, 6],
    "group3": [7, 8, 9, 8]
}

# Build the table from the dictionary of lists
df = pd.DataFrame(grouped_dict)

column_name = 'group2'

# Tally how often each value appears in the chosen column
value_counts = {}
for value in df[column_name]:
    value_counts.setdefault(value, 0)
    value_counts[value] += 1

print(f"Occurrences of values in {column_name} using for loop:")
print(value_counts)
target_value = 5

# Direct key lookup, defaulting to 0 when the target was never seen
if target_value in value_counts:
    occurrence_of_target_value = value_counts[target_value]
else:
    occurrence_of_target_value = 0

print(f"Occurrence of {target_value}: {occurrence_of_target_value}")
Enter fullscreen mode Exit fullscreen mode

Image description

Finding Sum

import pandas as pd

grouped_dict = {
    "group1": [1, 2, 7, 3],
    "group2": [4, 5, 5, 6],
    "group3": [7, 8, 9, 8]
}

# Convert dictionary to Pandas DataFrame
df = pd.DataFrame(grouped_dict)

column_name = 'group2'

# 1) Sum one DataFrame column with an explicit loop
suming = 0
for value in df[column_name]:
    suming += value
print(suming)

# 2) Same sum taken straight from the dictionary. An exact key lookup is used
#    (a substring test would also match keys such as 'group20'); the empty
#    list default makes a missing key sum to 0 instead of leaving the name
#    undefined.
sums = sum(grouped_dict.get(column_name, []))
print("sum by grouped_dict.items", sums)

# 3) Sum of every value in every list, element by element
total_sum = 0
for group_values in grouped_dict.values():
    # Iterate through each value in the list
    for value in group_values:
        total_sum += value

print(f"Sum of all list values: {total_sum}")

# 4) Same total, adding one per-list sum at a time
sumalls = 0
for group_values in grouped_dict.values():
    sumalls += sum(group_values)

print(f"sumalls of all list values: {sumalls}")

# 5) Same total as a single expression: sum of the per-list sums
total_sum = sum(sum(group_values) for group_values in grouped_dict.values())

print(f"Sum of all list values: {total_sum}")
Enter fullscreen mode Exit fullscreen mode

output

20
sum by grouped_dict.items 20
Sum of all list values: 65
sumalls of all list values: 65
Sum of all list values: 65
Enter fullscreen mode Exit fullscreen mode

Grouping and Aggregating:

Use groupby() to group data and agg() to perform aggregations.
How to group a table by a particular column and apply a different aggregation to each of the other columns

# Group the rows of df by 'column1'; per group take the mean of 'column2'
# and the sum of 'column3'
grouped_data = df.groupby('column1').agg({'column2': 'mean', 'column3': 'sum'})
Enter fullscreen mode Exit fullscreen mode
import pandas as pd

# Sample table: a label column plus two numeric columns
data = dict(
    column1=['A', 'B', 'A', 'B', 'A', 'C'],
    column2=[1, 2, 3, 4, 5, 6],
    column3=[10, 20, 30, 40, 50, 60],
)

df = pd.DataFrame.from_dict(data)
Enter fullscreen mode Exit fullscreen mode

The DataFrame looks like this:

  column1  column2  column3
0       A        1       10
1       B        2       20
2       A        3       30
3       B        4       40
4       A        5       50
5       C        6       60
Enter fullscreen mode Exit fullscreen mode

Now, let's apply the groupby and agg operations:

# One output row per distinct 'column1' value: mean of 'column2' and
# sum of 'column3' within that group
grouped_data = df.groupby('column1').agg({'column2': 'mean', 'column3': 'sum'})
Enter fullscreen mode Exit fullscreen mode

This code groups the DataFrame by the unique values in 'column1' (A, B, C) and then calculates the mean of 'column2' and the sum of 'column3' for each group.

The resulting grouped_data DataFrame will look like this:

         column2  column3
column1                  
A             3.0       90
B             3.0       60
C             6.0       60
Enter fullscreen mode Exit fullscreen mode

Here, for group 'A', the mean of 'column2' is (1 + 3 + 5) / 3 = 3.0, and the sum of 'column3' is (10 + 30 + 50) = 90. Similarly, for group 'B' and 'C'.

Applying Functions:

Use apply() to apply a function to each element or row/column of a DataFrame.

# your_function is a placeholder: substitute any callable that takes one cell
# value of 'column1' and returns the value to store in 'new_column'
df['new_column'] = df['column1'].apply(lambda x: your_function(x))
Enter fullscreen mode Exit fullscreen mode

Data transformation based on mean,median using lambda function

import pandas as pd

data = {
    'column1': ['A', 'B', 'A', 'B', 'A', 'C'],
    'column2': [1, 2, 3, 4, 5, 6],
    'column3': [10, 20, 30, 40, 50, 60]
}

df = pd.DataFrame(data)

# Define the lambda function
my_lambda = lambda x: x ** 2

# Apply the lambda to every NUMERIC column. column1 holds strings, and
# 'A' ** 2 raises TypeError, so text columns are left out. The column list is
# taken before the loop adds the new '_squared' columns.
numeric_columns = df.select_dtypes(include='number').columns
for column in numeric_columns:
    df[column + '_squared'] = df[column].apply(my_lambda)

# Display the resulting DataFrame
print("Example 2:")
print(df)
Enter fullscreen mode Exit fullscreen mode

# Store the string form of every 'column1' value in 'new_column'
df['new_column'] = df['column1'].apply(lambda x: str(x))

Apply a function to extract the last digit

# x % 10 is the remainder after dividing by 10.
# NOTE(review): assumes 'column1' holds integers; on string values % is
# string formatting, not arithmetic - confirm which df this runs against.
df['new_column'] = df['column1'].apply(lambda x: x % 10)
Enter fullscreen mode Exit fullscreen mode

# Passes each 'column1' value to custom_function and stores what it returns.
# NOTE(review): custom_function is not defined at this point of the article;
# it must be defined before this line runs.
df['new_column'] = df['column1'].apply(custom_function)

output

Image description

Image description

Data transformation for specific columns based on mean/median using a lambda function
Create a new column that contains the square of the value when the column's mean is greater than the current value; otherwise add 10 to the current value.

import pandas as pd

data = {
    'column1': ['A', 'B', 'A', 'B', 'A', 'C'],
    'column2': [1, 2, 3, 4, 5, 6],
    'column3': [10, 20, 30, 40, 50, 60]
}

df = pd.DataFrame(data)

# Rule from the section heading: square a value when the mean of ITS OWN
# column is greater than the value, otherwise add 10 to it.
# Comparing every column against column2's mean (and then against the larger
# column3 mean) left one branch unreachable and judged column3 by the wrong
# threshold, so each column gets its own mean here.
for column in df.columns[1:]:  # skip the first column: it holds labels, not numbers
    column_mean = df[column].mean()
    # The default argument freezes this column's mean inside the lambda
    my_lambda = lambda x, mean=column_mean: x ** 2 if mean > x else x + 10
    df[column + '_transformed'] = df[column].apply(my_lambda)

# Display the resulting DataFrame
print("Example 3:")
print(df)
Enter fullscreen mode Exit fullscreen mode

Image description
=================or==================================

import pandas as pd

data = {
    'column1': ['A', 'B', 'A', 'B', 'A', 'C'],
    'column2': [1, 2, 3, 4, 5, 6],
    'column3': [10, 20, 30, 40, 50, 60]
}

df = pd.DataFrame(data)

# Thresholds used by the lambda. column1 holds string labels, so asking for
# its median raises; the median is taken from numeric column2 instead.
mean_col2 = df['column2'].mean()
mean_col3 = df['column3'].mean()
median_col2 = df['column2'].median()

# Chained conditional expression, evaluated left to right: the first
# condition that holds decides the result.
# NOTE(review): with this sample data mean_col3 > mean_col2 == median_col2,
# so only the first branch (square) and the final fallback (unchanged) fire.
my_lambda = lambda x: (
    x ** 2 if x > mean_col2
    else x + 10 if x > mean_col3
    else x * 2 if x > median_col2
    else x
)

# Apply the lambda to every column except the first (label) column
for column in df.columns[1:]:
    df[column + '_transformed'] = df[column].apply(my_lambda)

# Display the resulting DataFrame
print("Example 4:")
print(df)
Enter fullscreen mode Exit fullscreen mode

Image description

Apply lambda function for data transformation

import numpy as np
import pandas as pd

# Sample DataFrame with a single numeric column
data = {'column1': [10, 20, 30, 40, 50]}
df = pd.DataFrame(data)


def add_100(x):
    """Return x increased by 100 (used by Example 5)."""
    return 100 + x


def custom_function(x):
    """Label x 'Small' when it is below 30, otherwise 'Large' (Example 9)."""
    return 'Small' if x < 30 else 'Large'


# One transformation per example, in the order they are printed
transformations = [
    lambda value: value ** 2,                              # Example 1: square each value
    lambda value: 2 * value,                               # Example 2: double each value
    lambda value: f"{value}",                              # Example 3: convert to string
    lambda value: 'Odd' if value % 2 else 'Even',          # Example 4: even or odd
    add_100,                                               # Example 5: custom function adding 100
    lambda value: ('Low' if value < 20                     # Example 6: several conditions
                   else 'High' if value > 30
                   else 'Medium'),
    lambda value: divmod(value, 10)[1],                    # Example 7: last digit
    np.sqrt,                                               # Example 8: numpy square root
    custom_function,                                       # Example 9: custom if/else function
    lambda value: round(value, -1),                        # Example 10: round to the nearest 10
]

# Each example overwrites 'new_column' and prints the table; only the first
# header is printed without a leading blank line.
for number, transform in enumerate(transformations, start=1):
    df['new_column'] = df['column1'].apply(transform)
    print(f"Example {number}:" if number == 1 else f"\nExample {number}:")
    print(df)
Enter fullscreen mode Exit fullscreen mode

Example 1: Square each value

   column1  new_column
0       10         100
1       20         400
2       30         900
3       40        1600
4       50        2500
Enter fullscreen mode Exit fullscreen mode

Example 2: Double each value

   column1  new_column
0       10          20
1       20          40
2       30          60
3       40          80
4       50         100
Enter fullscreen mode Exit fullscreen mode

Example 3: Convert to string

   column1 new_column
0       10         10
1       20         20
2       30         30
3       40         40
4       50         50
Enter fullscreen mode Exit fullscreen mode

Example 4: Check if even or odd

   column1 new_column
0       10       Even
1       20       Even
2       30       Even
3       40       Even
4       50       Even
Enter fullscreen mode Exit fullscreen mode

Example 5: Custom function to add 100

   column1  new_column
0       10         110
1       20         120
2       30         130
3       40         140
4       50         150
Enter fullscreen mode Exit fullscreen mode

Example 6: Apply a lambda function with multiple conditions

   column1 new_column
0       10        Low
1       20     Medium
2       30     Medium
3       40       High
4       50       High
Enter fullscreen mode Exit fullscreen mode

Example 7: Apply a function to extract the last digit

   column1  new_column
0       10           0
1       20           0
2       30           0
3       40           0
4       50           0

Enter fullscreen mode Exit fullscreen mode

Example 8: Use numpy square root function

   column1  new_column
0       10    3.162278
1       20    4.472136
2       30    5.477226
3       40    6.324555
4       50    7.071068
Enter fullscreen mode Exit fullscreen mode

Example 9: Use a custom function with if-else

   column1 new_column
0       10      Small
1       20      Small
2       30      Large
3       40      Large
4       50      Large
Enter fullscreen mode Exit fullscreen mode
Example 10: Apply a function to round to the nearest 10
Enter fullscreen mode Exit fullscreen mode
   column1  new_column
0       10          10
1       20          20
2       30          30
3       40          40
4       50          50
Enter fullscreen mode Exit fullscreen mode

Use Custom function

import pandas as pd

# Sample DataFrame with a single numeric column
data = {'column1': [10, 20, 30, 40, 50]}
df = pd.DataFrame(data)


# Example 1: Categorize values as 'Even' or 'Odd'
def categorize_even_odd(x):
    """Return 'Even' when x is divisible by 2, otherwise 'Odd'."""
    return 'Odd' if x % 2 else 'Even'


df['new_column'] = df['column1'].apply(categorize_even_odd)
print("Example 1:")
print(df)


# Example 2: Assign grades based on value ranges
def assign_grade(x):
    """Return the letter grade for x: A (>=90), B (>=80), C (>=70), else D."""
    for lower_bound, grade in ((90, 'A'), (80, 'B'), (70, 'C')):
        if x >= lower_bound:
            return grade
    return 'D'


df['new_column'] = df['column1'].apply(assign_grade)
print("\nExample 2:")
print(df)


# Example 3: Apply custom function with for loop
def custom_function(x):
    """Return a list labelling each item of x 'Low' (< 30) or 'High'."""
    labels = []
    for item in x:
        labels.append('Low' if item < 30 else 'High')
    return labels


df['new_column'] = custom_function(df['column1'])
print("\nExample 3:")
print(df)


# Example 4: Convert values to categories
def convert_to_category(x):
    """Return a list of 'Small' (< 30), 'Medium' (30..49) or 'Large' labels."""
    labels = []
    for item in x:
        if item < 30:
            label = 'Small'
        elif item < 50:
            label = 'Medium'
        else:
            label = 'Large'
        labels.append(label)
    return labels


df['new_column'] = convert_to_category(df['column1'])
print("\nExample 4:")
print(df)


# Example 5: Use for loop with if-else to round values to the nearest 10
def round_to_nearest_10(x):
    """Return each item of x rounded to the nearest multiple of 10 (5 rounds up)."""
    rounded = []
    for item in x:
        remainder = item % 10
        rounded.append(item + 10 - remainder if remainder >= 5 else item - remainder)
    return rounded


df['new_column'] = round_to_nearest_10(df['column1'])
print("\nExample 5:")
print(df)


# Example 6: Apply a custom function with multiple conditions using for loop
def custom_function_multiple_conditions(x):
    """Return a list of 'Very Low' (< 20), 'Low' (< 40), 'Medium' (< 60) or 'High'."""
    labels = []
    for item in x:
        if item < 20:
            label = 'Very Low'
        elif item < 40:
            label = 'Low'
        elif item < 60:
            label = 'Medium'
        else:
            label = 'High'
        labels.append(label)
    return labels


df['new_column'] = custom_function_multiple_conditions(df['column1'])
print("\nExample 6:")
print(df)


# Example 7: Categorize values as 'Positive', 'Negative', or 'Zero'
def categorize_pos_neg_zero(x):
    """Return a list naming the sign of each item of x."""
    signs = []
    for item in x:
        signs.append('Positive' if item > 0 else 'Negative' if item < 0 else 'Zero')
    return signs


df['new_column'] = categorize_pos_neg_zero(df['column1'])
print("\nExample 7:")
print(df)


# Example 8: Apply a custom function with cumulative sum using for loop
def cumulative_sum(x):
    """Return the running totals of x as a list."""
    running_totals = []
    running = 0
    for item in x:
        running = running + item
        running_totals.append(running)
    return running_totals


df['new_column'] = cumulative_sum(df['column1'])
print("\nExample 8:")
print(df)


# Example 9: Categorize values based on whether they are prime or not
def categorize_prime(x):
    """Return a list labelling each item of x 'Prime' or 'Not Prime'."""
    def is_prime(num):
        # Numbers below 2 are never prime; otherwise no divisor up to the
        # integer square root may divide num.
        return num >= 2 and all(num % divisor for divisor in range(2, int(num**0.5) + 1))

    return ['Prime' if is_prime(item) else 'Not Prime' for item in x]


df['new_column'] = categorize_prime(df['column1'])
print("\nExample 9:")
print(df)


# Example 10: Categorize values as 'Positive', 'Negative', or 'Zero' using list comprehension
def categorize_pos_neg_zero_lc(x):
    """Return a list naming the sign of each item of x (comprehension form)."""
    return [
        'Positive' if item > 0 else ('Negative' if item < 0 else 'Zero')
        for item in x
    ]


df['new_column'] = categorize_pos_neg_zero_lc(df['column1'])
print("\nExample 10:")
print(df)
Enter fullscreen mode Exit fullscreen mode

Some More Example

import pandas as pd

# Sample DataFrame
data = {'column1': [10, 20, 30, 40, 50]}
df = pd.DataFrame(data)

# Every lambda below receives ONE cell value of 'column1' and returns the
# value to store; apply() does the iteration, so the lambdas contain no loop.

# Example 1: Categorize values as 'Even' or 'Odd'
df['new_column'] = df['column1'].apply(lambda val: 'Even' if val % 2 == 0 else 'Odd')
print("Example 1:")
print(df)

# Example 2: Assign grades based on value ranges
df['new_column'] = df['column1'].apply(
    lambda val: 'A' if val >= 90 else 'B' if val >= 80 else 'C' if val >= 70 else 'D'
)
print("\nExample 2:")
print(df)

# Example 3: Label values 'Low' (< 30) or 'High'
df['new_column'] = df['column1'].apply(lambda val: 'Low' if val < 30 else 'High')
print("\nExample 3:")
print(df)

# Example 4: Convert values to categories
df['new_column'] = df['column1'].apply(
    lambda val: 'Small' if val < 30 else 'Medium' if 30 <= val < 50 else 'Large'
)
print("\nExample 4:")
print(df)

# Example 5: Round values to the nearest 10 (a remainder of 5 rounds up)
df['new_column'] = df['column1'].apply(
    lambda val: val + 10 - val % 10 if val % 10 >= 5 else val - val % 10
)
print("\nExample 5:")
print(df)

# Example 6: Several range conditions chained in one lambda
df['new_column'] = df['column1'].apply(
    lambda val: 'Very Low' if val < 20
    else 'Low' if 20 <= val < 40
    else 'Medium' if 40 <= val < 60
    else 'High'
)
print("\nExample 6:")
print(df)

# Example 7: Categorize values as 'Positive', 'Negative', or 'Zero'
df['new_column'] = df['column1'].apply(
    lambda val: 'Positive' if val > 0 else 'Negative' if val < 0 else 'Zero'
)
print("\nExample 7:")
print(df)

# Example 8: Cumulative sum. An element-wise lambda sees one value at a time
# and cannot know the preceding rows, so the running total is computed with
# the vectorised cumsum() instead of apply().
cumulative = df['column1'].cumsum()
df['new_column'] = cumulative
print("\nExample 8:")
print(df)

# Example 9: Categorize values based on whether they are prime or not
# (val > 1 is tested first so the divisor check only runs for candidates)
df['new_column'] = df['column1'].apply(
    lambda val: 'Prime'
    if val > 1 and all(val % i != 0 for i in range(2, int(val ** 0.5) + 1))
    else 'Not Prime'
)
print("\nExample 9:")
print(df)

# Example 10: Categorize values as 'Positive', 'Negative', or 'Zero' using a
# list comprehension inside the lambda (a one-element list, unwrapped with [0])
df['new_column'] = df['column1'].apply(
    lambda val: ['Positive' if v > 0 else 'Negative' if v < 0 else 'Zero' for v in [val]][0]
)
print("\nExample 10:")
print(df)
Enter fullscreen mode Exit fullscreen mode

Output

Here are the outputs for the provided examples:

Example 1: Categorize values as 'Even' or 'Odd'

   column1 new_column
0       10       Even
1       20       Even
2       30       Even
3       40       Even
4       50       Even
Enter fullscreen mode Exit fullscreen mode

Example 2: Assign grades based on value ranges

   column1 new_column
0       10          D
1       20          D
2       30          D
3       40          D
4       50          D
Enter fullscreen mode Exit fullscreen mode

Example 3: Apply custom function with for loop inside lambda

   column1 new_column
0       10        Low
1       20        Low
2       30       High
3       40       High
4       50       High
Enter fullscreen mode Exit fullscreen mode

Example 4: Convert values to categories using for loop inside lambda

   column1 new_column
0       10      Small
1       20      Small
2       30     Medium
3       40     Medium
4       50      Large
Enter fullscreen mode Exit fullscreen mode

Example 5: Use for loop inside lambda to round values to the nearest 10

   column1 new_column
0       10         10
1       20         20
2       30         30
3       40         40
4       50         50
Enter fullscreen mode Exit fullscreen mode

Example 6: Apply a custom function with multiple conditions using for loop inside lambda

   column1 new_column
0       10   Very Low
1       20        Low
2       30        Low
3       40     Medium
4       50     Medium
Enter fullscreen mode Exit fullscreen mode

Example 7: Categorize values as 'Positive', 'Negative', or 'Zero' using for loop inside lambda

   column1 new_column
0       10   Positive
1       20   Positive
2       30   Positive
3       40   Positive
4       50   Positive
Enter fullscreen mode Exit fullscreen mode

Example 8: Apply a custom function with cumulative sum using for loop inside lambda

   column1  new_column
0       10          10
1       20          30
2       30          60
3       40         100
4       50         150
Enter fullscreen mode Exit fullscreen mode

Example 9: Categorize values based on whether they are prime or not using for loop inside lambda

   column1 new_column
0       10   Not Prime
1       20   Not Prime
2       30   Not Prime
3       40   Not Prime
4       50   Not Prime
Enter fullscreen mode Exit fullscreen mode

Example 10: Categorize values as 'Positive', 'Negative', or 'Zero' using list comprehension inside lambda

   column1 new_column
0       10   Positive
1       20   Positive
2       30   Positive
3       40   Positive
4       50   Positive
Enter fullscreen mode Exit fullscreen mode

Advanced data transformation using lambda

import math

import pandas as pd

# Sample DataFrame
data = {'column1': [10, 20, 30, 40, 50]}
df = pd.DataFrame(data)

# Every lambda below receives ONE cell value of 'column1'; apply() performs
# the iteration over the column.

# Example 1: Extract digits from each number
df['new_column'] = df['column1'].apply(lambda val: [int(digit) for digit in str(val)])
print("Example 1:")
print(df)

# Example 2: Extract only even digits from each number
df['new_column'] = df['column1'].apply(
    lambda val: [int(digit) for digit in str(val) if int(digit) % 2 == 0]
)
print("\nExample 2:")
print(df)

# Example 3: Convert each number to binary (bin() output without the '0b' prefix)
df['new_column'] = df['column1'].apply(lambda val: bin(val)[2:])
print("\nExample 3:")
print(df)

# Example 4: Extract vowels from the text form of each value as a list
# (numbers contain no vowels, so every result here is an empty list)
df['new_column'] = df['column1'].apply(
    lambda val: [char for char in str(val) if char.lower() in 'aeiou']
)
print("\nExample 4:")
print(df)

# Example 5: Extract the distinct prime factors: divisors i of val that have
# no divisor of their own in 2..i-1
df['new_column'] = df['column1'].apply(
    lambda val: [
        i for i in range(2, val + 1)
        if val % i == 0 and all(i % j != 0 for j in range(2, i))
    ]
)
print("\nExample 5:")
print(df)

# Example 6: Build the list 10, 11, ..., val + 9 (each integer in range(val) plus 10)
df['new_column'] = df['column1'].apply(lambda val: [i + 10 for i in range(val)])
print("\nExample 6:")
print(df)

# Example 7: Map each digit to its square with a dict comprehension
df['new_column'] = df['column1'].apply(
    lambda val: {int(digit): int(digit) ** 2 for digit in str(val)}
)
print("\nExample 7:")
print(df)

# Example 8: Categorize numbers based on their digit sum
df['new_column'] = df['column1'].apply(
    lambda val: 'Small' if sum(int(digit) for digit in str(val)) < 10 else 'Large'
)
print("\nExample 8:")
print(df)

# Example 9: Extract unique digits (set() drops duplicates)
df['new_column'] = df['column1'].apply(lambda val: list(set(int(digit) for digit in str(val))))
print("\nExample 9:")
print(df)

# Example 10: Factorial of each value. The standard library's math.factorial
# replaces a hand-written self-applying recursive lambda.
df['new_column'] = df['column1'].apply(math.factorial)
print("\nExample 10:")
print(df)
Enter fullscreen mode Exit fullscreen mode

Example 1: Extract digits from each number

   column1        new_column
0       10          [1, 0]
1       20          [2, 0]
2       30          [3, 0]
3       40          [4, 0]
4       50          [5, 0]
Enter fullscreen mode Exit fullscreen mode

Example 2: Extract only even digits from each number

   column1        new_column
0       10             [0]
1       20             [2, 0]
2       30             [0]
3       40             [4, 0]
4       50             [0]
Enter fullscreen mode Exit fullscreen mode

Example 3: Convert each number to binary using for loop inside lambda

   column1        new_column
0       10          1010
1       20          10100
2       30          11110
3       40          101000
4       50          110010
Enter fullscreen mode Exit fullscreen mode

Example 4: Extract vowels from each number as a list using for loop inside lambda

   column1        new_column
0       10             []
1       20             []
2       30             []
3       40             []
4       50             []
Enter fullscreen mode Exit fullscreen mode

Example 5: Extract prime factors using for loop inside lambda

   column1              new_column
0       10                 [2, 5]
1       20                 [2, 5]
2       30                 [2, 3, 5]
3       40                 [2, 5]
4       50                 [2, 5]
Enter fullscreen mode Exit fullscreen mode

Example 6: Apply custom function to add multiples of 10 using for loop inside lambda

   column1                                new_column
0       10  [10, 11, 12, 13, 14, 15, 16, 17, 18, 19]
1       20                     [10, 11, 12, ..., 29]
2       30                     [10, 11, 12, ..., 39]
3       40                     [10, 11, 12, ..., 49]
4       50                     [10, 11, 12, ..., 59]
Enter fullscreen mode Exit fullscreen mode

Example 7: Extract digits and their squares using for loop inside lambda

   column1     new_column
0       10   {1: 1, 0: 0}
1       20   {2: 4, 0: 0}
2       30   {3: 9, 0: 0}
3       40  {4: 16, 0: 0}
4       50  {5: 25, 0: 0}
Enter fullscreen mode Exit fullscreen mode

Example 8: Categorize numbers based on their digit sum using for loop inside lambda

  column1          new_column
0       10             Small
1       20             Small
2       30             Small
3       40             Small
4       50             Small
Enter fullscreen mode Exit fullscreen mode

Example 9: Extract unique digits using for loop inside lambda

   column1        new_column
0       10             [0, 1]
1       20             [0, 2]
2       30             [0, 3]
3       40             [0, 4]
4       50             [0, 5]
Enter fullscreen mode Exit fullscreen mode

Example 10: Apply custom function to calculate factorial using for loop inside lambda

   column1     new_column
0       10             3628800
1       20             2432902008176640000
2       30             265252859812191058636308480000000
3       40             815915283247897734345611269596115894272000000000
4       50             30414093201713378043612608166064768844377641568960512000000000000
Enter fullscreen mode Exit fullscreen mode

Apply numpy for data transformation

import pandas as pd
import numpy as np

# Example DataFrame.
# column1 mixes positive and negative integers, column2 is strictly positive,
# and column3 contains NaN and inf so the special-value checks have something to find.
data = {
    'column1': [1, -2, 3, -4, 5],
    'column2': [10, 20, 30, 40, 50],
    'column3': [100, np.nan, 300, np.inf, -500]
}

df = pd.DataFrame(data)

# Example 1: np.sqrt - Square root of each element
# (np.abs first, because column1 holds negative values)
df['sqrt_column1'] = df['column1'].apply(lambda x: np.sqrt(np.abs(x)))

# Example 2: np.exp - Exponential function for each element
df['exp_column2'] = df['column2'].apply(lambda x: np.exp(x))

# Example 3: np.log - Natural logarithm for each element
# (np.abs first, because column3 holds a negative value)
df['log_column3'] = df['column3'].apply(lambda x: np.log(np.abs(x)))

# Example 4: np.sin - Sine function for each element
df['sin_column1'] = df['column1'].apply(lambda x: np.sin(x))

# Example 5: np.cos - Cosine function for each element
df['cos_column2'] = df['column2'].apply(lambda x: np.cos(x))

# Example 6: np.tan - Tangent function for each element
# column3 contains inf, and tan(inf) is NaN; silence the "invalid value"
# floating-point warning because that NaN is the expected result here.
with np.errstate(invalid='ignore'):
    df['tan_column3'] = df['column3'].apply(lambda x: np.tan(x))

# Example 7: np.abs - Absolute value of each element
df['abs_column1'] = df['column1'].apply(lambda x: np.abs(x))

# Example 8: np.ceil - Ceiling function for each element
df['ceil_column2'] = df['column2'].apply(lambda x: np.ceil(x))

# Example 9: np.floor - Floor function for each element
df['floor_column3'] = df['column3'].apply(lambda x: np.floor(x))

# Example 10: np.round - Round each element to the nearest integer
df['round_column1'] = df['column1'].apply(lambda x: np.round(x))

# Example 11: np.maximum - Element-wise maximum of two arrays
# Pass the two columns directly: np.maximum already works element-wise.
# A row-wise df.apply(..., axis=1) would hand each row over as a float Series
# (column3 is float), turning the integer result into floats.
df['max_columns'] = np.maximum(df['column1'], df['column2'])

# Example 12: np.minimum - Element-wise minimum of two arrays
# Same reasoning as Example 11: operate on whole columns to keep the integer dtype.
df['min_columns'] = np.minimum(df['column1'], df['column2'])

# Example 13: np.isnan - Check if each element is NaN
df['isnan_column3'] = df['column3'].apply(lambda x: np.isnan(x))

# Example 14: np.isinf - Check if each element is infinity
df['isinf_column3'] = df['column3'].apply(lambda x: np.isinf(x))

# Example 15: np.isfinite - Check if each element is finite
df['isfinite_column3'] = df['column3'].apply(lambda x: np.isfinite(x))

# Example 16: np.digitize - Return the indices of the bins to which each value belongs
# With these edges a value v gets index i when bins[i-1] <= v < bins[i],
# so 10 -> 1, 20 and 30 -> 2, 40 and 50 -> 3.
bins = [0, 20, 40, 60]
df['digitize_column2'] = df['column2'].apply(lambda x: np.digitize(x, bins=bins))

# Example 17: np.where - Return elements chosen from x or y depending on condition
# Keeps positive values of column1 and replaces the rest with 0.
df['where_column1'] = np.where(df['column1'] > 0, df['column1'], 0)

# Example 18: np.select - Return an array drawn from elements in choicelist, depending on conditions
# Each condition is paired with the choice at the same position.
conditions = [df['column1'] > 0, df['column1'] <= 0]
choices = [df['column1'], 0]
df['select_column1'] = np.select(conditions, choices)

# Example 19: np.clip - Clip (limit) the values in an array
# Values below 15 become 15 and values above 35 become 35.
df['clip_column2'] = df['column2'].apply(lambda x: np.clip(x, 15, 35))

# Example 20: np.vectorize - Vectorize a scalar function to apply it element-wise to arrays
vectorized_func = np.vectorize(lambda x: x ** 2)
df['vectorized_column1'] = vectorized_func(df['column1'])

# Display the resulting DataFrame
print("Examples of Using NumPy Functions Inside apply Lambda:")
print(df)
Enter fullscreen mode Exit fullscreen mode

Here are the separate outputs for each transformation:

Example 1: np.sqrt - Square root of each element

   column1  column2  column3  sqrt_column1
0        1       10    100.0      1.000000
1       -2       20      NaN      1.414214
2        3       30    300.0      1.732051
3       -4       40      inf      2.000000
4        5       50   -500.0      2.236068
Enter fullscreen mode Exit fullscreen mode

Example 2: np.exp - Exponential function for each element

   column1  column2  column3  exp_column2
0        1       10    100.0  22026.465795
1       -2       20      NaN  485165195.409790
2        3       30    300.0  10686474581524.463
3       -4       40      inf  2.3538526683702e+17
4        5       50   -500.0  5.184705528587072e+21
Enter fullscreen mode Exit fullscreen mode

Example 3: np.log - Natural logarithm for each element

   column1  column2  column3  log_column3
0        1       10    100.0     4.605170
1       -2       20      NaN          NaN
2        3       30    300.0     5.703782
3       -4       40      inf          inf
4        5       50   -500.0     6.214608
Enter fullscreen mode Exit fullscreen mode

Example 4: np.sin - Sine function for each element

   column1  column2  column3  sin_column1
0        1       10    100.0     0.841471
1       -2       20      NaN    -0.909297
2        3       30    300.0     0.141120
3       -4       40      inf     0.756802
4        5       50   -500.0    -0.958924
Enter fullscreen mode Exit fullscreen mode

Example 5: np.cos - Cosine function for each element

  column1  column2  column3  cos_column2
0        1       10    100.0    -0.839072
1       -2       20      NaN     0.408082
2        3       30    300.0     0.154251
3       -4       40      inf    -0.666938
4        5       50   -500.0     0.964966
Enter fullscreen mode Exit fullscreen mode

Example 6: np.tan - Tangent function for each element

   column1  column2  column3  tan_column3
0        1       10    100.0    -0.587214
1       -2       20      NaN          NaN
2        3       30    300.0    45.244742
3       -4       40      inf          NaN
4        5       50   -500.0    -0.529244
Enter fullscreen mode Exit fullscreen mode

Example 7: np.abs - Absolute value of each element

   column1  column2  column3  abs_column1
0        1       10    100.0            1
1       -2       20      NaN            2
2        3       30    300.0            3
3       -4       40      inf            4
4        5       50   -500.0            5
Enter fullscreen mode Exit fullscreen mode

Example 8: np.ceil - Ceiling function for each element

   column1  column2  column3  ceil_column2
0        1       10    100.0          10.0
1       -2       20      NaN          20.0
2        3       30    300.0          30.0
3       -4       40      inf          40.0
4        5       50   -500.0          50.0
Enter fullscreen mode Exit fullscreen mode

Example 9: np.floor - Floor function for each element

   column1  column2  column3  floor_column3
0        1       10    100.0           100.0
1       -2       20      NaN             NaN
2        3       30    300.0           300.0
3       -4       40      inf             inf
4        5       50   -500.0          -500.0
Enter fullscreen mode Exit fullscreen mode

Example 10: np.round - Round each element to the nearest integer

   column1  column2  column3  round_column1
0        1       10    100.0              1
1       -2       20      NaN             -2
2        3       30    300.0              3
3       -4       40      inf             -4
4        5       50   -500.0              5
Enter fullscreen mode Exit fullscreen mode

Example 11: np.maximum - Element-wise maximum of two arrays

   column1  column2  column3  max_columns
0        1       10    100.0            10
1       -2       20      NaN            20
2        3       30    300.0            30
3       -4       40      inf            40
4        5       50   -500.0            50
Enter fullscreen mode Exit fullscreen mode

Example 12: np.minimum - Element-wise minimum of two arrays

   column1  column2  column3  min_columns
0        1       10    100.0             1
1       -2       20      NaN            -2
2        3       30    300.0             3
3       -4       40      inf            -4
4        5       50   -500.0             5
Enter fullscreen mode Exit fullscreen mode

Example 13: np.isnan - Check if each element is NaN

   column1  column2  column3  isnan_column3
0        1       10    100.0          False
1       -2       20      NaN           True
2        3       30    300.0          False
3       -4       40      inf          False
4        5       50   -500.0         False
Enter fullscreen mode Exit fullscreen mode

Example 14: np.isinf - Check if each element is infinity

   column1  column2  column3  isinf_column3
0        1       10    100.0          False
1       -2       20      NaN          False
2        3       30    300.0          False
3       -4       40      inf           True
4        5       50   -500.0         False
Enter fullscreen mode Exit fullscreen mode

Example 15: np.isfinite - Check if each element is finite

   column1  column2  column3  isfinite_column3
0        1       10    100.0              True
1       -2       20      NaN             False
2        3       30    300.0              True
3       -4       40      inf             False
4        5       50   -500.0             True
Enter fullscreen mode Exit fullscreen mode

Example 16: np.digitize - Return the indices of the bins to which each value belongs

   column1  column2  column3  digitize_column2
0        1       10    100.0                 1
1       -2       20      NaN                 2
2        3       30    300.0                 2
3       -4       40      inf                 3
4        5       50   -500.0                 3
Enter fullscreen mode Exit fullscreen mode

Example 17: np.where - Return elements chosen from x or y depending on condition

   column1  column2  column3  where_column1
0        1       10    100.0              1
1       -2       20      NaN              0
2        3       30    300.0              3
3       -4       40      inf              0
4        5       50   -500.0              5
Enter fullscreen mode Exit fullscreen mode

Example 18: np.select - Return an array drawn from elements in choicelist, depending on conditions

   column1  column2  column3  select_column1
0        1       10    100.0               1
1       -2       20      NaN               0
2        3       30    300.0               3
3       -4       40      inf               0
4        5       50   -500.0               5
Enter fullscreen mode Exit fullscreen mode

Example 19: np.clip - Clip (limit) the values in an array

   column1  column2  column3  clip_column2
0        1       10    100.0          15.0
1       -2       20      NaN          20.0
2        3       30    300.0          30.0
3       -4       40      inf          35.0
4        5       50   -500.0          35.0
Enter fullscreen mode Exit fullscreen mode

Example 20: np.vectorize - Vectorize a scalar function to apply it element-wise to arrays

   column1  column2  column3  vectorized_column1
0        1       10    100.0                   1
1       -2       20      NaN                   4
2        3       30    300.0                   9
3       -4       40      inf                  16
4        5       50   -500.0                  25
Enter fullscreen mode Exit fullscreen mode

Cheatsheet

create dataset from collection of list

df=pd.DataFrame(list).transpose()
df.columns = ['Name', 'Age', 'City']
Enter fullscreen mode Exit fullscreen mode
locations = [(group, index) for group, elements in grouped_dict.items() for index, value in enumerate(elements) if value == target_value]
Enter fullscreen mode Exit fullscreen mode

how to get index of list of dictionary

for group, elements in grouped_dict.items():
    for index, value in enumerate(elements):
Enter fullscreen mode Exit fullscreen mode

How to add a suffix or prefix to a particular column name
df[column + '_squared'] = df[column].apply(my_lambda)

Apply a function to round to the nearest 10
df['column1'].apply(lambda x: round(x, -1))
how to iterate all element except 1st element
for column in df.columns[1:]:

Summary

How a DataFrame is created from a collection of lists, a list of dictionaries, a grouped dictionary of lists, and a dictionary of dictionaries
Find the location of a particular value in a list of dictionaries, with and without a list comprehension
Sum of all values in a list of dictionaries
Sum of all values in each row list of a list of dictionaries
First occurrence of an even number in a list using next
Apply the lambda function to all columns using a for loop
Apply the lambda function to all columns exclude first colm
Define lambda function to apply based on multiple conditions
Apply lambda function to square,double,conversion to string extract last digit ,square root,round to the nearest 10
Apply a custom function with multiple conditions using for loop inside lambda to find categories nested lambda function
Apply a custom function with multiple conditions using for loop inside lambda to find sentiments nested lambda function nested lambda function with list comprhension
apply nested lambda fun to Extract only even digits from each number
Extract vowels from each number as a list using for loop inside lambda
Apply custom function to add multiples of 10 using for loop inside lambda
Categorize numbers based on their digit sum using for loop inside lambda
Apply a numpy fun inside lambda round,ceil,floor,max,minimum,check null,absolute,sqroot

Question

Finding location of particular value in table
find location of particular value in list of dictionary using list comprhension using group
How pandas table convert into dictionary of list
How pandas table convert into list of dictionary
find location of particular value in list of dictionary using enumerate and for loop
Finding the sum of all values of each list in a list of dictionaries with a list comprehension
Finding the sum of all values of each row list in a list of dictionaries with a list comprehension
First occurrence of an even number in a list using next

how to group by table by particular colm based on multiple condition
Apply the lambda function to all columns using a for loop
Apply the lambda function to all columns exclude first colm
Define lambda function to apply based on multiple conditions
Apply lambda function to square,double,conversion to string extract last digit ,square root,round to the nearest 10 from existing col to create new colm
Apply custom function to create categories(small,medium,high) and (positive,negative,neutral) from existing col to create new colm
Apply a custom function with multiple conditions using for loop inside lambda to find categories nested lambda function

Apply a custom function with multiple conditions using for loop inside lambda to find sentiments nested lambda function nested lambda function with list comprhension

apply nested lambda fun to Extract only even digits from each number
Extract vowels from each number as a list using for loop inside lambda
Apply custom function to add multiples of 10 using for loop inside lambda
Categorize numbers based on their digit sum using for loop inside lambda
Apply a numpy fun inside lambda round,ceil,floor,max,minimum,check null,absolute,sqroot

Solution

# Count how many times the value 5 appears in the chosen column.
# value_counts() returns a Series indexed by the distinct values, so check
# membership on the index before looking the value up with .loc.
value_counts = df[column_name].value_counts()
if 5 in value_counts.index:
    count_of_5 = value_counts.loc[5]
else:
    # The value never occurs; define the count instead of leaving the name unset.
    count_of_5 = 0
Enter fullscreen mode Exit fullscreen mode
[(group, index) for group, elements in grouped_dict.items() for index, value in enumerate(elements) if value == target_value]
Enter fullscreen mode Exit fullscreen mode
for group, elements in grouped_dict.items():
    for index, value in enumerate(elements):
Enter fullscreen mode Exit fullscreen mode
sums = [sum(elements) for group, elements in grouped_dict.items()]
allsums = sum([sum(elements) for group, elements in grouped_dict.items()])
Enter fullscreen mode Exit fullscreen mode
next((num for num in numbers if num % 2 == 0), None)
Enter fullscreen mode Exit fullscreen mode
grouped_data = df.groupby('column1').agg({'column2': 'mean', 'column3': 'sum'})
Enter fullscreen mode Exit fullscreen mode
for column in df.columns:
    df[column + '_squared'] = df[column].apply(my_lambda)
Enter fullscreen mode Exit fullscreen mode
for column in df.columns[1:]: 
    df[column + '_transformed'] = df[column].apply(my_lambda)
Enter fullscreen mode Exit fullscreen mode
my_lambda = lambda x: x ** 2 if x > mean_col2 else x + 10 if x > mean_col3 else x * 2 if x > median_col1 else x
Enter fullscreen mode Exit fullscreen mode
lambda x: x ** 2, lambda x: x * 2, lambda x: str(x), lambda x: x % 10, apply(np.sqrt), lambda x: round(x, -1)
Enter fullscreen mode Exit fullscreen mode
df['new_column'] = df['column1'].apply(lambda x: (lambda val: 'Very Low' if val < 20 else 'Low' if 20 <= val < 40 else 'Medium' if 40 <= val < 60 else 'High')(x))
Enter fullscreen mode Exit fullscreen mode
first way
df['new_column'] = df['column1'].apply(lambda x: (lambda val: 'Positive' if val > 0 else 'Negative' if val < 0 else 'Zero')(x))
second way
df['new_column'] = df['column1'].apply(lambda x: (lambda val: ['Positive' if v > 0 else 'Negative' if v < 0 else 'Zero' for v in [val]])(x))
3rd way
df['new_column'] = df['column1'].apply(lambda x: (lambda val: ['Positive' if v > 0 else 'Negative' if v < 0 else 'Zero' for v in [val]][0])(x))
Enter fullscreen mode Exit fullscreen mode

note
the [0] at the end of the list comprehension is used to extract the single element from the list comprehension result ex: print(mylist[0])

 df['column1'].apply(lambda x: (lambda val: [int(digit) for digit in str(val) if int(digit) % 2 == 0])(x))
Enter fullscreen mode Exit fullscreen mode
df['new_column'] = df['column1'].apply(lambda x: (lambda val: [char for char in str(val) if char.lower() in 'aeiou'])(x))
Enter fullscreen mode Exit fullscreen mode
df['new_column'] = df['column1'].apply(lambda x: (lambda val: [i + 10 for i in range(val)])(x))
Enter fullscreen mode Exit fullscreen mode
df['new_column'] = df['column1'].apply(lambda x: (lambda val: {int(digit): int(digit)**2 for digit in str(val)})(x))
Enter fullscreen mode Exit fullscreen mode
lambda x: np.abs(x), lambda x: np.ceil(x), lambda x: np.floor(x)
lambda x: np.round(x), 
(lambda row: np.minimum(row['column1'], row['column2']), axis=1)
lambda x: np.isnan(x), lambda x: np.isinf(x)
lambda x: np.clip(x, 15, 35)
Enter fullscreen mode Exit fullscreen mode

Top comments (0)