Differences

This shows you the differences between two versions of the page.

--- development:python:pandas [2024/08/07 03:39] – [4.3. Delete a DataFrame] tungnt
+++ development:python:pandas [2024/11/19 14:41] (current) – [5. Group/Sort Data] tungnt
@@ Line 122: / Line 122: @@
 </file>
+<code python>
+df_ut['date'] = pd.to_datetime(df_ut['date'], format='%d/%m/%Y')
+df_ut['approved_time'] = pd.to_datetime(df_ut['approved_time'], format='%d/%m/%Y %H:%M:%S')
+df_ut['created_at'] = pd.to_datetime(df_ut['created_at'], format='%d/%m/%Y %H:%M:%S')
+df_ut['updated_at'] = pd.to_datetime(df_ut['updated_at'], format='%d/%m/%Y %H:%M:%S')
+df_ut['processed_time'] = (df_ut['approved_time'] - df_ut['created_at']).dt.total_seconds()
+df_ut['weekday'] = df_ut['created_at'].dt.day_name()
+df_ut['fee_postpaid'] = np.where(((df_ut['fee_type'] == 'POSTPAID' )) , df_ut['fee'], 0)
+</code>
 ===== 4.2. Update a DataFrame =====
@@ Line 251: / Line 260: @@
 df_groups = df.groupby(['transaction_id']).agg({'transaction_id':'size', 'fee': 'sum', 'amount': 'mean'}).rename(columns={'transaction_id':'count', 'fee': 'fee_total', 'amount': 'amount_avg'}).reset_index()
+df_ut_groups = df_ut_trans.groupby(['user_id', 'type']).agg({'type':'size', 'request_amount': 'sum', 'fee': 'sum', 'fee_fixed': 'mean', 'fee_flexible': 'mean', 'partner_fee': 'sum'}).rename(columns={'type':'count', 'request_amount': 'gmv', 'fee': 'fee', 'fee_fixed': 'fee_fixed', 'fee_flexible': 'fee_flexible', 'partner_fee': 'partner_fee'}).reset_index()
 df_groups.sort_values(by='count', ascending=False)
 df_groups.sort_values(by='amount_avg', ascending=False)
+# Tính giá trị trung bình 1 cột
+df["internal_time"].mean()
+# Loại bỏ giá trị trùng lặp và lấy giá trị đầu
+df_group = df.groupby('uid').first().reset_index()
 </file>
@@ Line 275: / Line 293: @@
 </file>
+====== 8. Others ======
+<file python>
+df_users_with_master.set_index('id', inplace=True)
+</file>
+===== Cập nhật dữ liệu cột của một dataframe từ một dataframe khác =====
+Giả sử:
+  * df1: DataFrame gốc cần cập nhật dữ liệu
+  * df2: DataFrame chứa dữ liệu mới để cập nhật
+  * key: cột khóa chung giữa df1 và df2
+  * column_to_update: cột mà bạn muốn cập nhật trong df1
+**Cách 1:**
+Phương thức update() cho phép cập nhật trực tiếp các giá trị trong df1 từ df2 dựa trên các chỉ số hoặc cột chung.
+<file python>
+import pandas as pd
+# Tạo DataFrame ví dụ
+df1 = pd.DataFrame({
+    'key': [1, 2, 3, 4],
+    'column_to_update': ['A', 'B', 'C', 'D']
+})
+df2 = pd.DataFrame({
+    'key': [2, 3],
+    'column_to_update': ['X', 'Y']
+})
+# Thiết lập 'key' làm chỉ số chung để cập nhật
+df1.set_index('key', inplace=True)
+df2.set_index('key', inplace=True)
+# Cập nhật df1 từ df2
+df1.update(df2)
+# Reset index nếu cần thiết
+df1.reset_index(inplace=True)
+print(df1)
+</file>
+**Cách 2:**
+Nếu chỉ muốn cập nhật một cột cụ thể, có thể dùng map() để ánh xạ giá trị từ df2 sang df1.
+<file python>
+# Tạo DataFrame gốc và DataFrame chứa dữ liệu cập nhật
+df1 = pd.DataFrame({
+    'key': [1, 2, 3, 4],
+    'column_to_update': ['A', 'B', 'C', 'D']
+})
+df2 = pd.DataFrame({
+    'key': [2, 3],
+    'column_to_update': ['X', 'Y']
+})
+# Ánh xạ giá trị từ df2 sang df1 dựa trên 'key'
+df1['column_to_update'] = df1['key'].map(df2.set_index('key')['column_to_update']).fillna(df1['column_to_update'])
+print(df1)
+</file>
+**Cách 3:**
+Dùng merge() để kết hợp hai DataFrame dựa trên key, sau đó chọn cột cập nhật từ df2.
+<file python>
+# Kết hợp df1 và df2
+df_combined = df1.merge(df2[['key', 'column_to_update']], on='key', how='left', suffixes=('', '_new'))
+# Cập nhật cột từ df2 nếu có
+df_combined['column_to_update'] = df_combined['column_to_update_new'].combine_first(df_combined['column_to_update'])
+# Bỏ cột phụ
+df_combined.drop(columns='column_to_update_new', inplace=True)
+print(df_combined)
+</file>
+===== Xóa nhiều bản ghi cùng lúc =====
+<code>
+# Xóa các dòng có index là 1 và 3
+df = df.drop(index=[1, 3])
+# Xóa các dòng có id là 1 và 3
+df = df[~df['id'].isin([1, 3])]
+</code>