Skip to content

Reference

FromDataframe

Bases: Upsert, Insert

Source code in keepitsql/core/from_dataframe.py
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
class FromDataframe(Upsert, Insert):
    def __init__(self, target_table, dataframe, target_schema=None):
        """Initializes a new instance of the FromDataframe class.

        Parameters
        ----------
        - target_table: str. The name of the target table for SQL operations.
        - dataframe: DataFrame. The dataframe contclass FromDataframe(Upsert, Insert):
            def __init__(self, target_table, dataframe, target_schema=None):

                Initializes a new instance of the FromDataframe class, which is designed to perform upsert and insert operations on a SQL table using data from a pandas DataFrame.

                Inherits from:
                Upsert -- A class that implements the logic for updating existing records or inserting new records if they do not exist.
                Insert -- A class that implements the logic for inserting new records into a SQL table.

                Parameters
                ----------
                target_table : str
                    The name of the target table where SQL operations will be performed.

                dataframe : DataFrame
                    The pandas DataFrame containing the data that needs to be upserted or inserted into the target table.



                Methods
                -------
                The class should implement methods inherited from `Upsert` and `Insert` classes for executing SQL operations.
                These methods will utilize `target_table`, `dataframe`, and `target_schema` as part of their operation.

                Examples
                --------
                df_data = pandas.read_csv('path/to/data.csv')
                sql_upserter = FromDataframe('my_table', df_data, 'public')
                sql_upserter.upsert()
        """
        super().__init__(target_table, target_schema, dataframe)

__init__(target_table, dataframe, target_schema=None)

Initializes a new instance of the FromDataframe class.

Parameters
  • target_table: str. The name of the target table for SQL operations.
  • dataframe: DataFrame. The dataframe contclass FromDataframe(Upsert, Insert): def init(self, target_table, dataframe, target_schema=None):
    Initializes a new instance of the FromDataframe class, which is designed to perform upsert and insert operations on a SQL table using data from a pandas DataFrame.
    
    Inherits from:
    Upsert -- A class that implements the logic for updating existing records or inserting new records if they do not exist.
    Insert -- A class that implements the logic for inserting new records into a SQL table.
    
    Parameters
    ----------
    target_table : str
        The name of the target table where SQL operations will be performed.
    
    dataframe : DataFrame
        The pandas DataFrame containing the data that needs to be upserted or inserted into the target table.
    
    Methods
    -------
    The class should implement methods inherited from `Upsert` and `Insert` classes for executing SQL operations.
    These methods will utilize `target_table`, `dataframe`, and `target_schema` as part of their operation.
    
    Examples
    --------
    df_data = pandas.read_csv('path/to/data.csv')
    sql_upserter = FromDataframe('my_table', df_data, 'public')
    sql_upserter.upsert()
    
Source code in keepitsql/core/from_dataframe.py
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
def __init__(self, target_table, dataframe, target_schema=None):
    """Initializes a new instance of the FromDataframe class.

    Parameters
    ----------
    - target_table: str. The name of the target table for SQL operations.
    - dataframe: DataFrame. The dataframe contclass FromDataframe(Upsert, Insert):
        def __init__(self, target_table, dataframe, target_schema=None):

            Initializes a new instance of the FromDataframe class, which is designed to perform upsert and insert operations on a SQL table using data from a pandas DataFrame.

            Inherits from:
            Upsert -- A class that implements the logic for updating existing records or inserting new records if they do not exist.
            Insert -- A class that implements the logic for inserting new records into a SQL table.

            Parameters
            ----------
            target_table : str
                The name of the target table where SQL operations will be performed.

            dataframe : DataFrame
                The pandas DataFrame containing the data that needs to be upserted or inserted into the target table.



            Methods
            -------
            The class should implement methods inherited from `Upsert` and `Insert` classes for executing SQL operations.
            These methods will utilize `target_table`, `dataframe`, and `target_schema` as part of their operation.

            Examples
            --------
            df_data = pandas.read_csv('path/to/data.csv')
            sql_upserter = FromDataframe('my_table', df_data, 'public')
            sql_upserter.upsert()
    """
    super().__init__(target_table, target_schema, dataframe)

Insert

Source code in keepitsql/core/insert.py
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
class Insert:
    def insert(
        self,
        column_select: list = None,
        temp_type: str = None,
    ) -> str:
        """Generates an SQL INSERT statement for inserting data from the source DataFrame into the target table. This method supports selective column insertion and can format the table name for temporary tables. The values from the DataFrame are formatted as strings, with special handling for None values and escaping single quotes.

        Parameters
        ----------
        - column_select (list of str, optional): A list specifying which columns from the source DataFrame should be included in the INSERT statement. If None, all columns are used.
        - temp_type (str, optional): Specifies the type of temporary table. This affects the naming convention used in the SQL statement. For example, 'local' or 'global' temporary tables in MSSQL. If None, a standard table name format is used.

        Returns
        -------
        - str: A complete SQL INSERT statement ready for execution. This statement includes the formatted table name, column names, and values to insert.

        Raises
        ------
        - ValueError: If `column_select` includes column names not present in the source DataFrame.
        - AttributeError: If the method is called before a source DataFrame is set.

        Notes
        -----
        - The method uses the `format_table_name` function to format the target table name based on the `temp_type` and `target_schema`.
        - Column names for the INSERT statement are prepared using the `prepare_column_select_list` function, which handles both Pandas and Polars DataFrames.
        - The source DataFrame values are converted to string format, handling None values appropriately and escaping single quotes to prevent SQL injection or syntax errors.
        - This method should be called after setting the source DataFrame using `set_source_dataframe`.

        Example usage:
        ```python
        from_dataframe = FromDataFrame(target_table="your_table_name", target_schema="your_schema_name")
        from_dataframe.set_source_dataframe(your_dataframe)
        insert_statement = from_dataframe.sql_insert(column_select=['col1', 'col2'], temp_type='local')
        print(insert_statement)
        ```
        """
        self.source_dataframe = self.dataframe[column_select] if column_select is not None else self.dataframe

        target_tbl = format_table_name(
            table_name=self.target_table,
            schema_name=self.target_schema,
            temp_table_type=temp_type,
        )

        # Assuming `prepare_sql_columnlist` handles DataFrame and returns a string of column names for SQL
        get_column_header = prepare_column_select_list(
            self.dataframe,
            column_select,
        )

        # Convert DataFrame values to strings, handle None values, and escape single quotes
        formatted_values = self.dataframe.map(
            lambda x: f"'{str(x).replace('s', 'd')}'" if x is not None else 'NULL',
        )

        # Build the values string by concatenating row values
        value_list = ',\n '.join(f"({','.join(row)})" for row in formatted_values.values)

        # Construct the full INSERT statement
        insert_statement = ist.standard_insert.format(
            table_name=target_tbl,
            column_names=get_column_header,
            insert_value_list=value_list,
        )

        return insert_statement

insert(column_select=None, temp_type=None)

Generates an SQL INSERT statement for inserting data from the source DataFrame into the target table. This method supports selective column insertion and can format the table name for temporary tables. The values from the DataFrame are formatted as strings, with special handling for None values and escaping single quotes.

Parameters
  • column_select (list of str, optional): A list specifying which columns from the source DataFrame should be included in the INSERT statement. If None, all columns are used.
  • temp_type (str, optional): Specifies the type of temporary table. This affects the naming convention used in the SQL statement. For example, 'local' or 'global' temporary tables in MSSQL. If None, a standard table name format is used.
Returns
  • str: A complete SQL INSERT statement ready for execution. This statement includes the formatted table name, column names, and values to insert.
Raises
  • ValueError: If column_select includes column names not present in the source DataFrame.
  • AttributeError: If the method is called before a source DataFrame is set.
Notes
  • The method uses the format_table_name function to format the target table name based on the temp_type and target_schema.
  • Column names for the INSERT statement are prepared using the prepare_column_select_list function, which handles both Pandas and Polars DataFrames.
  • The source DataFrame values are converted to string format, handling None values appropriately and escaping single quotes to prevent SQL injection or syntax errors.
  • This method should be called after setting the source DataFrame using set_source_dataframe.

Example usage:

from_dataframe = FromDataFrame(target_table="your_table_name", target_schema="your_schema_name")
from_dataframe.set_source_dataframe(your_dataframe)
insert_statement = from_dataframe.sql_insert(column_select=['col1', 'col2'], temp_type='local')
print(insert_statement)
Source code in keepitsql/core/insert.py
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
def insert(
    self,
    column_select: list = None,
    temp_type: str = None,
) -> str:
    """Generates an SQL INSERT statement for inserting data from the source DataFrame into the target table. This method supports selective column insertion and can format the table name for temporary tables. The values from the DataFrame are formatted as strings, with special handling for None values and escaping single quotes.

    Parameters
    ----------
    - column_select (list of str, optional): A list specifying which columns from the source DataFrame should be included in the INSERT statement. If None, all columns are used.
    - temp_type (str, optional): Specifies the type of temporary table. This affects the naming convention used in the SQL statement. For example, 'local' or 'global' temporary tables in MSSQL. If None, a standard table name format is used.

    Returns
    -------
    - str: A complete SQL INSERT statement ready for execution. This statement includes the formatted table name, column names, and values to insert.

    Raises
    ------
    - ValueError: If `column_select` includes column names not present in the source DataFrame.
    - AttributeError: If the method is called before a source DataFrame is set.

    Notes
    -----
    - The method uses the `format_table_name` function to format the target table name based on the `temp_type` and `target_schema`.
    - Column names for the INSERT statement are prepared using the `prepare_column_select_list` function, which handles both Pandas and Polars DataFrames.
    - The source DataFrame values are converted to string format, handling None values appropriately and escaping single quotes to prevent SQL injection or syntax errors.
    - This method should be called after setting the source DataFrame using `set_source_dataframe`.

    Example usage:
    ```python
    from_dataframe = FromDataFrame(target_table="your_table_name", target_schema="your_schema_name")
    from_dataframe.set_source_dataframe(your_dataframe)
    insert_statement = from_dataframe.sql_insert(column_select=['col1', 'col2'], temp_type='local')
    print(insert_statement)
    ```
    """
    self.source_dataframe = self.dataframe[column_select] if column_select is not None else self.dataframe

    target_tbl = format_table_name(
        table_name=self.target_table,
        schema_name=self.target_schema,
        temp_table_type=temp_type,
    )

    # Assuming `prepare_sql_columnlist` handles DataFrame and returns a string of column names for SQL
    get_column_header = prepare_column_select_list(
        self.dataframe,
        column_select,
    )

    # Convert DataFrame values to strings, handle None values, and escape single quotes
    formatted_values = self.dataframe.map(
        lambda x: f"'{str(x).replace('s', 'd')}'" if x is not None else 'NULL',
    )

    # Build the values string by concatenating row values
    value_list = ',\n '.join(f"({','.join(row)})" for row in formatted_values.values)

    # Construct the full INSERT statement
    insert_statement = ist.standard_insert.format(
        table_name=target_tbl,
        column_names=get_column_header,
        insert_value_list=value_list,
    )

    return insert_statement

InsertOnConflict

Bases: Insert

Class to handle inserting data into a target table with conflict resolution.

Attributes:

Name Type Description
target_table str

The name of the target table.

target_schema str

The schema of the target table.

dataframe DataFrame

The dataframe containing the data to be inserted.

Source code in keepitsql/core/upsert.py
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
class InsertOnConflict(Insert):
    """
    Class to handle inserting data into a target table with conflict resolution.

    Attributes:
        target_table (str): The name of the target table.
        target_schema (str): The schema of the target table.
        dataframe (DataFrame): The dataframe containing the data to be inserted.
    """

    def __init__(self, target_table, target_schema, dataframe):
        """
        Initializes the InsertOnConflict class with the target table, target schema, and dataframe.

        Args:
            target_table (str): The name of the target table.
            target_schema (str): The schema of the target table.
            dataframe (DataFrame): The dataframe containing the data to be inserted.
        """
        self.target_table = target_table
        self.target_schema = target_schema
        self.dataframe = dataframe

    def insert_on_conflict(
        self,
        source_table: str,
        match_condition: list,
        source_schema: str = None,
        constraint_columns: list = None,
        temp_type: str = None,
    ):
        """
        Creates a SQL insert statement with conflict resolution to insert data from the source table into the target table.

        Args:
            source_table (str): The name of the source table.
            match_condition (list): The list of columns to be used as match conditions.
            source_schema (str, optional): The schema of the source table. Defaults to None.
            column_exclusion (list, optional): The list of columns to be excluded from the insert. Defaults to None.
            temp_type (str, optional): The type of temporary table to be used. Defaults to None.
        """
        insert_stmt = self.insert()
        update_list = '\n,'.join

        update_list_col = select_dataframe_column(source_dataframe=self.dataframe, output_type='list')

        match_conditions = ','.join(match_condition)

        update_list = ',\n'.join(
            ioc.update_list.format(column=col)
            for col in update_list_col
            if col.upper() not in list(map(lambda x: x.upper(), match_condition))
        )

        on_conflict_statement = ioc.insert_on_conflict.format(
            insert_statment=insert_stmt, match_condition=match_conditions, update_list=update_list
        )

        return on_conflict_statement

__init__(target_table, target_schema, dataframe)

Initializes the InsertOnConflict class with the target table, target schema, and dataframe.

Parameters:

Name Type Description Default
target_table str

The name of the target table.

required
target_schema str

The schema of the target table.

required
dataframe DataFrame

The dataframe containing the data to be inserted.

required
Source code in keepitsql/core/upsert.py
125
126
127
128
129
130
131
132
133
134
135
136
def __init__(self, target_table, target_schema, dataframe):
    """
    Initializes the InsertOnConflict class with the target table, target schema, and dataframe.

    Args:
        target_table (str): The name of the target table.
        target_schema (str): The schema of the target table.
        dataframe (DataFrame): The dataframe containing the data to be inserted.
    """
    self.target_table = target_table
    self.target_schema = target_schema
    self.dataframe = dataframe

insert_on_conflict(source_table, match_condition, source_schema=None, constraint_columns=None, temp_type=None)

Creates a SQL insert statement with conflict resolution to insert data from the source table into the target table.

Parameters:

Name Type Description Default
source_table str

The name of the source table.

required
match_condition list

The list of columns to be used as match conditions.

required
source_schema str

The schema of the source table. Defaults to None.

None
column_exclusion list

The list of columns to be excluded from the insert. Defaults to None.

required
temp_type str

The type of temporary table to be used. Defaults to None.

None
Source code in keepitsql/core/upsert.py
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
def insert_on_conflict(
    self,
    source_table: str,
    match_condition: list,
    source_schema: str = None,
    constraint_columns: list = None,
    temp_type: str = None,
):
    """
    Creates a SQL insert statement with conflict resolution to insert data from the source table into the target table.

    Args:
        source_table (str): The name of the source table.
        match_condition (list): The list of columns to be used as match conditions.
        source_schema (str, optional): The schema of the source table. Defaults to None.
        column_exclusion (list, optional): The list of columns to be excluded from the insert. Defaults to None.
        temp_type (str, optional): The type of temporary table to be used. Defaults to None.
    """
    insert_stmt = self.insert()
    update_list = '\n,'.join

    update_list_col = select_dataframe_column(source_dataframe=self.dataframe, output_type='list')

    match_conditions = ','.join(match_condition)

    update_list = ',\n'.join(
        ioc.update_list.format(column=col)
        for col in update_list_col
        if col.upper() not in list(map(lambda x: x.upper(), match_condition))
    )

    on_conflict_statement = ioc.insert_on_conflict.format(
        insert_statment=insert_stmt, match_condition=match_conditions, update_list=update_list
    )

    return on_conflict_statement

Merge

Class to handle the merging of data from a source table into a target table using a match condition.

Attributes:

Name Type Description
target_table str

The name of the target table.

target_schema str

The schema of the target table.

dataframe DataFrame

The dataframe containing the data to be merged.

Source code in keepitsql/core/upsert.py
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
class Merge:
    """
    Class to handle the merging of data from a source table into a target table using a match condition.

    Attributes:
        target_table (str): The name of the target table.
        target_schema (str): The schema of the target table.
        dataframe (DataFrame): The dataframe containing the data to be merged.
    """

    def __init__(self, target_table, target_schema, dataframe):
        """
        Initializes the Merge class with the target table, target schema, and dataframe.

        Args:
            target_table (str): The name of the target table.
            target_schema (str): The schema of the target table.
            dataframe (DataFrame): The dataframe containing the data to be merged.
        """
        self.target_table = target_table
        self.target_schema = target_schema
        self.dataframe = dataframe

    def merge(
        self,
        source_table: str,
        match_condition: list,
        source_schema: str = None,
        constraint_columns: list = None,
        temp_type: str = None,
    ) -> str:
        """
        Creates a SQL merge statement to merge data from the source table into the target table.

        Args:
            source_table (str): The name of the source table.
            match_condition (list): The list of columns to be used as match conditions.
            source_schema (str, optional): The schema of the source table. Defaults to None.
            constraint_columns (list, optional): The list of columns that are used as constraints,
                                                such as primary keys or auto-update columns,
                                                which should not be inserted. Defaults to None.
            temp_type (str, optional): The type of temporary table to be used. Defaults to None.

        Returns:
            str: The generated SQL merge statement.
        """
        target_table = format_table_name(
            table_name=self.target_table,
            schema_name=self.target_schema,
        )
        source_table = format_table_name(
            table_name=source_table,
            schema_name=source_schema,
            temp_table_type=temp_type,
        )

        all_columns = list(self.dataframe.columns)

        if constraint_columns is None:
            constraint_columns = []

        # column_inclusion = [col for col in all_columns if col not in match_condition and col not in column_exclusion]

        join_conditions = ',\n  '.join(
            mst.merge_condition.format(source_column=col, target_column=col) for col in match_condition
        )

        matched_condition = '\n OR '.join(
            mst.when_matched_condition.format(target_column=col, source_column=col)
            for col in all_columns
            if col not in match_condition
        )

        merge_update_list = ',\n'.join(
            mst.update_list.format(target_column=col, source_column=col)
            for col in all_columns
            if col not in match_condition
        )

        merge_insert_values = ',\n'.join(
            mst.merge_insert.format(source_column=col) for col in all_columns if col not in constraint_columns
        )
        merge_insert_columns = ',\n'.join(
            mst.merge_insert_columns.format(source_column=col) for col in all_columns if col not in constraint_columns
        )

        merge_statement = mst.merge_statement.format(
            target_table=target_table,
            source_table=source_table,
            merge_join_conditions=join_conditions,
            matched_condition=matched_condition,
            update_list=merge_update_list,
            insert_columns=merge_insert_columns,
            merge_insert_value=merge_insert_values,
        )

        return merge_statement

__init__(target_table, target_schema, dataframe)

Initializes the Merge class with the target table, target schema, and dataframe.

Parameters:

Name Type Description Default
target_table str

The name of the target table.

required
target_schema str

The schema of the target table.

required
dataframe DataFrame

The dataframe containing the data to be merged.

required
Source code in keepitsql/core/upsert.py
26
27
28
29
30
31
32
33
34
35
36
37
def __init__(self, target_table, target_schema, dataframe):
    """
    Initializes the Merge class with the target table, target schema, and dataframe.

    Args:
        target_table (str): The name of the target table.
        target_schema (str): The schema of the target table.
        dataframe (DataFrame): The dataframe containing the data to be merged.
    """
    self.target_table = target_table
    self.target_schema = target_schema
    self.dataframe = dataframe

merge(source_table, match_condition, source_schema=None, constraint_columns=None, temp_type=None)

Creates a SQL merge statement to merge data from the source table into the target table.

Parameters:

Name Type Description Default
source_table str

The name of the source table.

required
match_condition list

The list of columns to be used as match conditions.

required
source_schema str

The schema of the source table. Defaults to None.

None
constraint_columns list

The list of columns that are used as constraints, such as primary keys or auto-update columns, which should not be inserted. Defaults to None.

None
temp_type str

The type of temporary table to be used. Defaults to None.

None

Returns:

Name Type Description
str str

The generated SQL merge statement.

Source code in keepitsql/core/upsert.py
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
def merge(
    self,
    source_table: str,
    match_condition: list,
    source_schema: str = None,
    constraint_columns: list = None,
    temp_type: str = None,
) -> str:
    """
    Creates a SQL merge statement to merge data from the source table into the target table.

    Args:
        source_table (str): The name of the source table.
        match_condition (list): The list of columns to be used as match conditions.
        source_schema (str, optional): The schema of the source table. Defaults to None.
        constraint_columns (list, optional): The list of columns that are used as constraints,
                                            such as primary keys or auto-update columns,
                                            which should not be inserted. Defaults to None.
        temp_type (str, optional): The type of temporary table to be used. Defaults to None.

    Returns:
        str: The generated SQL merge statement.
    """
    target_table = format_table_name(
        table_name=self.target_table,
        schema_name=self.target_schema,
    )
    source_table = format_table_name(
        table_name=source_table,
        schema_name=source_schema,
        temp_table_type=temp_type,
    )

    all_columns = list(self.dataframe.columns)

    if constraint_columns is None:
        constraint_columns = []

    # column_inclusion = [col for col in all_columns if col not in match_condition and col not in column_exclusion]

    join_conditions = ',\n  '.join(
        mst.merge_condition.format(source_column=col, target_column=col) for col in match_condition
    )

    matched_condition = '\n OR '.join(
        mst.when_matched_condition.format(target_column=col, source_column=col)
        for col in all_columns
        if col not in match_condition
    )

    merge_update_list = ',\n'.join(
        mst.update_list.format(target_column=col, source_column=col)
        for col in all_columns
        if col not in match_condition
    )

    merge_insert_values = ',\n'.join(
        mst.merge_insert.format(source_column=col) for col in all_columns if col not in constraint_columns
    )
    merge_insert_columns = ',\n'.join(
        mst.merge_insert_columns.format(source_column=col) for col in all_columns if col not in constraint_columns
    )

    merge_statement = mst.merge_statement.format(
        target_table=target_table,
        source_table=source_table,
        merge_join_conditions=join_conditions,
        matched_condition=matched_condition,
        update_list=merge_update_list,
        insert_columns=merge_insert_columns,
        merge_insert_value=merge_insert_values,
    )

    return merge_statement

Upsert

Class to handle the upserting (insert or update) of data into a target table.

Attributes:

Name Type Description
target_table str

The name of the target table.

target_schema str

The schema of the target table.

dataframe DataFrame

The dataframe containing the data to be upserted.

Source code in keepitsql/core/upsert.py
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
class Upsert:
    """
    Class to handle the upserting (insert or update) of data into a target table.

    Attributes:
        target_table (str): The name of the target table.
        target_schema (str): The schema of the target table.
        dataframe (DataFrame): The dataframe containing the data to be upserted.
    """

    def __init__(self, target_table, target_schema, dataframe):
        """
        Initializes the Upsert class with the target table, target schema, and dataframe.

        Args:
            target_table (str): The name of the target table.
            target_schema (str): The schema of the target table.
            dataframe (DataFrame): The dataframe containing the data to be upserted.
        """
        self.target_table = target_table
        self.target_schema = target_schema
        self.dataframe = dataframe
        self.__merge_instance = Merge(target_table, target_schema, dataframe)
        self.__insert_instance = InsertOnConflict(target_table, target_schema, dataframe)

    def upsert(
        self,
        source_table: str,
        match_condition: list,
        source_schema: str = None,
        constraint_columns: list = None,
        temp_type: str = None,
        dbms_output=None,
    ) -> str:
        """
        Creates an upsert statement based on the DBMS type. If the DBMS supports MERGE, it creates a merge statement.
        Otherwise, it creates an insert-on-conflict statement.

        Args:
            source_table (str): The name of the source table.
            match_condition (list): The list of columns to be used as match conditions.
            source_schema (str, optional): The schema of the source table. Defaults to None.
            constraint_columns (list, optional): The list of columns that are used as constraints, such as primary keys or
                                                auto-update columns, which should not be inserted. This is applied to
                                                database systems that use the MERGE statement. Defaults to None.
            temp_type (str, optional): The type of temporary table to be used. Defaults to None.
            dbms_output (str, optional): The DBMS type (e.g., 'mssql', 'postgresql'). Defaults to None.

        Returns:
            str: The generated upsert statement.
        """

        if get_upsert_type_by_dbms(dbms_output) == 'MERGE':
            upsert_statment = self.__merge_instance.merge(
                source_table=source_table,
                match_condition=match_condition,
                source_schema=source_schema,
                constraint_columns=constraint_columns,
                temp_type=temp_type,
            )
        else:
            upsert_statment = self.__insert_instance.insert_on_conflict(
                source_table=source_table,
                match_condition=match_condition,
                source_schema=source_schema,
                constraint_columns=constraint_columns,
                temp_type=temp_type,
            )
        return upsert_statment

__init__(target_table, target_schema, dataframe)

Initializes the Upsert class with the target table, target schema, and dataframe.

Parameters:

Name Type Description Default
target_table str

The name of the target table.

required
target_schema str

The schema of the target table.

required
dataframe DataFrame

The dataframe containing the data to be upserted.

required
Source code in keepitsql/core/upsert.py
186
187
188
189
190
191
192
193
194
195
196
197
198
199
def __init__(self, target_table, target_schema, dataframe):
    """
    Initializes the Upsert class with the target table, target schema, and dataframe.

    Args:
        target_table (str): The name of the target table.
        target_schema (str): The schema of the target table.
        dataframe (DataFrame): The dataframe containing the data to be upserted.
    """
    self.target_table = target_table
    self.target_schema = target_schema
    self.dataframe = dataframe
    self.__merge_instance = Merge(target_table, target_schema, dataframe)
    self.__insert_instance = InsertOnConflict(target_table, target_schema, dataframe)

upsert(source_table, match_condition, source_schema=None, constraint_columns=None, temp_type=None, dbms_output=None)

Creates an upsert statement based on the DBMS type. If the DBMS supports MERGE, it creates a merge statement. Otherwise, it creates an insert-on-conflict statement.

Parameters:

Name Type Description Default
source_table str

The name of the source table.

required
match_condition list

The list of columns to be used as match conditions.

required
source_schema str

The schema of the source table. Defaults to None.

None
constraint_columns list

The list of columns that are used as constraints, such as primary keys or auto-update columns, which should not be inserted. This is applied to database systems that use the MERGE statement. Defaults to None.

None
temp_type str

The type of temporary table to be used. Defaults to None.

None
dbms_output str

The DBMS type (e.g., 'mssql', 'postgresql'). Defaults to None.

None

Returns:

Name Type Description
str str

The generated upsert statement.

Source code in keepitsql/core/upsert.py
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
def upsert(
    self,
    source_table: str,
    match_condition: list,
    source_schema: str = None,
    constraint_columns: list = None,
    temp_type: str = None,
    dbms_output=None,
) -> str:
    """
    Creates an upsert statement based on the DBMS type. If the DBMS supports MERGE, it creates a merge statement.
    Otherwise, it creates an insert-on-conflict statement.

    Args:
        source_table (str): The name of the source table.
        match_condition (list): The list of columns to be used as match conditions.
        source_schema (str, optional): The schema of the source table. Defaults to None.
        constraint_columns (list, optional): The list of columns that are used as constraints, such as primary keys or
                                            auto-update columns, which should not be inserted. This is applied to
                                            database systems that use the MERGE statement. Defaults to None.
        temp_type (str, optional): The type of temporary table to be used. Defaults to None.
        dbms_output (str, optional): The DBMS type (e.g., 'mssql', 'postgresql'). Defaults to None.

    Returns:
        str: The generated upsert statement.
    """

    if get_upsert_type_by_dbms(dbms_output) == 'MERGE':
        upsert_statment = self.__merge_instance.merge(
            source_table=source_table,
            match_condition=match_condition,
            source_schema=source_schema,
            constraint_columns=constraint_columns,
            temp_type=temp_type,
        )
    else:
        upsert_statment = self.__insert_instance.insert_on_conflict(
            source_table=source_table,
            match_condition=match_condition,
            source_schema=source_schema,
            constraint_columns=constraint_columns,
            temp_type=temp_type,
        )
    return upsert_statment