GitLab CI for docker build enabled! You can enable it using .gitlab-ci.yml in your project. Check file template at

Commit e8d92b83 authored by Vítor Vieira's avatar Vítor Vieira
Browse files

[WIP] Duplicate columns with >1 target_id

parent 386899e4
......@@ -40,7 +40,22 @@ training_df = pd.merge(finaldf, final_descriptor_df, on='smiles', how='right').d
# Build boolean row masks over training_df: rows whose standard_units equals
# 'NM' (presumably nanomolar -- confirm) and rows with a non-null standard_value.
good_units = training_df.standard_units == 'NM'
has_values = ~training_df.standard_value.isnull()
# NOTE(review): this text comes from a rendered diff whose +/- markers were
# lost; the next two assignments look like the old and new version of the same
# filter. Confirm which one is actually present in the file.
training_df = training_df[good_units & has_values]
# Same filter, additionally requiring a non-missing target_id. The `',' in x`
# checks below apply `in` to every target_id value, so they presumably rely on
# missing values having been removed here -- verify.
training_df = training_df[good_units & has_values & ~training_df.target_id.isna()]
# Rows whose target_id contains a comma (presumably several comma-separated
# ids packed into one field).
baddf = training_df[training_df.target_id.apply(lambda x: ',' in x)]
# Complement of baddf: rows whose target_id contains no comma.
subdf_correct = training_df[~training_df.target_id.apply(lambda x: ',' in x)]
def replace_value_in_row(row, id, sep=','):
    """Expand one row into several rows, one per separated value in a field.

    The string stored at ``row[id]`` is split on ``sep``; each piece is
    stripped of surrounding whitespace and written into its own copy of
    ``row``.

    Args:
        row: Mapping-like record supporting ``row[id]``, item assignment and
            ``.copy()`` (for example a ``dict`` or a pandas ``Series``).
            It is not modified.
        id: Key of the field holding the separated values. (The name
            shadows the builtin but is kept because callers pass it by
            keyword.)
        sep: Separator between the individual values in the field.

    Returns:
        A list with one independent copy of ``row`` per separated value, in
        the order the values appear, each with ``row[id]`` set to that value.
    """
    items = [s.strip() for s in row[id].split(sep)]
    expanded = []
    for item in items:
        # Copy per item: repeating the same object (``[row] * n``) would make
        # every entry alias one row, so all of them would end up holding the
        # last item and the caller's row would be mutated as well.
        new_row = row.copy()
        new_row[id] = item
        expanded.append(new_row)
    return expanded
# new_rows = list(chain(*baddf.apply(lambda x: replace_value_in_row(row=x, id='target_id'), axis=1)))
# new_rows = chain(*[replace_value_in_row(baddf.iloc[i,:], 'target_id') for i in range(baddf.shape[0])])
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment