PCA and VCD (could be deleted, since there were no matches)

# Compute PCA and VCD features to feed into model
import pandas as pd
# Load both tables in one pass: first the base dataset, then the PCA/VCD
# table (a 'Molecule Name' column plus numbered columns) used as features.
half_enantiomer_data, pca_vcd_values = (
    pd.read_csv(csv_path)
    for csv_path in (
        "half_enantiomer_data.csv",
        "../data/vcd/pca_vcd_values.csv",
    )
)
# Display the PCA/VCD table for a quick visual check
pca_vcd_values
Molecule Name 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14
0 (R)-(-)-Carvone 7.464317 -43.591126 -8.046428 -0.574730 -3.580254 5.872903 -2.689295 0.910235 3.263888 1.241722 -1.817992 -10.064818 -5.517172 6.956873 0.003187
1 (1R,2S,5R)-(-)-Menthol -15.984788 -48.409755 11.599679 -6.459996 -2.895890 -21.817187 7.987366 30.168379 -4.855940 -27.966750 -15.593791 18.881794 6.680355 -3.869645 -18.299473
2 (-)-Menthyl chloride -5.850617 -48.612997 25.383460 -1.972609 -9.623228 -12.079316 -5.014842 9.790833 -11.125811 -10.948924 12.772888 -4.068723 4.196934 0.326200 0.769741
3 (-)-Borneol 0.756580 -25.884643 -4.987043 6.148153 -18.167909 7.294051 -16.954397 9.166877 10.160046 8.512541 22.090822 -60.188603 5.246429 9.022766 -17.823573
4 (-)-Isoplegol -12.599590 -33.717238 -12.966523 -18.479083 5.945713 -6.208222 -4.337392 10.783442 -12.640204 1.773643 1.927400 6.935122 5.856485 8.257695 -9.983220
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
156 β-Cholestanol 6.415996 -18.840982 20.303654 -29.238431 -26.058052 8.328641 -8.520251 19.753263 24.650594 37.600273 -1.058662 -16.200175 37.116864 -1.951506 -10.766619
157 Cholesterol Ethyl Carbonate 230.771312 139.215929 -45.562654 -28.445263 7.019739 -30.857068 18.819408 -6.542138 7.706800 -5.896378 7.642116 -5.188079 5.985704 -34.834249 -7.737224
158 Cholesterol n-Caprate 181.677425 33.151223 72.048835 -49.147260 19.664893 20.812246 -3.977717 5.351038 14.182154 -2.607771 6.333818 -0.251176 -7.470138 19.545668 -8.373291
159 Cholesterol Laurate -130.152846 164.035229 61.091725 -37.136634 -13.847154 14.540341 -4.944992 -3.105062 2.508800 1.700032 5.700678 8.617406 -4.496284 22.150449 2.389079
160 Cholesterol Myristate -132.817778 151.835799 66.528593 -50.454738 7.112731 21.576747 6.387919 -5.878909 -6.011154 7.648669 4.893929 7.866558 1.410726 11.221058 0.188386

161 rows × 16 columns

# Use the molecule name as the row label so rows can be looked up by name
pca_vcd_values = pca_vcd_values.set_index(keys=['Molecule Name'])
# Preview the first five rows of the re-indexed table
pca_vcd_values.head(5)
0 1 2 3 4 5 6 7 8 9 10 11 12 13 14
Molecule Name
(R)-(-)-Carvone 7.464317 -43.591126 -8.046428 -0.574730 -3.580254 5.872903 -2.689295 0.910235 3.263888 1.241722 -1.817992 -10.064818 -5.517172 6.956873 0.003187
(1R,2S,5R)-(-)-Menthol -15.984788 -48.409755 11.599679 -6.459996 -2.895890 -21.817187 7.987366 30.168379 -4.855940 -27.966750 -15.593791 18.881794 6.680355 -3.869645 -18.299473
(-)-Menthyl chloride -5.850617 -48.612997 25.383460 -1.972609 -9.623228 -12.079316 -5.014842 9.790833 -11.125811 -10.948924 12.772888 -4.068723 4.196934 0.326200 0.769741
(-)-Borneol 0.756580 -25.884643 -4.987043 6.148153 -18.167909 7.294051 -16.954397 9.166877 10.160046 8.512541 22.090822 -60.188603 5.246429 9.022766 -17.823573
(-)-Isoplegol -12.599590 -33.717238 -12.966523 -18.479083 5.945713 -6.208222 -4.337392 10.783442 -12.640204 1.773643 1.927400 6.935122 5.856485 8.257695 -9.983220
# Work on an independent copy so the original dataframe is never modified
# (plain assignment would only bind a second name to the same object).
half_enantiomer_data_copy = half_enantiomer_data.copy()
half_enantiomer_data_copy;
# Molecule names present in both tables.
# half_enantiomer_data still carries the default integer index from read_csv
# and stores the names in its 'Molecule Name' column, whereas pca_vcd_values
# is indexed by molecule name. Intersecting the two indexes directly compares
# integers with strings and is therefore always empty, so the comparison has
# to be made against the 'Molecule Name' column instead.
names_in_data = pd.Index(half_enantiomer_data_copy["Molecule Name"].dropna().unique())
common_index = names_in_data.intersection(pca_vcd_values.index)
# Keep only the rows of the original dataset whose molecule has PCA/VCD values
has_features = half_enantiomer_data_copy["Molecule Name"].isin(common_index)
half_enantiomer_data_copy = half_enantiomer_data_copy.loc[has_features]
pca_vcd_features = pca_vcd_values.loc[common_index]
# Combine original dataset with pca_vcd: match the 'Molecule Name' column of
# the dataset against the name index of the feature table.
# NOTE(review): if this is still empty, the names themselves differ between
# the two files (spelling / whitespace / case) -- confirm against the CSVs.
pca_vcd = half_enantiomer_data_copy.join(pca_vcd_features, on="Molecule Name", how="inner")
pca_vcd
Unnamed: 0 Unnamed: 0.1 Molecule Name Pubchem ID # Note SMILES String Other SMILES Method Contributor Detection Threshold ... 5 6 7 8 9 10 11 12 13 14

0 rows × 32 columns