Source code for generate_mrf_turtles

import pandas as pd
from rdflib import Graph, Literal, Namespace, URIRef
from rdflib.namespace import RDF, SKOS, XSD
from pathlib import Path


[docs] def generate_mrf_turtles(param_data_path,output_file_path): preferred_labels = { 'NIR HDPE':'near infrared sorting machine of HDPE plastics', 'NIR PET':'near infrared sorting machine of PET plastics', 'glass breaker':'glass breaker sorting machine', 'disc screen 1':"disc screen sorting machine", 'disc screen 2':"disc screen sorting machine", 'disc screen 3':"disc screen sorting machine", 'eddy':"eddy current separator", 'magnet':'magnet sorting machine for ferrous metals', 'vacuum':'vacuum sorting machine for film plastic', 'optical glass':"optical sorting machine for glass", } disc_descr = ("An inclined plane filled with a series of parallel rods " + "with discs spread along each rod such that large materials travel " + "over the top while smaller materials fall between the discs") opticalglass_decr = " ".join(["Identifies pre-determined material(s) using optical", "technology (e.g., cameras, lasers, sensors) and removes the identified material" "from the stream using bursts of compressed air"]) description = { 'eddy':"Uses magnetic fields to remove aluminum and other non-ferrous metals", 'magnet':"Uses magnetic fields to remove ferrous metals", 'disc screen 1':disc_descr, 'disc screen 2':disc_descr, 'disc screen 3':disc_descr, 'optical glass':opticalglass_decr, } # New graph g = Graph() # Set up namespaces #mrf_equipment_url = "https://vocab.sentier.dev/model-terms/mrf-equipment/" #mrf_url = "https://vocab.sentier.dev/products/material-recovery-facility/" units = Namespace("https://vocab.sentier.dev/units/") qudt = Namespace("http://qudt.org/schema/qudt/") sorting_machines = Namespace("http://data.europa.eu/xsp/cn2024/847410000080") model_terms = Namespace("https://vocab.sentier.dev/model-terms/generic") g.bind("units", units) g.bind("qudt", qudt) g.bind("sorting_machines", sorting_machines) g.bind("skos", SKOS) # the existing Efficiency concept in dds vocabulary efficiency_qk = URIRef("https://vocab.sentier.dev/units/quantity-kind/Efficiency") sorting_machine = URIRef("https://publications.europa.eu/resource/authority/cpv/cpv/43411000") sequence = URIRef("http://semanticscience.org/resource/SIO_001118") # make the concept thingy scheme = URIRef(sorting_machines) g.add((scheme, RDF.type, SKOS.ConceptScheme)) # add general concepts # sequence g.add((URIRef("https://vocab.sentier.dev/model-terms/generic/sequence"), RDF.type,SKOS.Concept)) g.add((URIRef("https://vocab.sentier.dev/model-terms/generic/sequence"), SKOS.exactMatch,sequence)) # mixed waste # sorting machines path_to_file = Path(__file__).parent / param_data_path df = pd.read_csv(path_to_file,sep=';') for _equipment in df.equipment.unique(): uri = URIRef(f"{sorting_machines}{_equipment.replace(' ', '_')}") g.add((uri, RDF.type, SKOS.Concept)) g.add((uri, SKOS.prefLabel, Literal(preferred_labels.get(_equipment,_equipment), lang="en"))) #g.add((uri, SKOS.inScheme, scheme)) g.add((uri, qudt.hasQuantityKind, efficiency_qk)) g.add((uri, SKOS.broader, sorting_machine)) if _equipment in description: g.add((uri,SKOS.definition, Literal(description.get(_equipment),lang="en"))) g.serialize(destination=output_file_path, format="turtle")
if __name__ == '__main__':
[docs] param_data_path = "corrected_mrf_equipment_efficiency.csv"
output_file_path = "mrf_equipment_efficiency_ontology.ttl" generate_mrf_turtles(param_data_path,output_file_path)