Understanding -fanalyzer

`analyzer-pass.cc`

This is the top level file.

class pass_analyzer : public ipa_opt_pass_d
{
public:
   pass_analyzer(gcc::context *ctxt)
   : ipa_opt_pass_d (pass_data_analyzer, ctxt,
                     NULL, /* generate_summary */
                     NULL, /* write_summary */
                     NULL, /* read_summary */
                     NULL, /* write_optimization_summary */
                     NULL, /* read_optimization_summary */
                     NULL, /* stmt_fixup */
                     0, /* function_transform_todo_flags_start */
                     NULL, /* function_transform */
                     NULL) /* variable_transform */
  {}

  /* opt_pass methods: */
  bool gate (function *) FINAL OVERRIDE;
  unsigned int execute (function *) FINAL OVERRIDE;
}; // class pass_analyzer

This is the execute function:

unsigned int
pass_analyzer::execute (function *)
{
  ana::run_checkers ();
  return 0;
}

/* External entrypoint to the analysis "engine".
   Set up any dumps, then call impl_run_checkers.  */
void run_checkers ()

`impl_run_checkers`

  /* If using LTO, ensure that the cgraph nodes have function bodies.  */
  cgraph_node *node;
  FOR_EACH_FUNCTION_WITH_GIMPLE_BODY (node)
    node->get_untransformed_body ();
    
  /* Create the supergraph.  */
  supergraph sg (logger);
  
  auto_delete_vec <state_machine> checkers;
  make_checkers (checkers, logger);

/* Create instances of the various state machines, each using LOGGER,
   and populate OUT with them.  */
void
make_checkers (auto_delete_vec <state_machine> &out, logger *logger)
{
  out.safe_push (make_malloc_state_machine (logger));

/* Internal interface to this file. */
state_machine * make_malloc_state_machine (logger *logger)

malloc_state_machine::malloc_state_machine (logger *logger)
: state_machine ("malloc", logger)
{
  m_start = add_state ("start");
  m_unchecked = add_state ("unchecked");
  m_null = add_state ("null");
  m_nonnull = add_state ("nonnull");
  m_freed = add_state ("freed");
  m_non_heap = add_state ("non-heap");
  m_stop = add_state ("stop");
}

Question #0 Do all state machines defined have non-trivial transitions?
Answer: Yes
Question #1 Does Boomerang need a state machine?
Answer: #TODO
Note: Let’s just focus on understanding the double free analysis…

  /* Return true if STMT is a function call recognized by this sm.  */
  virtual bool on_stmt (sm_context *sm_ctxt,
                        const supernode *node,
                        const gimple *stmt) const = 0;

Question #2: What does on_stmt mean semantically?
Answer: Ok, we have a stmt… We have a node that corresponds to (I am assuming) the point just before the statement. However, that is never used in the function on_transition.

  /* Called by state_machine in response to pattern matches:
     if VAR is in state FROM, transition it to state TO, potentially
     recording the "origin" of the state as ORIGIN.
     Use NODE and STMT for location information.  */
   virtual void on_transition (const supernode *node, const gimple *stmt,
                              tree var,
                              state_machine::state_t from,
                              state_machine::state_t to,
                              tree origin = NULL_TREE) = 0;

Going back to impl_run_checkers

  /* Extrinsic state shared by nodes in the graph.  */
  const extrinsic_state ext_state (checkers);

  const analysis_plan plan (sg, logger);

  /* The exploded graph.  */
  exploded_graph eg (sg, logger, ext_state, purge_map, plan,
                     analyzer_verbosity);

Ok, it seems that extrinsic_state will provide the checkers to exploded supergraph. So, let’s check that out.

  /* Add entrypoints to the graph for externally-callable functions.  */
  eg.build_initial_worklist ();
  
  /* Now process the worklist, exploring the <point, state> graph.  */
  eg.process_worklist ();

/* The main loop of the analysis.
   Take freshly-created exploded_nodes from the worklist, calling
   process_node on them to explore the <point, state> graph.
   Add edges to their successors, potentially creating new successors
   (which are also added to the worklist).  */

void
exploded_graph::process_worklist ()
{
  logger * const logger = get_logger ();
  LOG_SCOPE (logger);
  auto_timevar tv (TV_ANALYZER_WORKLIST);

  while (m_worklist.length () > 0)
    {
      exploded_node *node = m_worklist.take_next ();
      gcc_assert (node->get_status () == exploded_node::STATUS_WORKLIST);
      gcc_assert (node->m_succs.length () == 0
                  || node == m_origin);

      if (logger)
        logger->log ("next to process: EN: %i", node->m_index);

   // ...
   
    process_node (node);

/* The core of exploded_graph::process_worklist (the main analysis loop),
   handling one node in the worklist.

   Get successor <point, state> pairs for NODE, calling get_or_create on
   them, and adding an exploded_edge to each successors.

   Freshly-created nodes will be added to the worklist.  */

void
exploded_graph::process_node (exploded_node *node)

  program_state next_state (state);

Question #2 Is this IFDS?
Answer No, because the program state can be any of the following. Could this be IDE? It seems like something in between… or maybe just IDE… ```c++ /* A class for representing the state of interest at a given path of analysis.

Currently this is a combination of: (a) a region_model, giving: (a.1) a hierarchy of memory regions (a.2) values for the regions (a.3) inequalities between values (b) sm_state_maps per state machine, giving a sparse mapping of values to states. */

class program_state


This is how you merge states...

/* Attempt to merge this state with OTHER, both using EXT_STATE. Write the result to *OUT. If the states were merged successfully, return true. */

bool program_state::can_merge_with_p (const program_state &other, const extrinsic_state &ext_state, program_state *out) const

bool region_model::can_merge_with_p (const region_model &other_model, region_model *out_model, svalue_id_merger_mapping *sid_mapping) const ```